mirror of
https://github.com/ziglang/zig.git
synced 2025-12-06 06:13:07 +00:00
add deflate compression, simplify decompression
Implements deflate compression from scratch. A history window is kept in the writer's buffer for matching, and a chained hash table is used to find matches. Tokens are accumulated until a threshold is reached and are then output as a block. Flush is used to indicate end of stream.

Additionally, two other deflate writers are provided:

* `Raw` writes only store blocks (the uncompressed bytes). It uses data vectors to efficiently send block headers and data.
* `Huffman` performs only Huffman compression on the data, with no match searching.

Both can take full advantage of writer semantics since they do not need to keep a history.

Literal and distance code parameters in `token` have also been reworked. They are now derived mathematically; however, the more expensive ones are still obtained through a lookup table (except on ReleaseSmall).

Decompression bit reading has been greatly simplified, taking advantage of the ability to peek on the underlying reader. Additionally, a few bugs with limit handling have been fixed.
This commit is contained in:
parent
e79a00adf6
commit
f50c647977
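
Below is a minimal usage sketch of the writer-based API this commit describes. It is an illustration only: the exact `init` parameters, option struct, and writer field names are assumptions, not taken from this diff.

    const std = @import("std");
    const flate = std.compress.flate;

    // Hypothetical sketch: compress `data` into `out` as a raw deflate
    // stream. `buffer` is the history window the compressor keeps for
    // matching; per flate.zig it must be at least flate.max_window_len bytes.
    fn compressExample(out: *std.Io.Writer, data: []const u8) !void {
        var buffer: [flate.max_window_len]u8 = undefined;
        // Assumed init signature and options; see flate/Compress.zig for the
        // real API.
        var compress: flate.Compress = .init(out, &buffer, .raw, .{});
        try compress.writer.writeAll(data);
        // Flush marks end of stream and emits the final block.
        try compress.writer.flush();
    }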
@ -1,8 +1,7 @@
const std = @import("../std.zig");

/// When decompressing, the output buffer is used as the history window, so
/// less than this may result in failure to decompress streams that were
/// compressed with a larger window.
/// When compressing and decompressing, the provided buffer is used as the
/// history window, so it must be at least this size.
pub const max_window_len = history_len * 2;

pub const history_len = 32768;
@ -15,10 +14,6 @@ pub const Compress = @import("flate/Compress.zig");
/// produces the original full-size data.
pub const Decompress = @import("flate/Decompress.zig");

/// Compression without Lempel-Ziv match searching. Faster compression, lower
/// memory requirements, but bigger compressed sizes.
pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig");

/// Container of the deflate bit stream body. Container adds a header before
/// the deflate bit stream and a footer after. It can be gzip, zlib or raw (no
/// header, no footer, raw bit stream).
@ -112,28 +107,24 @@ pub const Container = enum {
switch (h.*) {
.raw => {},
.gzip => |*gzip| {
gzip.update(buf);
gzip.count +%= buf.len;
gzip.crc.update(buf);
gzip.count +%= @truncate(buf.len);
},
.zlib => |*zlib| {
zlib.update(buf);
},
inline .gzip, .zlib => |*x| x.update(buf),
}
}

pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void {
var bits: [4]u8 = undefined;
switch (hasher.*) {
.gzip => |*gzip| {
// GZIP 8 bytes footer
// - 4 bytes, CRC32 (CRC-32)
// - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
std.mem.writeInt(u32, &bits, gzip.final(), .little);
try writer.writeAll(&bits);

std.mem.writeInt(u32, &bits, gzip.bytes_read, .little);
try writer.writeAll(&bits);
// - 4 bytes, ISIZE (Input SIZE) - size of the original
// (uncompressed) input data modulo 2^32
try writer.writeInt(u32, gzip.crc.final(), .little);
try writer.writeInt(u32, gzip.count, .little);
},
.zlib => |*zlib| {
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
@ -141,8 +132,7 @@ pub const Container = enum {
// Checksum value of the uncompressed data (excluding any
// dictionary data) computed according to the Adler-32
// algorithm.
std.mem.writeInt(u32, &bits, zlib.final, .big);
try writer.writeAll(&bits);
try writer.writeInt(u32, zlib.adler, .big);
},
.raw => {},
}
@ -174,7 +164,6 @@ pub const Container = enum {
};
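
As a worked example of the footers written above (checksum values invented for illustration): a gzip stream whose uncompressed data has CRC-32 0x11223344 and length 5 ends with the 8 little-endian bytes `44 33 22 11 05 00 00 00`, while zlib (RFC 1950) would instead append its Adler-32 checksum as 4 big-endian bytes.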

test {
    _ = HuffmanEncoder;
    _ = Compress;
    _ = Decompress;
}
@ -1,591 +0,0 @@
//! Accepts a list of tokens and decides which block type to write, i.e. which
//! block type will provide the best compression. Writes the header and body of
//! the block.
const std = @import("std");
const assert = std.debug.assert;
const Writer = std.Io.Writer;

const BlockWriter = @This();
const flate = @import("../flate.zig");
const Compress = flate.Compress;
const HuffmanEncoder = flate.HuffmanEncoder;
const Token = @import("Token.zig");

const codegen_order = HuffmanEncoder.codegen_order;
const end_code_mark = 255;

output: *Writer,

codegen_freq: [HuffmanEncoder.codegen_code_count]u16,
literal_freq: [HuffmanEncoder.max_num_lit]u16,
distance_freq: [HuffmanEncoder.distance_code_count]u16,
codegen: [HuffmanEncoder.max_num_lit + HuffmanEncoder.distance_code_count + 1]u8,
literal_encoding: HuffmanEncoder,
distance_encoding: HuffmanEncoder,
codegen_encoding: HuffmanEncoder,
fixed_literal_encoding: HuffmanEncoder,
fixed_distance_encoding: HuffmanEncoder,
huff_distance: HuffmanEncoder,

fixed_literal_codes: [HuffmanEncoder.max_num_frequencies]HuffmanEncoder.Code,
fixed_distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,
distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,

pub fn init(output: *Writer) BlockWriter {
    return .{
        .output = output,
        .codegen_freq = undefined,
        .literal_freq = undefined,
        .distance_freq = undefined,
        .codegen = undefined,
        .literal_encoding = undefined,
        .distance_encoding = undefined,
        .codegen_encoding = undefined,
        .fixed_literal_encoding = undefined,
        .fixed_distance_encoding = undefined,
        .huff_distance = undefined,
        .fixed_literal_codes = undefined,
        .fixed_distance_codes = undefined,
        .distance_codes = undefined,
    };
}

pub fn initBuffers(bw: *BlockWriter) void {
    bw.fixed_literal_encoding = .fixedLiteralEncoder(&bw.fixed_literal_codes);
    bw.fixed_distance_encoding = .fixedDistanceEncoder(&bw.fixed_distance_codes);
    bw.huff_distance = .huffmanDistanceEncoder(&bw.distance_codes);
}

/// Flush internal bit buffer to the writer.
/// Should be called only when the bit stream is at a byte boundary.
///
/// That is: after the final block, when the last byte could be incomplete, or
/// after a stored block, which is aligned to the byte boundary (it has
/// padding bits after the first 3 bits).
pub fn flush(self: *BlockWriter) Writer.Error!void {
    try self.bit_writer.flush();
}

fn writeCode(self: *BlockWriter, c: Compress.HuffCode) Writer.Error!void {
    try self.bit_writer.writeBits(c.code, c.len);
}

/// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
/// the literal and distance lengths arrays (which are concatenated into a single
/// array). This method generates that run-length encoding.
///
/// The result is written into the codegen array, and the frequencies
/// of each code are written into the codegen_freq array.
/// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
/// information. Code bad_code is an end marker.
///
/// num_literals: The number of literals in literal_encoding
/// num_distances: The number of distances in distance_encoding
/// lit_enc: The literal encoder to use
/// dist_enc: The distance encoder to use
fn generateCodegen(
    self: *BlockWriter,
    num_literals: u32,
    num_distances: u32,
    lit_enc: *Compress.LiteralEncoder,
    dist_enc: *Compress.DistanceEncoder,
) void {
    for (self.codegen_freq, 0..) |_, i| {
        self.codegen_freq[i] = 0;
    }

    // Note that we are using codegen both as a temporary variable for holding
    // a copy of the frequencies, and as the place where we put the result.
    // This is fine because the output is always shorter than the input used
    // so far.
    var codegen = &self.codegen; // cache
    // Copy the concatenated code sizes to codegen. Put a marker at the end.
    var cgnl = codegen[0..num_literals];
    for (cgnl, 0..) |_, i| {
        cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len));
    }

    cgnl = codegen[num_literals .. num_literals + num_distances];
    for (cgnl, 0..) |_, i| {
        cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len));
    }
    codegen[num_literals + num_distances] = end_code_mark;

    var size = codegen[0];
    var count: i32 = 1;
    var out_index: u32 = 0;
    var in_index: u32 = 1;
    while (size != end_code_mark) : (in_index += 1) {
        // INVARIANT: We have seen "count" copies of size that have not yet
        // had output generated for them.
        const next_size = codegen[in_index];
        if (next_size == size) {
            count += 1;
            continue;
        }
        // We need to generate codegen indicating "count" of size.
        if (size != 0) {
            codegen[out_index] = size;
            out_index += 1;
            self.codegen_freq[size] += 1;
            count -= 1;
            while (count >= 3) {
                var n: i32 = 6;
                if (n > count) {
                    n = count;
                }
                codegen[out_index] = 16;
                out_index += 1;
                codegen[out_index] = @as(u8, @intCast(n - 3));
                out_index += 1;
                self.codegen_freq[16] += 1;
                count -= n;
            }
        } else {
            while (count >= 11) {
                var n: i32 = 138;
                if (n > count) {
                    n = count;
                }
                codegen[out_index] = 18;
                out_index += 1;
                codegen[out_index] = @as(u8, @intCast(n - 11));
                out_index += 1;
                self.codegen_freq[18] += 1;
                count -= n;
            }
            if (count >= 3) {
                // 3 <= count <= 10
                codegen[out_index] = 17;
                out_index += 1;
                codegen[out_index] = @as(u8, @intCast(count - 3));
                out_index += 1;
                self.codegen_freq[17] += 1;
                count = 0;
            }
        }
        count -= 1;
        while (count >= 0) : (count -= 1) {
            codegen[out_index] = size;
            out_index += 1;
            self.codegen_freq[size] += 1;
        }
        // Set up invariant for next time through the loop.
        size = next_size;
        count = 1;
    }
    // Marker indicating the end of the codegen.
    codegen[out_index] = end_code_mark;
}
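
A hand-worked illustration of the run-length rules above: the concatenated code-length sequence

    3 3 3 3 3 3 3 3  0 (x15)  4

is emitted as `3`, `16` (2 extra bits = 3, i.e. repeat the previous length 6 more times), `3`, `18` (7 extra bits = 4, i.e. a run of 15 zeros), `4` — so eight literals collapse to three symbols and the zero run to one.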

const DynamicSize = struct {
    size: u32,
    num_codegens: u32,
};

/// dynamicSize returns the size of dynamically encoded data in bits.
fn dynamicSize(
    self: *BlockWriter,
    lit_enc: *Compress.LiteralEncoder, // literal encoder
    dist_enc: *Compress.DistanceEncoder, // distance encoder
    extra_bits: u32,
) DynamicSize {
    var num_codegens = self.codegen_freq.len;
    while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) {
        num_codegens -= 1;
    }
    const header = 3 + 5 + 5 + 4 + (3 * num_codegens) +
        self.codegen_encoding.bitLength(self.codegen_freq[0..]) +
        self.codegen_freq[16] * 2 +
        self.codegen_freq[17] * 3 +
        self.codegen_freq[18] * 7;
    const size = header +
        lit_enc.bitLength(&self.literal_freq) +
        dist_enc.bitLength(&self.distance_freq) +
        extra_bits;

    return DynamicSize{
        .size = @as(u32, @intCast(size)),
        .num_codegens = @as(u32, @intCast(num_codegens)),
    };
}

/// fixedSize returns the size of fixed-table encoded data in bits.
fn fixedSize(self: *BlockWriter, extra_bits: u32) u32 {
    return 3 +
        self.fixed_literal_encoding.bitLength(&self.literal_freq) +
        self.fixed_distance_encoding.bitLength(&self.distance_freq) +
        extra_bits;
}

const StoredSize = struct {
    size: u32,
    storable: bool,
};

/// storedSizeFits calculates the stored size, including header.
/// The function returns the size in bits and whether the data
/// fits inside a single block.
fn storedSizeFits(in: ?[]const u8) StoredSize {
    if (in == null) {
        return .{ .size = 0, .storable = false };
    }
    if (in.?.len <= HuffmanEncoder.max_store_block_size) {
        return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true };
    }
    return .{ .size = 0, .storable = false };
}
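
The `(in.?.len + 5) * 8` above accounts for the 5 header bytes of a stored block (the 3-bit block header padded to a byte boundary, then LEN and NLEN as two little-endian u16 values); for example, a 1000-byte block costs (1000 + 5) * 8 = 8040 bits.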

/// Write the header of a dynamic Huffman block to the output stream.
///
/// num_literals: The number of literals specified in codegen
/// num_distances: The number of distances specified in codegen
/// num_codegens: The number of codegens used in codegen
/// eof: Is it the end-of-file? (end of stream)
fn dynamicHeader(
    self: *BlockWriter,
    num_literals: u32,
    num_distances: u32,
    num_codegens: u32,
    eof: bool,
) Writer.Error!void {
    const first_bits: u32 = if (eof) 5 else 4;
    try self.bit_writer.writeBits(first_bits, 3);
    try self.bit_writer.writeBits(num_literals - 257, 5);
    try self.bit_writer.writeBits(num_distances - 1, 5);
    try self.bit_writer.writeBits(num_codegens - 4, 4);

    var i: u32 = 0;
    while (i < num_codegens) : (i += 1) {
        const value = self.codegen_encoding.codes[codegen_order[i]].len;
        try self.bit_writer.writeBits(value, 3);
    }

    i = 0;
    while (true) {
        const code_word: u32 = @as(u32, @intCast(self.codegen[i]));
        i += 1;
        if (code_word == end_code_mark) {
            break;
        }
        try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]);

        switch (code_word) {
            16 => {
                try self.bit_writer.writeBits(self.codegen[i], 2);
                i += 1;
            },
            17 => {
                try self.bit_writer.writeBits(self.codegen[i], 3);
                i += 1;
            },
            18 => {
                try self.bit_writer.writeBits(self.codegen[i], 7);
                i += 1;
            },
            else => {},
        }
    }
}

fn storedHeader(self: *BlockWriter, length: usize, eof: bool) Writer.Error!void {
    assert(length <= 65535);
    const flag: u32 = if (eof) 1 else 0;
    try self.bit_writer.writeBits(flag, 3);
    try self.flush();
    const l: u16 = @intCast(length);
    try self.bit_writer.writeBits(l, 16);
    try self.bit_writer.writeBits(~l, 16);
}

fn fixedHeader(self: *BlockWriter, eof: bool) Writer.Error!void {
    // Indicate that we are a fixed Huffman block
    var value: u32 = 2;
    if (eof) {
        value = 3;
    }
    try self.bit_writer.writeBits(value, 3);
}

/// Write a block of tokens with the smallest encoding. Will choose block type.
/// The original input can be supplied, and if the Huffman encoded data
/// is larger than the original bytes, the data will be written as a
/// stored block.
/// If the input is null, the tokens will always be Huffman encoded.
pub fn write(self: *BlockWriter, tokens: []const Token, eof: bool, input: ?[]const u8) Writer.Error!void {
    const lit_and_dist = self.indexTokens(tokens);
    const num_literals = lit_and_dist.num_literals;
    const num_distances = lit_and_dist.num_distances;

    var extra_bits: u32 = 0;
    const ret = storedSizeFits(input);
    const stored_size = ret.size;
    const storable = ret.storable;

    if (storable) {
        // We only bother calculating the costs of the extra bits required by
        // the length and distance fields (which will be the same for both
        // fixed and dynamic encoding) if we need to compare those two
        // encodings against stored encoding.
        var length_code: u16 = Token.length_codes_start + 8;
        while (length_code < num_literals) : (length_code += 1) {
            // First eight length codes have extra size = 0.
            extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) *
                @as(u32, @intCast(Token.lengthExtraBits(length_code)));
        }
        var distance_code: u16 = 4;
        while (distance_code < num_distances) : (distance_code += 1) {
            // First four distance codes have extra size = 0.
            extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) *
                @as(u32, @intCast(Token.distanceExtraBits(distance_code)));
        }
    }

    // Figure out smallest code.
    // Fixed Huffman baseline.
    var literal_encoding = &self.fixed_literal_encoding;
    var distance_encoding = &self.fixed_distance_encoding;
    var size = self.fixedSize(extra_bits);

    // Dynamic Huffman?
    var num_codegens: u32 = 0;

    // Generate codegen and codegenFrequencies, which indicates how to encode
    // the literal_encoding and the distance_encoding.
    self.generateCodegen(
        num_literals,
        num_distances,
        &self.literal_encoding,
        &self.distance_encoding,
    );
    self.codegen_encoding.generate(self.codegen_freq[0..], 7);
    const dynamic_size = self.dynamicSize(
        &self.literal_encoding,
        &self.distance_encoding,
        extra_bits,
    );
    const dyn_size = dynamic_size.size;
    num_codegens = dynamic_size.num_codegens;

    if (dyn_size < size) {
        size = dyn_size;
        literal_encoding = &self.literal_encoding;
        distance_encoding = &self.distance_encoding;
    }

    // Stored bytes?
    if (storable and stored_size < size) {
        try self.storedBlock(input.?, eof);
        return;
    }

    // Huffman.
    if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) {
        try self.fixedHeader(eof);
    } else {
        try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
    }

    // Write the tokens.
    try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes);
}

pub fn storedBlock(self: *BlockWriter, input: []const u8, eof: bool) Writer.Error!void {
    try self.storedHeader(input.len, eof);
    try self.bit_writer.writeBytes(input);
}

/// dynamicBlock encodes a block using a dynamic Huffman table.
/// This should be used if the symbols used have a disproportionate
/// histogram distribution.
/// If input is supplied and the compression savings are below 1/16th of the
/// input size, the block is stored.
fn dynamicBlock(
    self: *BlockWriter,
    tokens: []const Token,
    eof: bool,
    input: ?[]const u8,
) Writer.Error!void {
    const total_tokens = self.indexTokens(tokens);
    const num_literals = total_tokens.num_literals;
    const num_distances = total_tokens.num_distances;

    // Generate codegen and codegenFrequencies, which indicates how to encode
    // the literal_encoding and the distance_encoding.
    self.generateCodegen(
        num_literals,
        num_distances,
        &self.literal_encoding,
        &self.distance_encoding,
    );
    self.codegen_encoding.generate(self.codegen_freq[0..], 7);
    const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0);
    const size = dynamic_size.size;
    const num_codegens = dynamic_size.num_codegens;

    // Store bytes, if we don't get a reasonable improvement.

    const stored_size = storedSizeFits(input);
    const ssize = stored_size.size;
    const storable = stored_size.storable;
    if (storable and ssize < (size + (size >> 4))) {
        try self.storedBlock(input.?, eof);
        return;
    }

    // Write Huffman table.
    try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);

    // Write the tokens.
    try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes);
}

const TotalIndexedTokens = struct {
    num_literals: u32,
    num_distances: u32,
};

/// Indexes a slice of tokens followed by an end_block_marker, and updates
/// literal_freq and distance_freq, and generates literal_encoding
/// and distance_encoding.
/// The number of literal and distance tokens is returned.
fn indexTokens(self: *BlockWriter, tokens: []const Token) TotalIndexedTokens {
    var num_literals: u32 = 0;
    var num_distances: u32 = 0;

    for (self.literal_freq, 0..) |_, i| {
        self.literal_freq[i] = 0;
    }
    for (self.distance_freq, 0..) |_, i| {
        self.distance_freq[i] = 0;
    }

    for (tokens) |t| {
        if (t.kind == Token.Kind.literal) {
            self.literal_freq[t.literal()] += 1;
            continue;
        }
        self.literal_freq[t.lengthCode()] += 1;
        self.distance_freq[t.distanceCode()] += 1;
    }
    // add end_block_marker token at the end
    self.literal_freq[HuffmanEncoder.end_block_marker] += 1;

    // get the number of literals
    num_literals = @as(u32, @intCast(self.literal_freq.len));
    while (self.literal_freq[num_literals - 1] == 0) {
        num_literals -= 1;
    }
    // get the number of distances
    num_distances = @as(u32, @intCast(self.distance_freq.len));
    while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) {
        num_distances -= 1;
    }
    if (num_distances == 0) {
        // We haven't found a single match. If we want to go with the dynamic encoding,
        // we should count at least one distance to be sure that the distance huffman tree could be encoded.
        self.distance_freq[0] = 1;
        num_distances = 1;
    }
    self.literal_encoding.generate(&self.literal_freq, 15);
    self.distance_encoding.generate(&self.distance_freq, 15);
    return TotalIndexedTokens{
        .num_literals = num_literals,
        .num_distances = num_distances,
    };
}

/// Writes a slice of tokens to the output followed by an end_block_marker.
/// Codes for literal and distance encoding must be supplied.
fn writeTokens(
    self: *BlockWriter,
    tokens: []const Token,
    le_codes: []Compress.HuffCode,
    oe_codes: []Compress.HuffCode,
) Writer.Error!void {
    for (tokens) |t| {
        if (t.kind == Token.Kind.literal) {
            try self.writeCode(le_codes[t.literal()]);
            continue;
        }

        // Write the length
        const le = t.lengthEncoding();
        try self.writeCode(le_codes[le.code]);
        if (le.extra_bits > 0) {
            try self.bit_writer.writeBits(le.extra_length, le.extra_bits);
        }

        // Write the distance
        const oe = t.distanceEncoding();
        try self.writeCode(oe_codes[oe.code]);
        if (oe.extra_bits > 0) {
            try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits);
        }
    }
    // add end_block_marker at the end
    try self.writeCode(le_codes[HuffmanEncoder.end_block_marker]);
}

/// Encodes a block of bytes as either Huffman encoded literals, or as
/// uncompressed bytes if the result gains very little from compression.
pub fn huffmanBlock(self: *BlockWriter, input: []const u8, eof: bool) Writer.Error!void {
    // Add everything as literals
    histogram(input, &self.literal_freq);

    self.literal_freq[HuffmanEncoder.end_block_marker] = 1;

    const num_literals = HuffmanEncoder.end_block_marker + 1;
    self.distance_freq[0] = 1;
    const num_distances = 1;

    self.literal_encoding.generate(&self.literal_freq, 15);

    // Figure out smallest code.
    // Always use dynamic Huffman or Store
    var num_codegens: u32 = 0;

    // Generate codegen and codegenFrequencies, which indicates how to encode
    // the literal_encoding and the distance_encoding.
    self.generateCodegen(
        num_literals,
        num_distances,
        &self.literal_encoding,
        &self.huff_distance,
    );
    self.codegen_encoding.generate(self.codegen_freq[0..], 7);
    const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0);
    const size = dynamic_size.size;
    num_codegens = dynamic_size.num_codegens;

    // Store bytes, if we don't get a reasonable improvement.
    const stored_size_ret = storedSizeFits(input);
    const ssize = stored_size_ret.size;
    const storable = stored_size_ret.storable;

    if (storable and ssize < (size + (size >> 4))) {
        try self.storedBlock(input, eof);
        return;
    }

    // Huffman.
    try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
    const encoding = self.literal_encoding.codes[0..257];

    for (input) |t| {
        const c = encoding[t];
        try self.bit_writer.writeBits(c.code, c.len);
    }
    try self.writeCode(encoding[HuffmanEncoder.end_block_marker]);
}

fn histogram(b: []const u8, h: *[286]u16) void {
    // Clear histogram
    for (h, 0..) |_, i| {
        h[i] = 0;
    }

    var lh = h.*[0..256];
    for (b) |t| {
        lh[t] += 1;
    }
}
(File diff suppressed because it is too large.)
@ -7,11 +7,10 @@ const Reader = std.Io.Reader;
const Container = flate.Container;

const Decompress = @This();
const Token = @import("Token.zig");
const token = @import("token.zig");

input: *Reader,
next_bits: Bits,
remaining_bits: std.math.Log2Int(Bits),
consumed_bits: u3,

reader: Reader,

@ -25,8 +24,6 @@ state: State,

err: ?Error,

const Bits = usize;

const BlockType = enum(u2) {
stored = 0,
fixed = 1,
@ -39,6 +36,8 @@ const State = union(enum) {
block_header,
stored_block: u16,
fixed_block,
fixed_block_literal: u8,
fixed_block_match: u16,
dynamic_block,
dynamic_block_literal: u8,
dynamic_block_match: u16,
@ -87,8 +86,7 @@ pub fn init(input: *Reader, container: Container, buffer: []u8) Decompress {
.end = 0,
},
.input = input,
.next_bits = 0,
.remaining_bits = 0,
.consumed_bits = 0,
.container_metadata = .init(container),
.lit_dec = .{},
.dst_dec = .{},
@ -183,27 +181,25 @@ fn streamIndirectInner(d: *Decompress) Reader.Error!usize {
return 0;
}

fn decodeLength(self: *Decompress, code: u8) !u16 {
if (code > 28) return error.InvalidCode;
const ml = Token.matchLength(code);
return if (ml.extra_bits == 0) // 0 - 5 extra bits
ml.base
else
ml.base + try self.takeBitsRuntime(ml.extra_bits);
fn decodeLength(self: *Decompress, code_int: u5) !u16 {
if (code_int > 28) return error.InvalidCode;
const l: token.LenCode = .fromInt(code_int);
const base = l.base();
const extra = l.extraBits();
return token.min_length + (base | try self.takeBits(extra));
}

fn decodeDistance(self: *Decompress, code: u8) !u16 {
if (code > 29) return error.InvalidCode;
const md = Token.matchDistance(code);
return if (md.extra_bits == 0) // 0 - 13 extra bits
md.base
else
md.base + try self.takeBitsRuntime(md.extra_bits);
fn decodeDistance(self: *Decompress, code_int: u5) !u16 {
if (code_int > 29) return error.InvalidCode;
const d: token.DistCode = .fromInt(code_int);
const base = d.base();
const extra = d.extraBits();
return token.min_distance + (base | try self.takeBits(extra));
}

// Decode code length symbol to code length. Writes decoded length into
// lens slice starting at position pos. Returns number of positions
// advanced.
/// Decode code length symbol to code length. Writes decoded length into
/// lens slice starting at position pos. Returns number of positions
/// advanced.
fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usize {
if (pos >= lens.len)
return error.InvalidDynamicBlockHeader;
@ -217,7 +213,7 @@ fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usiz
16 => {
// Copy the previous code length 3 - 6 times.
// The next 2 bits indicate repeat length
const n: u8 = @as(u8, try self.takeBits(u2)) + 3;
const n: u8 = @as(u8, try self.takeIntBits(u2)) + 3;
if (pos == 0 or pos + n > lens.len)
return error.InvalidDynamicBlockHeader;
for (0..n) |i| {
@ -226,17 +222,17 @@ fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usiz
return n;
},
// Repeat a code length of 0 for 3 - 10 times. (3 bits of length)
17 => return @as(u8, try self.takeBits(u3)) + 3,
17 => return @as(u8, try self.takeIntBits(u3)) + 3,
// Repeat a code length of 0 for 11 - 138 times (7 bits of length)
18 => return @as(u8, try self.takeBits(u7)) + 11,
18 => return @as(u8, try self.takeIntBits(u7)) + 11,
else => return error.InvalidDynamicBlockHeader,
}
}

fn decodeSymbol(self: *Decompress, decoder: anytype) !Symbol {
// Maximum code len is 15 bits.
const sym = try decoder.find(@bitReverse(try self.peekBits(u15)));
try self.tossBits(sym.code_bits);
const sym = try decoder.find(@bitReverse(try self.peekIntBitsShort(u15)));
try self.tossBitsShort(sym.code_bits);
return sym;
}

@ -320,11 +316,11 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
.raw => continue :sw .block_header,
},
.block_header => {
d.final_block = (try d.takeBits(u1)) != 0;
const block_type: BlockType = @enumFromInt(try d.takeBits(u2));
d.final_block = (try d.takeIntBits(u1)) != 0;
const block_type: BlockType = @enumFromInt(try d.takeIntBits(u2));
switch (block_type) {
.stored => {
d.alignBitsDiscarding();
d.alignBitsForward();
// everything after this is byte aligned in stored block
const len = try in.takeInt(u16, .little);
const nlen = try in.takeInt(u16, .little);
@ -333,17 +329,17 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
},
.fixed => continue :sw .fixed_block,
.dynamic => {
const hlit: u16 = @as(u16, try d.takeBits(u5)) + 257; // number of ll code entries present - 257
const hdist: u16 = @as(u16, try d.takeBits(u5)) + 1; // number of distance code entries - 1
const hclen: u8 = @as(u8, try d.takeBits(u4)) + 4; // hclen + 4 code lengths are encoded
const hlit: u16 = @as(u16, try d.takeIntBits(u5)) + 257; // number of ll code entries present - 257
const hdist: u16 = @as(u16, try d.takeIntBits(u5)) + 1; // number of distance code entries - 1
const hclen: u8 = @as(u8, try d.takeIntBits(u4)) + 4; // hclen + 4 code lengths are encoded

if (hlit > 286 or hdist > 30)
return error.InvalidDynamicBlockHeader;

// lengths for code lengths
var cl_lens: [19]u4 = @splat(0);
for (flate.HuffmanEncoder.codegen_order[0..hclen]) |i| {
cl_lens[i] = try d.takeBits(u3);
for (token.codegen_order[0..hclen]) |i| {
cl_lens[i] = try d.takeIntBits(u3);
}
var cl_dec: CodegenDecoder = .{};
try cl_dec.generate(&cl_lens);
@ -352,9 +348,9 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
var dec_lens: [286 + 30]u4 = @splat(0);
var pos: usize = 0;
while (pos < hlit + hdist) {
const peeked = @bitReverse(try d.peekBits(u7));
const peeked = @bitReverse(try d.peekIntBitsShort(u7));
const sym = try cl_dec.find(peeked);
try d.tossBits(sym.code_bits);
try d.tossBitsShort(sym.code_bits);
pos += try d.dynamicCodeLength(sym.symbol, &dec_lens, pos);
}
if (pos > hlit + hdist) {
@ -373,9 +369,12 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
}
},
.stored_block => |remaining_len| {
const out = try w.writableSliceGreedyPreserve(flate.history_len, 1);
const out: []u8 = if (remaining != 0)
try w.writableSliceGreedyPreserve(flate.history_len, 1)
else
&.{};
var limited_out: [1][]u8 = .{limit.min(.limited(remaining_len)).slice(out)};
const n = try d.input.readVec(&limited_out);
const n = try in.readVec(&limited_out);
if (remaining_len - n == 0) {
d.state = if (d.final_block) .protocol_footer else .block_header;
} else {
@ -389,8 +388,14 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
const code = try d.readFixedCode();
switch (code) {
0...255 => {
try w.writeBytePreserve(flate.history_len, @intCast(code));
remaining -= 1;
if (remaining != 0) {
@branchHint(.likely);
try w.writeBytePreserve(flate.history_len, @intCast(code));
remaining -= 1;
} else {
d.state = .{ .fixed_block_literal = @intCast(code) };
return @intFromEnum(limit) - remaining;
}
},
256 => {
d.state = if (d.final_block) .protocol_footer else .block_header;
@ -400,9 +405,7 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
// Handles fixed block non literal (length) code.
// Length code is followed by 5 bits of distance code.
const length = try d.decodeLength(@intCast(code - 257));
const distance = try d.decodeDistance(@bitReverse(try d.takeBits(u5)));
try writeMatch(w, length, distance);
remaining -= length;
continue :sw .{ .fixed_block_match = length };
},
else => return error.InvalidCode,
}
@ -410,6 +413,24 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
d.state = .fixed_block;
return @intFromEnum(limit) - remaining;
},
.fixed_block_literal => |symbol| {
assert(remaining != 0);
remaining -= 1;
try w.writeBytePreserve(flate.history_len, symbol);
continue :sw .fixed_block;
},
.fixed_block_match => |length| {
if (remaining >= length) {
@branchHint(.likely);
const distance = try d.decodeDistance(@bitReverse(try d.takeIntBits(u5)));
try writeMatch(w, length, distance);
remaining -= length;
continue :sw .fixed_block;
} else {
d.state = .{ .fixed_block_match = length };
return @intFromEnum(limit) - remaining;
}
},
.dynamic_block => {
// In larger archives most blocks are usually dynamic, so
// decompression performance depends on this logic.
@ -429,7 +450,7 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
},
.match => {
// Decode match backreference <length, distance>
const length = try d.decodeLength(sym.symbol);
const length = try d.decodeLength(@intCast(sym.symbol));
continue :sw .{ .dynamic_block_match = length };
},
.end_of_block => {
@ -449,7 +470,7 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
@branchHint(.likely);
remaining -= length;
const dsm = try d.decodeSymbol(&d.dst_dec);
const distance = try d.decodeDistance(dsm.symbol);
const distance = try d.decodeDistance(@intCast(dsm.symbol));
try writeMatch(w, length, distance);
continue :sw .dynamic_block;
} else {
@ -458,23 +479,16 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
}
},
.protocol_footer => {
d.alignBitsForward();
switch (d.container_metadata) {
.gzip => |*gzip| {
d.alignBitsDiscarding();
gzip.* = .{
.crc = try in.takeInt(u32, .little),
.count = try in.takeInt(u32, .little),
};
gzip.crc = try in.takeInt(u32, .little);
gzip.count = try in.takeInt(u32, .little);
},
.zlib => |*zlib| {
d.alignBitsDiscarding();
zlib.* = .{
.adler = try in.takeInt(u32, .little),
};
},
.raw => {
d.alignBitsPreserving();
zlib.adler = try in.takeInt(u32, .big);
},
.raw => {},
}
d.state = .end;
return @intFromEnum(limit) - remaining;
@ -487,10 +501,10 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
/// back from current write position, and `length` of bytes.
fn writeMatch(w: *Writer, length: u16, distance: u16) !void {
if (w.end < distance) return error.InvalidMatch;
if (length < Token.base_length) return error.InvalidMatch;
if (length > Token.max_length) return error.InvalidMatch;
if (distance < Token.min_distance) return error.InvalidMatch;
if (distance > Token.max_distance) return error.InvalidMatch;
if (length < token.min_length) return error.InvalidMatch;
if (length > token.max_length) return error.InvalidMatch;
if (distance < token.min_distance) return error.InvalidMatch;
if (distance > token.max_distance) return error.InvalidMatch;

// This is not a @memmove; it intentionally repeats patterns caused by
// iterating one byte at a time.
@ -500,137 +514,71 @@ fn writeMatch(w: *Writer, length: u16, distance: u16) !void {
for (dest, src) |*d, s| d.* = s;
}
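
For instance, a match with distance 1 and length 4 right after the byte 'a' has been written expands to 'aaaa': each iteration reads a byte the previous iteration just wrote, which is exactly the run-length behavior deflate relies on and why a plain @memmove would be wrong here.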

fn takeBits(d: *Decompress, comptime U: type) !U {
const remaining_bits = d.remaining_bits;
const next_bits = d.next_bits;
if (remaining_bits >= @bitSizeOf(U)) {
const u: U = @truncate(next_bits);
d.next_bits = next_bits >> @bitSizeOf(U);
d.remaining_bits = remaining_bits - @bitSizeOf(U);
return u;
}
const in = d.input;
const next_int = in.takeInt(Bits, .little) catch |err| switch (err) {
error.ReadFailed => return error.ReadFailed,
error.EndOfStream => return takeBitsEnding(d, U),
fn peekBits(d: *Decompress, n: u4) !u16 {
const bits = d.input.peekInt(u32, .little) catch |e| return switch (e) {
error.ReadFailed => error.ReadFailed,
error.EndOfStream => d.peekBitsEnding(n),
};
const needed_bits = @bitSizeOf(U) - remaining_bits;
const u: U = @intCast(((next_int & ((@as(Bits, 1) << needed_bits) - 1)) << remaining_bits) | next_bits);
d.next_bits = next_int >> needed_bits;
d.remaining_bits = @intCast(@bitSizeOf(Bits) - @as(usize, needed_bits));
return u;
const mask = @shlExact(@as(u16, 1), n) - 1;
return @intCast((bits >> d.consumed_bits) & mask);
}

fn takeBitsEnding(d: *Decompress, comptime U: type) !U {
const remaining_bits = d.remaining_bits;
const next_bits = d.next_bits;
const in = d.input;
const n = in.bufferedLen();
assert(n < @sizeOf(Bits));
const needed_bits = @bitSizeOf(U) - remaining_bits;
if (n * 8 < needed_bits) return error.EndOfStream;
const next_int = in.takeVarInt(Bits, .little, n) catch |err| switch (err) {
error.ReadFailed => return error.ReadFailed,
error.EndOfStream => unreachable,
fn peekBitsEnding(d: *Decompress, n: u4) !u16 {
@branchHint(.unlikely);

const left = d.input.buffered();
if (left.len * 8 - d.consumed_bits < n) return error.EndOfStream;
const bits = std.mem.readVarInt(u32, left, .little);
const mask = @shlExact(@as(u16, 1), n) - 1;
return @intCast((bits >> d.consumed_bits) & mask);
}

/// Safe only after `peekBits` has been called with a greater or equal `n` value.
fn tossBits(d: *Decompress, n: u4) void {
d.input.toss((@as(u8, n) + d.consumed_bits) / 8);
d.consumed_bits +%= @truncate(n);
}

fn takeBits(d: *Decompress, n: u4) !u16 {
const bits = try d.peekBits(n);
d.tossBits(n);
return bits;
}

fn alignBitsForward(d: *Decompress) void {
d.input.toss(@intFromBool(d.consumed_bits != 0));
d.consumed_bits = 0;
}

fn peekBitsShort(d: *Decompress, n: u4) !u16 {
const bits = d.input.peekInt(u32, .little) catch |e| return switch (e) {
error.ReadFailed => error.ReadFailed,
error.EndOfStream => d.peekBitsShortEnding(n),
};
const u: U = @intCast(((next_int & ((@as(Bits, 1) << needed_bits) - 1)) << remaining_bits) | next_bits);
d.next_bits = next_int >> needed_bits;
d.remaining_bits = @intCast(n * 8 - @as(usize, needed_bits));
return u;
const mask = @shlExact(@as(u16, 1), n) - 1;
return @intCast((bits >> d.consumed_bits) & mask);
}

fn peekBits(d: *Decompress, comptime U: type) !U {
const remaining_bits = d.remaining_bits;
const next_bits = d.next_bits;
if (remaining_bits >= @bitSizeOf(U)) return @truncate(next_bits);
const in = d.input;
const next_int = in.peekInt(Bits, .little) catch |err| switch (err) {
error.ReadFailed => return error.ReadFailed,
error.EndOfStream => return peekBitsEnding(d, U),
};
const needed_bits = @bitSizeOf(U) - remaining_bits;
return @intCast(((next_int & ((@as(Bits, 1) << needed_bits) - 1)) << remaining_bits) | next_bits);
fn peekBitsShortEnding(d: *Decompress, n: u4) !u16 {
@branchHint(.unlikely);

const left = d.input.buffered();
const bits = std.mem.readVarInt(u32, left, .little);
const mask = @shlExact(@as(u16, 1), n) - 1;
return @intCast((bits >> d.consumed_bits) & mask);
}

fn peekBitsEnding(d: *Decompress, comptime U: type) !U {
const remaining_bits = d.remaining_bits;
const next_bits = d.next_bits;
const in = d.input;
var u: Bits = 0;
var remaining_needed_bits = @bitSizeOf(U) - remaining_bits;
var i: usize = 0;
while (remaining_needed_bits > 0) {
const peeked = in.peek(i + 1) catch |err| switch (err) {
error.ReadFailed => return error.ReadFailed,
error.EndOfStream => break,
};
u |= @as(Bits, peeked[i]) << @intCast(i * 8);
remaining_needed_bits -|= 8;
i += 1;
}
if (remaining_bits == 0 and i == 0) return error.EndOfStream;
return @truncate((u << remaining_bits) | next_bits);
fn tossBitsShort(d: *Decompress, n: u4) !void {
if (d.input.bufferedLen() * 8 + d.consumed_bits < n) return error.EndOfStream;
d.tossBits(n);
}

fn tossBits(d: *Decompress, n: u4) !void {
const remaining_bits = d.remaining_bits;
const next_bits = d.next_bits;
if (remaining_bits >= n) {
d.next_bits = next_bits >> n;
d.remaining_bits = remaining_bits - n;
} else {
const in = d.input;
const next_int = in.takeInt(Bits, .little) catch |err| switch (err) {
error.ReadFailed => return error.ReadFailed,
error.EndOfStream => return tossBitsEnding(d, n),
};
const needed_bits = n - remaining_bits;
d.next_bits = next_int >> needed_bits;
d.remaining_bits = @intCast(@bitSizeOf(Bits) - @as(usize, needed_bits));
}
fn takeIntBits(d: *Decompress, T: type) !T {
return @intCast(try d.takeBits(@bitSizeOf(T)));
}

fn tossBitsEnding(d: *Decompress, n: u4) !void {
const remaining_bits = d.remaining_bits;
const in = d.input;
const buffered_n = in.bufferedLen();
if (buffered_n == 0) return error.EndOfStream;
assert(buffered_n < @sizeOf(Bits));
const needed_bits = n - remaining_bits;
const next_int = in.takeVarInt(Bits, .little, buffered_n) catch |err| switch (err) {
error.ReadFailed => return error.ReadFailed,
error.EndOfStream => unreachable,
};
d.next_bits = next_int >> needed_bits;
d.remaining_bits = @intCast(@as(usize, buffered_n) * 8 -| @as(usize, needed_bits));
}

fn takeBitsRuntime(d: *Decompress, n: u4) !u16 {
const x = try peekBits(d, u16);
const mask: u16 = (@as(u16, 1) << n) - 1;
const u: u16 = @as(u16, @truncate(x)) & mask;
try tossBits(d, n);
return u;
}

fn alignBitsDiscarding(d: *Decompress) void {
const remaining_bits = d.remaining_bits;
if (remaining_bits == 0) return;
const n_bytes = remaining_bits / 8;
const in = d.input;
in.seek -= n_bytes;
d.remaining_bits = 0;
d.next_bits = 0;
}

fn alignBitsPreserving(d: *Decompress) void {
const remaining_bits: usize = d.remaining_bits;
if (remaining_bits == 0) return;
const n_bytes = (remaining_bits + 7) / 8;
const in = d.input;
in.seek -= n_bytes;
d.remaining_bits = 0;
d.next_bits = 0;
fn peekIntBitsShort(d: *Decompress, T: type) !T {
return @intCast(try d.peekBitsShort(@bitSizeOf(T)));
}

/// Reads the first 7 bits, then maybe 1 or 2 more to get the full 7, 8 or 9 bit code.
@ -646,12 +594,12 @@ fn alignBitsPreserving(d: *Decompress) void {
/// 280 - 287 8 11000000 through
/// 11000111
fn readFixedCode(d: *Decompress) !u16 {
const code7 = @bitReverse(try d.takeBits(u7));
const code7 = @bitReverse(try d.takeIntBits(u7));
return switch (code7) {
0...0b0010_111 => @as(u16, code7) + 256,
0b0010_111 + 1...0b1011_111 => (@as(u16, code7) << 1) + @as(u16, try d.takeBits(u1)) - 0b0011_0000,
0b1011_111 + 1...0b1100_011 => (@as(u16, code7 - 0b1100000) << 1) + try d.takeBits(u1) + 280,
else => (@as(u16, code7 - 0b1100_100) << 2) + @as(u16, @bitReverse(try d.takeBits(u2))) + 144,
0b0010_111 + 1...0b1011_111 => (@as(u16, code7) << 1) + @as(u16, try d.takeIntBits(u1)) - 0b0011_0000,
0b1011_111 + 1...0b1100_011 => (@as(u16, code7 - 0b1100000) << 1) + try d.takeIntBits(u1) + 280,
else => (@as(u16, code7 - 0b1100_100) << 2) + @as(u16, @bitReverse(try d.takeIntBits(u2))) + 144,
};
}
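
Two hand-worked decodes of the switch above: a peeked 7-bit value of 0b0000000 falls in the first range and yields symbol 256 (end of block); a value of 0b1011111 falls in the second range, so one more bit is taken, and with that bit being 0 the result is (0b1011111 << 1) + 0 - 0b0011_0000 = 190 - 48 = 142, a literal in the 8-bit 0-143 range.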

@ -807,7 +755,7 @@ fn HuffmanDecoder(
return self.findLinked(code, sym.next);
}

inline fn findLinked(self: *Self, code: u16, start: u16) !Symbol {
fn findLinked(self: *Self, code: u16, start: u16) !Symbol {
var pos = start;
while (pos > 0) {
const sym = self.symbols[pos];
@ -898,57 +846,30 @@ test "init/find" {
}

test "encode/decode literals" {
var codes: [flate.HuffmanEncoder.max_num_frequencies]flate.HuffmanEncoder.Code = undefined;
for (1..286) |j| { // for all different number of codes
var enc: flate.HuffmanEncoder = .{
.codes = &codes,
.freq_cache = undefined,
.bit_count = undefined,
.lns = undefined,
.lfs = undefined,
};
// create frequencies
var freq = [_]u16{0} ** 286;
freq[256] = 1; // ensure we have end of block code
for (&freq, 1..) |*f, i| {
if (i % j == 0)
f.* = @intCast(i);
}
// Check that the example in RFC 1951 section 3.2.2 works (plus some zeroes)
const max_bits = 5;
var decoder: HuffmanDecoder(16, max_bits, 3) = .{};
try decoder.generate(&.{ 3, 3, 3, 3, 0, 0, 3, 2, 4, 4 });

// encoder from frequencies
enc.generate(&freq, 15);

// get code_lens from encoder
var code_lens = [_]u4{0} ** 286;
for (code_lens, 0..) |_, i| {
code_lens[i] = @intCast(enc.codes[i].len);
}
// generate decoder from code lens
var dec: LiteralDecoder = .{};
try dec.generate(&code_lens);

// expect decoder code to match original encoder code
for (dec.symbols) |s| {
if (s.code_bits == 0) continue;
const c_code: u16 = @bitReverse(@as(u15, @intCast(s.code)));
const symbol: u16 = switch (s.kind) {
.literal => s.symbol,
.end_of_block => 256,
.match => @as(u16, s.symbol) + 257,
};

const c = enc.codes[symbol];
try testing.expect(c.code == c_code);
}

// find each symbol by code
for (enc.codes) |c| {
if (c.len == 0) continue;

const s_code: u15 = @bitReverse(@as(u15, @intCast(c.code)));
const s = try dec.find(s_code);
try testing.expect(s.code == s_code);
try testing.expect(s.code_bits == c.len);
inline for (0.., .{
@as(u3, 0b010),
@as(u3, 0b011),
@as(u3, 0b100),
@as(u3, 0b101),
@as(u0, 0),
@as(u0, 0),
@as(u3, 0b110),
@as(u2, 0b00),
@as(u4, 0b1110),
@as(u4, 0b1111),
}) |i, code| {
const bits = @bitSizeOf(@TypeOf(code));
if (bits == 0) continue;
for (0..1 << (max_bits - bits)) |extra| {
const full = (@as(u16, code) << (max_bits - bits)) | @as(u16, @intCast(extra));
const symbol = try decoder.find(full);
try testing.expectEqual(i, symbol.symbol);
try testing.expectEqual(bits, symbol.code_bits);
}
}
}

@ -1,463 +0,0 @@
|
||||
const HuffmanEncoder = @This();
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
|
||||
codes: []Code,
|
||||
// Reusable buffer with the longest possible frequency table.
|
||||
freq_cache: [max_num_frequencies + 1]LiteralNode,
|
||||
bit_count: [17]u32,
|
||||
lns: []LiteralNode, // sorted by literal, stored to avoid repeated allocation in generate
|
||||
lfs: []LiteralNode, // sorted by frequency, stored to avoid repeated allocation in generate
|
||||
|
||||
pub const LiteralNode = struct {
|
||||
literal: u16,
|
||||
freq: u16,
|
||||
|
||||
pub fn max() LiteralNode {
|
||||
return .{
|
||||
.literal = std.math.maxInt(u16),
|
||||
.freq = std.math.maxInt(u16),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
pub const Code = struct {
|
||||
code: u16 = 0,
|
||||
len: u16 = 0,
|
||||
};
|
||||
|
||||
/// The odd order in which the codegen code sizes are written.
|
||||
pub const codegen_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
|
||||
/// The number of codegen codes.
|
||||
pub const codegen_code_count = 19;
|
||||
|
||||
/// The largest distance code.
|
||||
pub const distance_code_count = 30;
|
||||
|
||||
/// Maximum number of literals.
|
||||
pub const max_num_lit = 286;
|
||||
|
||||
/// Max number of frequencies used for a Huffman Code
|
||||
/// Possible lengths are codegen_code_count (19), distance_code_count (30) and max_num_lit (286).
|
||||
/// The largest of these is max_num_lit.
|
||||
pub const max_num_frequencies = max_num_lit;
|
||||
|
||||
/// Biggest block size for uncompressed block.
|
||||
pub const max_store_block_size = 65535;
|
||||
/// The special code used to mark the end of a block.
|
||||
pub const end_block_marker = 256;
|
||||
|
||||
/// Update this Huffman Code object to be the minimum code for the specified frequency count.
|
||||
///
|
||||
/// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
|
||||
/// max_bits The maximum number of bits to use for any literal.
|
||||
pub fn generate(self: *HuffmanEncoder, freq: []u16, max_bits: u32) void {
|
||||
var list = self.freq_cache[0 .. freq.len + 1];
|
||||
// Number of non-zero literals
|
||||
var count: u32 = 0;
|
||||
// Set list to be the set of all non-zero literals and their frequencies
|
||||
for (freq, 0..) |f, i| {
|
||||
if (f != 0) {
|
||||
list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f };
|
||||
count += 1;
|
||||
} else {
|
||||
list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
|
||||
self.codes[i].len = 0;
|
||||
}
|
||||
}
|
||||
list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };
|
||||
|
||||
list = list[0..count];
|
||||
if (count <= 2) {
|
||||
        // Handle the small cases here, because they are awkward for the general case code. With
        // two or fewer literals, everything has bit length 1.
        for (list, 0..) |node, i| {
            // "list" is in order of increasing literal value.
            self.codes[node.literal] = .{
                .code = @intCast(i),
                .len = 1,
            };
        }
        return;
    }
    self.lfs = list;
    std.mem.sort(LiteralNode, self.lfs, {}, byFreq);

    // Get the number of literals for each bit count
    const bit_count = self.bitCounts(list, max_bits);
    // And do the assignment
    self.assignEncodingAndSize(bit_count, list);
}

pub fn bitLength(self: *HuffmanEncoder, freq: []u16) u32 {
    var total: u32 = 0;
    for (freq, 0..) |f, i| {
        if (f != 0) {
            total += @as(u32, @intCast(f)) * @as(u32, @intCast(self.codes[i].len));
        }
    }
    return total;
}

/// Return the number of literals assigned to each bit size in the Huffman encoding
///
/// This method is only called when list.len >= 3
/// The cases of 0, 1, and 2 literals are handled by special case code.
///
/// list: An array of the literals with non-zero frequencies
/// and their associated frequencies. The array is in order of increasing
/// frequency, and has as its last element a special element with frequency
/// `math.maxInt(i32)`
///
/// max_bits: The maximum number of bits that should be used to encode any literal.
/// Must be less than 16.
///
/// Returns an integer array in which array[i] indicates the number of literals
/// that should be encoded in i bits.
fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize) []u32 {
    var max_bits = max_bits_to_use;
    const n = list.len;
    const max_bits_limit = 16;

    assert(max_bits < max_bits_limit);

    // The tree can't have greater depth than n - 1, no matter what. This
    // saves a little bit of work in some small cases
    max_bits = @min(max_bits, n - 1);

    // Create information about each of the levels.
    // A bogus "Level 0" whose sole purpose is so that
    // level1.prev.needed == 0. This makes level1.next_pair_freq
    // be a legitimate value that never gets chosen.
    var levels: [max_bits_limit]LevelInfo = std.mem.zeroes([max_bits_limit]LevelInfo);
    // leaf_counts[i] counts the number of literals at the left
    // of ancestors of the rightmost node at level i.
    // leaf_counts[i][j] is the number of literals at the left
    // of the level j ancestor.
    var leaf_counts: [max_bits_limit][max_bits_limit]u32 = @splat(@splat(0));

    {
        var level = @as(u32, 1);
        while (level <= max_bits) : (level += 1) {
            // For every level, the first two items are the first two characters.
            // We initialize the levels as if we had already figured this out.
            levels[level] = LevelInfo{
                .level = level,
                .last_freq = list[1].freq,
                .next_char_freq = list[2].freq,
                .next_pair_freq = list[0].freq + list[1].freq,
                .needed = 0,
            };
            leaf_counts[level][level] = 2;
            if (level == 1) {
                levels[level].next_pair_freq = std.math.maxInt(i32);
            }
        }
    }

    // We need a total of 2*n - 2 items at top level and have already generated 2.
    levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4;

    {
        var level = max_bits;
        while (true) {
            var l = &levels[level];
            if (l.next_pair_freq == std.math.maxInt(i32) and l.next_char_freq == std.math.maxInt(i32)) {
                // We've run out of both leaves and pairs.
                // End all calculations for this level.
                // To make sure we never come back to this level or any lower level,
                // set next_pair_freq impossibly large.
                l.needed = 0;
                levels[level + 1].next_pair_freq = std.math.maxInt(i32);
                level += 1;
                continue;
            }

            const prev_freq = l.last_freq;
            if (l.next_char_freq < l.next_pair_freq) {
                // The next item on this row is a leaf node.
                const next = leaf_counts[level][level] + 1;
                l.last_freq = l.next_char_freq;
                // Lower leaf_counts are the same as those of the previous node.
                leaf_counts[level][level] = next;
                if (next >= list.len) {
                    l.next_char_freq = LiteralNode.max().freq;
                } else {
                    l.next_char_freq = list[next].freq;
                }
            } else {
                // The next item on this row is a pair from the previous row.
                // next_pair_freq isn't valid until we generate two
                // more values in the level below
                l.last_freq = l.next_pair_freq;
                // Take leaf counts from the lower level, except counts[level] remains the same.
                @memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
                levels[l.level - 1].needed = 2;
            }

            l.needed -= 1;
            if (l.needed == 0) {
                // We've done everything we need to do for this level.
                // Continue calculating one level up. Fill in next_pair_freq
                // of that level with the sum of the two nodes we've just calculated on
                // this level.
                if (l.level == max_bits) {
                    // All done!
                    break;
                }
                levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
                level += 1;
            } else {
                // If we stole from below, move down temporarily to replenish it.
                while (levels[level - 1].needed > 0) {
                    level -= 1;
                    if (level == 0) {
                        break;
                    }
                }
            }
        }
    }

    // Something is wrong if, at the end, the top level hasn't used
    // all of the leaves.
    assert(leaf_counts[max_bits][max_bits] == n);

    var bit_count = self.bit_count[0 .. max_bits + 1];
    var bits: u32 = 1;
    const counts = &leaf_counts[max_bits];
    {
        var level = max_bits;
        while (level > 0) : (level -= 1) {
            // counts[level] gives the number of literals requiring at least "bits"
            // bits to encode.
            bit_count[bits] = counts[level] - counts[level - 1];
            bits += 1;
            if (level == 0) {
                break;
            }
        }
    }
    return bit_count;
}

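// Worked example (not part of the original file): for the frequency multiset
// {1, 1, 2, 3, 5}, an optimal prefix code uses lengths {4, 4, 3, 2, 1}, so
// bitCounts would yield bit_count[1] = 1, bit_count[2] = 1, bit_count[3] = 1,
// bit_count[4] = 2 (bit_count[0] is unused); the counts always sum to list.len.
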
/// Look at the leaves and assign them a bit count and an encoding as specified
/// in RFC 1951 3.2.2
fn assignEncodingAndSize(self: *HuffmanEncoder, bit_count: []u32, list_arg: []LiteralNode) void {
    var code = @as(u16, 0);
    var list = list_arg;

    for (bit_count, 0..) |bits, n| {
        code <<= 1;
        if (n == 0 or bits == 0) {
            continue;
        }
        // The literals list[list.len - bits] .. list[list.len - 1]
        // are encoded using "bits" bits, and get the values
        // code, code + 1, .... The code values are
        // assigned in literal order (not frequency order).
        const chunk = list[list.len - @as(u32, @intCast(bits)) ..];

        self.lns = chunk;
        std.mem.sort(LiteralNode, self.lns, {}, byLiteral);

        for (chunk) |node| {
            self.codes[node.literal] = .{
                .code = bitReverse(u16, code, @as(u5, @intCast(n))),
                .len = @as(u16, @intCast(n)),
            };
            code += 1;
        }
        list = list[0 .. list.len - @as(u32, @intCast(bits))];
    }
}

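// For illustration (not part of the original file): the canonical-code rule of
// RFC 1951 3.2.2 can also be computed directly from the per-length counts, as
// in this minimal sketch; `bl_count` and `next_code` follow the RFC's
// pseudo-code, and assignEncodingAndSize above is an equivalent formulation
// over the sorted leaves. Assumes `std` is in scope as elsewhere in this file.
test "canonical codes from bit lengths (RFC 1951 3.2.2 sketch)" {
    // Example from the RFC: lengths (3, 3, 3, 3, 3, 2, 4, 4) for symbols A..H.
    const lens = [_]u4{ 3, 3, 3, 3, 3, 2, 4, 4 };
    var bl_count = [_]u16{0} ** 5;
    for (lens) |l| bl_count[l] += 1;
    // Step 2: compute the smallest code value for each bit length.
    var next_code = [_]u16{0} ** 5;
    var code: u16 = 0;
    for (1..5) |b| {
        code = (code + bl_count[b - 1]) << 1;
        next_code[b] = code;
    }
    // Step 3: assign consecutive codes within each length, in symbol order.
    var codes: [8]u16 = undefined;
    for (lens, 0..) |l, i| {
        codes[i] = next_code[l];
        next_code[l] += 1;
    }
    // RFC 1951 lists these resulting codes for the example alphabet.
    try std.testing.expectEqualSlices(
        u16,
        &[_]u16{ 0b010, 0b011, 0b100, 0b101, 0b110, 0b00, 0b1110, 0b1111 },
        &codes,
    );
}
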
fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool {
    _ = context;
    if (a.freq == b.freq) {
        return a.literal < b.literal;
    }
    return a.freq < b.freq;
}

/// Describes the state of the constructed tree for a given depth.
const LevelInfo = struct {
    /// Our level, for better printing
    level: u32,
    /// The frequency of the last node at this level
    last_freq: u32,
    /// The frequency of the next character to add to this level
    next_char_freq: u32,
    /// The frequency of the next pair (from level below) to add to this level.
    /// Only valid if the "needed" value of the next lower level is 0.
    next_pair_freq: u32,
    /// The number of chains remaining to generate for this level before moving
    /// up to the next level
    needed: u32,
};

fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool {
    _ = context;
    return a.literal < b.literal;
}

/// Reverse, bit by bit, an n-bit code.
fn bitReverse(comptime T: type, value: T, n: usize) T {
    const r = @bitReverse(value);
    return r >> @as(std.math.Log2Int(T), @intCast(@typeInfo(T).int.bits - n));
}

test bitReverse {
    const ReverseBitsTest = struct {
        in: u16,
        bit_count: u5,
        out: u16,
    };

    const reverse_bits_tests = [_]ReverseBitsTest{
        .{ .in = 1, .bit_count = 1, .out = 1 },
        .{ .in = 1, .bit_count = 2, .out = 2 },
        .{ .in = 1, .bit_count = 3, .out = 4 },
        .{ .in = 1, .bit_count = 4, .out = 8 },
        .{ .in = 1, .bit_count = 5, .out = 16 },
        .{ .in = 17, .bit_count = 5, .out = 17 },
        .{ .in = 257, .bit_count = 9, .out = 257 },
        .{ .in = 29, .bit_count = 5, .out = 23 },
    };

    for (reverse_bits_tests) |h| {
        const v = bitReverse(u16, h.in, h.bit_count);
        try std.testing.expectEqual(h.out, v);
    }
}

/// Generates a HuffmanCode corresponding to the fixed literal table
pub fn fixedLiteralEncoder(codes: *[max_num_frequencies]Code) HuffmanEncoder {
    var h: HuffmanEncoder = undefined;
    h.codes = codes;
    var ch: u16 = 0;

    while (ch < max_num_frequencies) : (ch += 1) {
        var bits: u16 = undefined;
        var size: u16 = undefined;
        switch (ch) {
            0...143 => {
                // size 8, 00110000 .. 10111111
                bits = ch + 48;
                size = 8;
            },
            144...255 => {
                // size 9, 110010000 .. 111111111
                bits = ch + 400 - 144;
                size = 9;
            },
            256...279 => {
                // size 7, 0000000 .. 0010111
                bits = ch - 256;
                size = 7;
            },
            else => {
                // size 8, 11000000 .. 11000111
                bits = ch + 192 - 280;
                size = 8;
            },
        }
        h.codes[ch] = .{ .code = bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size };
    }
    return h;
}

pub fn fixedDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder {
    var h: HuffmanEncoder = undefined;
    h.codes = codes;
    for (h.codes, 0..) |_, ch| {
        h.codes[ch] = .{ .code = bitReverse(u16, @as(u16, @intCast(ch)), 5), .len = 5 };
    }
    return h;
}

pub fn huffmanDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder {
    var distance_freq: [distance_code_count]u16 = @splat(0);
    distance_freq[0] = 1;
    // huff_distance is a static distance encoder used for Huffman-only encoding.
    // It can be reused since we will not be encoding distance values.
    var h: HuffmanEncoder = .{};
    h.codes = codes;
    h.generate(distance_freq[0..], 15);
    return h;
}

test "generate a Huffman code for the fixed literal table specific to Deflate" {
|
||||
var codes: [max_num_frequencies]Code = undefined;
|
||||
const enc: HuffmanEncoder = .fixedLiteralEncoder(&codes);
|
||||
for (enc.codes) |c| {
|
||||
switch (c.len) {
|
||||
7 => {
|
||||
const v = @bitReverse(@as(u7, @intCast(c.code)));
|
||||
try testing.expect(v <= 0b0010111);
|
||||
},
|
||||
8 => {
|
||||
const v = @bitReverse(@as(u8, @intCast(c.code)));
|
||||
try testing.expect((v >= 0b000110000 and v <= 0b10111111) or
|
||||
(v >= 0b11000000 and v <= 11000111));
|
||||
},
|
||||
9 => {
|
||||
const v = @bitReverse(@as(u9, @intCast(c.code)));
|
||||
try testing.expect(v >= 0b110010000 and v <= 0b111111111);
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test "generate a Huffman code for the 30 possible relative distances (LZ77 distances) of Deflate" {
|
||||
var codes: [distance_code_count]Code = undefined;
|
||||
const enc = fixedDistanceEncoder(&codes);
|
||||
for (enc.codes) |c| {
|
||||
const v = @bitReverse(@as(u5, @intCast(c.code)));
|
||||
try testing.expect(v <= 29);
|
||||
try testing.expect(c.len == 5);
|
||||
}
|
||||
}
|
||||
|
||||
pub const fixed_codes = [_]u8{
    0b00001100, 0b10001100, 0b01001100, 0b11001100, 0b00101100, 0b10101100, 0b01101100, 0b11101100,
    0b00011100, 0b10011100, 0b01011100, 0b11011100, 0b00111100, 0b10111100, 0b01111100, 0b11111100,
    0b00000010, 0b10000010, 0b01000010, 0b11000010, 0b00100010, 0b10100010, 0b01100010, 0b11100010,
    0b00010010, 0b10010010, 0b01010010, 0b11010010, 0b00110010, 0b10110010, 0b01110010, 0b11110010,
    0b00001010, 0b10001010, 0b01001010, 0b11001010, 0b00101010, 0b10101010, 0b01101010, 0b11101010,
    0b00011010, 0b10011010, 0b01011010, 0b11011010, 0b00111010, 0b10111010, 0b01111010, 0b11111010,
    0b00000110, 0b10000110, 0b01000110, 0b11000110, 0b00100110, 0b10100110, 0b01100110, 0b11100110,
    0b00010110, 0b10010110, 0b01010110, 0b11010110, 0b00110110, 0b10110110, 0b01110110, 0b11110110,
    0b00001110, 0b10001110, 0b01001110, 0b11001110, 0b00101110, 0b10101110, 0b01101110, 0b11101110,
    0b00011110, 0b10011110, 0b01011110, 0b11011110, 0b00111110, 0b10111110, 0b01111110, 0b11111110,
    0b00000001, 0b10000001, 0b01000001, 0b11000001, 0b00100001, 0b10100001, 0b01100001, 0b11100001,
    0b00010001, 0b10010001, 0b01010001, 0b11010001, 0b00110001, 0b10110001, 0b01110001, 0b11110001,
    0b00001001, 0b10001001, 0b01001001, 0b11001001, 0b00101001, 0b10101001, 0b01101001, 0b11101001,
    0b00011001, 0b10011001, 0b01011001, 0b11011001, 0b00111001, 0b10111001, 0b01111001, 0b11111001,
    0b00000101, 0b10000101, 0b01000101, 0b11000101, 0b00100101, 0b10100101, 0b01100101, 0b11100101,
    0b00010101, 0b10010101, 0b01010101, 0b11010101, 0b00110101, 0b10110101, 0b01110101, 0b11110101,
    0b00001101, 0b10001101, 0b01001101, 0b11001101, 0b00101101, 0b10101101, 0b01101101, 0b11101101,
    0b00011101, 0b10011101, 0b01011101, 0b11011101, 0b00111101, 0b10111101, 0b01111101, 0b11111101,
    0b00010011, 0b00100110, 0b01001110, 0b10011010, 0b00111100, 0b01100101, 0b11101010, 0b10110100,
    0b11101001, 0b00110011, 0b01100110, 0b11001110, 0b10011010, 0b00111101, 0b01100111, 0b11101110,
    0b10111100, 0b11111001, 0b00001011, 0b00010110, 0b00101110, 0b01011010, 0b10111100, 0b01100100,
    0b11101001, 0b10110010, 0b11100101, 0b00101011, 0b01010110, 0b10101110, 0b01011010, 0b10111101,
    0b01100110, 0b11101101, 0b10111010, 0b11110101, 0b00011011, 0b00110110, 0b01101110, 0b11011010,
    0b10111100, 0b01100101, 0b11101011, 0b10110110, 0b11101101, 0b00111011, 0b01110110, 0b11101110,
    0b11011010, 0b10111101, 0b01100111, 0b11101111, 0b10111110, 0b11111101, 0b00000111, 0b00001110,
    0b00011110, 0b00111010, 0b01111100, 0b11100100, 0b11101000, 0b10110001, 0b11100011, 0b00100111,
    0b01001110, 0b10011110, 0b00111010, 0b01111101, 0b11100110, 0b11101100, 0b10111001, 0b11110011,
    0b00010111, 0b00101110, 0b01011110, 0b10111010, 0b01111100, 0b11100101, 0b11101010, 0b10110101,
    0b11101011, 0b00110111, 0b01101110, 0b11011110, 0b10111010, 0b01111101, 0b11100111, 0b11101110,
    0b10111101, 0b11111011, 0b00001111, 0b00011110, 0b00111110, 0b01111010, 0b11111100, 0b11100100,
    0b11101001, 0b10110011, 0b11100111, 0b00101111, 0b01011110, 0b10111110, 0b01111010, 0b11111101,
    0b11100110, 0b11101101, 0b10111011, 0b11110111, 0b00011111, 0b00111110, 0b01111110, 0b11111010,
    0b11111100, 0b11100101, 0b11101011, 0b10110111, 0b11101111, 0b00111111, 0b01111110, 0b11111110,
    0b11111010, 0b11111101, 0b11100111, 0b11101111, 0b10111111, 0b11111111, 0b00000000, 0b00100000,
    0b00001000, 0b00001100, 0b10000001, 0b11000010, 0b11100000, 0b00001000, 0b00100100, 0b00001010,
    0b10001101, 0b11000001, 0b11100010, 0b11110000, 0b00000100, 0b00100010, 0b10001001, 0b01001100,
    0b10100001, 0b11010010, 0b11101000, 0b00000011, 0b10000011, 0b01000011, 0b11000011, 0b00100011,
    0b10100011,
};
@ -1,130 +0,0 @@
//! Lookup of previous locations of the same 4-byte data. Works on a hash of
//! 4 bytes of data. Head contains the position of the first match for each
//! hash. Chain points to the previous position with the same hash given the
//! current location.

const std = @import("std");
const testing = std.testing;
const expect = testing.expect;
const flate = @import("../flate.zig");
const Token = @import("Token.zig");

const Lookup = @This();

const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761
const chain_len = 2 * flate.history_len;

pub const bits = 15;
pub const len = 1 << bits;
pub const shift = 32 - bits;

// Maps hash => first position
head: [len]u16 = [_]u16{0} ** len,
// Maps position => previous positions for the same hash value
chain: [chain_len]u16 = [_]u16{0} ** (chain_len),

// Calculates the hash of the first 4 bytes of `data`.
// Inserts position `pos` for that hash into the lookup tables.
// Returns the previous location with the same hash value.
pub fn add(self: *Lookup, data: []const u8, pos: u16) u16 {
    if (data.len < 4) return 0;
    const h = hash(data[0..4]);
    return self.set(h, pos);
}

// Returns the previous location with the same hash value given the current
// position.
pub fn prev(self: *Lookup, pos: u16) u16 {
    return self.chain[pos];
}

fn set(self: *Lookup, h: u32, pos: u16) u16 {
    const p = self.head[h];
    self.head[h] = pos;
    self.chain[pos] = p;
    return p;
}

// Slide all positions in head and chain down by `n`.
pub fn slide(self: *Lookup, n: u16) void {
    for (&self.head) |*v| {
        v.* -|= n;
    }
    var i: usize = 0;
    while (i < n) : (i += 1) {
        self.chain[i] = self.chain[i + n] -| n;
    }
}

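// Illustration (not part of the original file): sliding moves every remembered
// position down by `n`; saturating subtraction (-|) collapses positions that
// fell out of the window to 0, the "no match" sentinel. The chain copy only
// rewrites the first `n` entries, which suffices when every live position is
// below 2 * n at the time of the slide (here 128; presumably the compressor
// slides by the 32 KiB history with a 64 KiB chain, where the same holds).
test slide {
    var h: Lookup = .{};
    _ = h.set(100, 70);
    _ = h.set(100, 100); // bucket 100 now chains position 100 -> 70
    h.slide(64);
    try expect(h.head[100] == 100 - 64);
    try expect(h.prev(100 - 64) == 70 - 64);
}
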
// Add `length` 4-byte hashes from `data` into the lookup.
// Position of the first byte is `pos`.
pub fn bulkAdd(self: *Lookup, data: []const u8, length: u16, pos: u16) void {
    if (length == 0 or data.len < Token.min_length) {
        return;
    }
    var hb =
        @as(u32, data[3]) |
        @as(u32, data[2]) << 8 |
        @as(u32, data[1]) << 16 |
        @as(u32, data[0]) << 24;
    _ = self.set(hashu(hb), pos);

    var i = pos;
    for (4..@min(length + 3, data.len)) |j| {
        hb = (hb << 8) | @as(u32, data[j]);
        i += 1;
        _ = self.set(hashu(hb), i);
    }
}

// Calculates the hash of the first 4 bytes of `b`.
fn hash(b: *const [4]u8) u32 {
    return hashu(@as(u32, b[3]) |
        @as(u32, b[2]) << 8 |
        @as(u32, b[1]) << 16 |
        @as(u32, b[0]) << 24);
}

fn hashu(v: u32) u32 {
    return @intCast((v *% prime4) >> shift);
}

test add {
    const data = [_]u8{
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03,
    };

    var h: Lookup = .{};
    for (data, 0..) |_, i| {
        const p = h.add(data[i..], @intCast(i));
        if (i >= 8 and i < 24) {
            try expect(p == i - 8);
        } else {
            try expect(p == 0);
        }
    }

    const v = Lookup.hash(data[2 .. 2 + 4]);
    try expect(h.head[v] == 2 + 16);
    try expect(h.chain[2 + 16] == 2 + 8);
    try expect(h.chain[2 + 8] == 2);
}

test bulkAdd {
    const data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";

    // one by one
    var h: Lookup = .{};
    for (data, 0..) |_, i| {
        _ = h.add(data[i..], @intCast(i));
    }

    // in bulk
    var bh: Lookup = .{};
    bh.bulkAdd(data, data.len, 0);

    try testing.expectEqualSlices(u16, &h.head, &bh.head);
    try testing.expectEqualSlices(u16, &h.chain, &bh.chain);
}
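
// Illustration (not part of the original file): a hedged sketch of how a match
// searcher can combine `add` and `prev` to visit earlier positions that share
// the current 4-byte hash. `window`, `pos`, and `max_chain` are hypothetical
// names; position 0 doubles as the "no previous entry" sentinel in the tables.
fn findCandidate(lookup: *Lookup, window: []const u8, pos: u16, max_chain: usize) ?u16 {
    var candidate = lookup.add(window[pos..], pos);
    var depth: usize = 0;
    while (depth < max_chain) : (depth += 1) {
        if (candidate == 0) return null;
        // Hash collisions are possible, so verify the bytes before using a hit;
        // a real searcher would extend the match to its full length from here.
        if (std.mem.eql(u8, window[candidate..][0..4], window[pos..][0..4]))
            return candidate;
        candidate = lookup.prev(candidate);
    }
    return null;
}
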
@ -1,333 +0,0 @@
//! A token can be a literal (a single byte of data) or a match: a reference to
//! a slice of earlier data in the same stream, represented as <length, distance>,
//! where length can be 3 - 258 bytes and distance 1 - 32768 bytes.
//!
const std = @import("std");
const assert = std.debug.assert;
const print = std.debug.print;
const expect = std.testing.expect;

const Token = @This();

pub const Kind = enum(u1) {
    literal,
    match,
};

// Distance range 1 - 32768, stored in dist as 0 - 32767 (fits u15)
dist: u15 = 0,
// Length range 3 - 258, stored in len_lit as 0 - 255 (fits u8)
len_lit: u8 = 0,
kind: Kind = .literal,

pub const base_length = 3; // smallest match length per the RFC section 3.2.5
pub const min_length = 4; // min length used in this algorithm
pub const max_length = 258;

pub const min_distance = 1;
pub const max_distance = std.compress.flate.history_len;

pub fn literal(t: Token) u8 {
    return t.len_lit;
}

pub fn distance(t: Token) u16 {
    return @as(u16, t.dist) + min_distance;
}

pub fn length(t: Token) u16 {
    return @as(u16, t.len_lit) + base_length;
}

pub fn initLiteral(lit: u8) Token {
    return .{ .kind = .literal, .len_lit = lit };
}

// distance range 1 - 32768, stored in dist as 0 - 32767 (u15)
// length range 3 - 258, stored in len_lit as 0 - 255 (u8)
pub fn initMatch(dist: u16, len: u16) Token {
    assert(len >= min_length and len <= max_length);
    assert(dist >= min_distance and dist <= max_distance);
    return .{
        .kind = .match,
        .dist = @intCast(dist - min_distance),
        .len_lit = @intCast(len - base_length),
    };
}

pub fn eql(t: Token, o: Token) bool {
    return t.kind == o.kind and
        t.dist == o.dist and
        t.len_lit == o.len_lit;
}

pub fn lengthCode(t: Token) u16 {
    return match_lengths[match_lengths_index[t.len_lit]].code;
}

pub fn lengthEncoding(t: Token) MatchLength {
    var c = match_lengths[match_lengths_index[t.len_lit]];
    c.extra_length = t.len_lit - c.base_scaled;
    return c;
}

// Returns the distance code corresponding to a specific distance.
// Distance code is in range: 0 - 29.
pub fn distanceCode(t: Token) u8 {
    var dist: u16 = t.dist;
    if (dist < match_distances_index.len) {
        return match_distances_index[dist];
    }
    dist >>= 7;
    if (dist < match_distances_index.len) {
        return match_distances_index[dist] + 14;
    }
    dist >>= 7;
    return match_distances_index[dist] + 28;
}

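// Worked example (not part of the original file): distance 300 is stored as
// dist = 299. That is past the 256-entry index table, so it is shifted right
// by 7 (299 >> 7 == 2); match_distances_index[2] == 2, plus 14, gives distance
// code 16, which per the RFC covers distances 257-384.
test "distanceCode worked example" {
    try expect(Token.initMatch(300, 4).distanceCode() == 16);
}
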
pub fn distanceEncoding(t: Token) MatchDistance {
    var c = match_distances[t.distanceCode()];
    c.extra_distance = t.dist - c.base_scaled;
    return c;
}

pub fn lengthExtraBits(code: u32) u8 {
    return match_lengths[code - length_codes_start].extra_bits;
}

pub fn matchLength(code: u8) MatchLength {
    return match_lengths[code];
}

pub fn matchDistance(code: u8) MatchDistance {
    return match_distances[code];
}

pub fn distanceExtraBits(code: u32) u8 {
    return match_distances[code].extra_bits;
}

pub fn show(t: Token) void {
    if (t.kind == .literal) {
        print("L('{c}'), ", .{t.literal()});
    } else {
        print("M({d}, {d}), ", .{ t.distance(), t.length() });
    }
}

// Returns index in match_lengths table for each length in range 0-255.
const match_lengths_index = [_]u8{
    0,  1,  2,  3,  4,  5,  6,  7,  8,  8,
    9,  9,  10, 10, 11, 11, 12, 12, 12, 12,
    13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
    17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
    18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
    19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
    21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
    22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
    22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
    23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
    25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
    26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
    26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
    26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 27, 27, 28,
};

const MatchLength = struct {
    code: u16,
    base_scaled: u8, // base - 3, scaled to fit into u8 (0-255), same as the len_lit field in Token.
    base: u16, // 3-258
    extra_length: u8 = 0,
    extra_bits: u4,
};

// match_lengths represents the table from the RFC (https://datatracker.ietf.org/doc/html/rfc1951#page-12)
//
//           Extra               Extra                Extra
//      Code Bits Length(s) Code Bits Lengths   Code Bits Length(s)
//      ---- ---- ------    ---- ---- -------   ---- ---- -------
//       257   0     3       267   1   15,16     277   4   67-82
//       258   0     4       268   1   17,18     278   4   83-98
//       259   0     5       269   2   19-22     279   4   99-114
//       260   0     6       270   2   23-26     280   4  115-130
//       261   0     7       271   2   27-30     281   5  131-162
//       262   0     8       272   2   31-34     282   5  163-194
//       263   0     9       273   3   35-42     283   5  195-226
//       264   0    10       274   3   43-50     284   5  227-257
//       265   1   11,12     275   3   51-58     285   0    258
//       266   1   13,14     276   3   59-66
//
pub const length_codes_start = 257;

const match_lengths = [_]MatchLength{
    .{ .extra_bits = 0, .base_scaled = 0, .base = 3, .code = 257 },
    .{ .extra_bits = 0, .base_scaled = 1, .base = 4, .code = 258 },
    .{ .extra_bits = 0, .base_scaled = 2, .base = 5, .code = 259 },
    .{ .extra_bits = 0, .base_scaled = 3, .base = 6, .code = 260 },
    .{ .extra_bits = 0, .base_scaled = 4, .base = 7, .code = 261 },
    .{ .extra_bits = 0, .base_scaled = 5, .base = 8, .code = 262 },
    .{ .extra_bits = 0, .base_scaled = 6, .base = 9, .code = 263 },
    .{ .extra_bits = 0, .base_scaled = 7, .base = 10, .code = 264 },
    .{ .extra_bits = 1, .base_scaled = 8, .base = 11, .code = 265 },
    .{ .extra_bits = 1, .base_scaled = 10, .base = 13, .code = 266 },
    .{ .extra_bits = 1, .base_scaled = 12, .base = 15, .code = 267 },
    .{ .extra_bits = 1, .base_scaled = 14, .base = 17, .code = 268 },
    .{ .extra_bits = 2, .base_scaled = 16, .base = 19, .code = 269 },
    .{ .extra_bits = 2, .base_scaled = 20, .base = 23, .code = 270 },
    .{ .extra_bits = 2, .base_scaled = 24, .base = 27, .code = 271 },
    .{ .extra_bits = 2, .base_scaled = 28, .base = 31, .code = 272 },
    .{ .extra_bits = 3, .base_scaled = 32, .base = 35, .code = 273 },
    .{ .extra_bits = 3, .base_scaled = 40, .base = 43, .code = 274 },
    .{ .extra_bits = 3, .base_scaled = 48, .base = 51, .code = 275 },
    .{ .extra_bits = 3, .base_scaled = 56, .base = 59, .code = 276 },
    .{ .extra_bits = 4, .base_scaled = 64, .base = 67, .code = 277 },
    .{ .extra_bits = 4, .base_scaled = 80, .base = 83, .code = 278 },
    .{ .extra_bits = 4, .base_scaled = 96, .base = 99, .code = 279 },
    .{ .extra_bits = 4, .base_scaled = 112, .base = 115, .code = 280 },
    .{ .extra_bits = 5, .base_scaled = 128, .base = 131, .code = 281 },
    .{ .extra_bits = 5, .base_scaled = 160, .base = 163, .code = 282 },
    .{ .extra_bits = 5, .base_scaled = 192, .base = 195, .code = 283 },
    .{ .extra_bits = 5, .base_scaled = 224, .base = 227, .code = 284 },
    .{ .extra_bits = 0, .base_scaled = 255, .base = 258, .code = 285 },
};

// Used in distanceCode fn to get index in match_distances table for each distance in range 0-32767.
const match_distances_index = [_]u8{
    0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
    8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
};

const MatchDistance = struct {
    base_scaled: u16, // base - 1, same as the Token dist field
    base: u16,
    extra_distance: u16 = 0,
    code: u8,
    extra_bits: u4,
};

// match_distances represents the table from the RFC (https://datatracker.ietf.org/doc/html/rfc1951#page-12)
//
//           Extra             Extra                  Extra
//      Code Bits Dist    Code Bits   Dist      Code Bits Distance
//      ---- ---- ----    ---- ----  ------     ---- ---- --------
//        0   0    1       10   4    33-48       20    9   1025-1536
//        1   0    2       11   4    49-64       21    9   1537-2048
//        2   0    3       12   5    65-96       22   10   2049-3072
//        3   0    4       13   5    97-128      23   10   3073-4096
//        4   1   5,6      14   6   129-192      24   11   4097-6144
//        5   1   7,8      15   6   193-256      25   11   6145-8192
//        6   2   9-12     16   7   257-384      26   12   8193-12288
//        7   2   13-16    17   7   385-512      27   12  12289-16384
//        8   3   17-24    18   8   513-768      28   13  16385-24576
//        9   3   25-32    19   8   769-1024     29   13  24577-32768
//
const match_distances = [_]MatchDistance{
    .{ .extra_bits = 0, .base_scaled = 0x0000, .code = 0, .base = 1 },
    .{ .extra_bits = 0, .base_scaled = 0x0001, .code = 1, .base = 2 },
    .{ .extra_bits = 0, .base_scaled = 0x0002, .code = 2, .base = 3 },
    .{ .extra_bits = 0, .base_scaled = 0x0003, .code = 3, .base = 4 },
    .{ .extra_bits = 1, .base_scaled = 0x0004, .code = 4, .base = 5 },
    .{ .extra_bits = 1, .base_scaled = 0x0006, .code = 5, .base = 7 },
    .{ .extra_bits = 2, .base_scaled = 0x0008, .code = 6, .base = 9 },
    .{ .extra_bits = 2, .base_scaled = 0x000c, .code = 7, .base = 13 },
    .{ .extra_bits = 3, .base_scaled = 0x0010, .code = 8, .base = 17 },
    .{ .extra_bits = 3, .base_scaled = 0x0018, .code = 9, .base = 25 },
    .{ .extra_bits = 4, .base_scaled = 0x0020, .code = 10, .base = 33 },
    .{ .extra_bits = 4, .base_scaled = 0x0030, .code = 11, .base = 49 },
    .{ .extra_bits = 5, .base_scaled = 0x0040, .code = 12, .base = 65 },
    .{ .extra_bits = 5, .base_scaled = 0x0060, .code = 13, .base = 97 },
    .{ .extra_bits = 6, .base_scaled = 0x0080, .code = 14, .base = 129 },
    .{ .extra_bits = 6, .base_scaled = 0x00c0, .code = 15, .base = 193 },
    .{ .extra_bits = 7, .base_scaled = 0x0100, .code = 16, .base = 257 },
    .{ .extra_bits = 7, .base_scaled = 0x0180, .code = 17, .base = 385 },
    .{ .extra_bits = 8, .base_scaled = 0x0200, .code = 18, .base = 513 },
    .{ .extra_bits = 8, .base_scaled = 0x0300, .code = 19, .base = 769 },
    .{ .extra_bits = 9, .base_scaled = 0x0400, .code = 20, .base = 1025 },
    .{ .extra_bits = 9, .base_scaled = 0x0600, .code = 21, .base = 1537 },
    .{ .extra_bits = 10, .base_scaled = 0x0800, .code = 22, .base = 2049 },
    .{ .extra_bits = 10, .base_scaled = 0x0c00, .code = 23, .base = 3073 },
    .{ .extra_bits = 11, .base_scaled = 0x1000, .code = 24, .base = 4097 },
    .{ .extra_bits = 11, .base_scaled = 0x1800, .code = 25, .base = 6145 },
    .{ .extra_bits = 12, .base_scaled = 0x2000, .code = 26, .base = 8193 },
    .{ .extra_bits = 12, .base_scaled = 0x3000, .code = 27, .base = 12289 },
    .{ .extra_bits = 13, .base_scaled = 0x4000, .code = 28, .base = 16385 },
    .{ .extra_bits = 13, .base_scaled = 0x6000, .code = 29, .base = 24577 },
};

test "size" {
|
||||
try expect(@sizeOf(Token) == 4);
|
||||
}
|
||||
|
||||
// testing table https://datatracker.ietf.org/doc/html/rfc1951#page-12
|
||||
test "MatchLength" {
|
||||
var c = Token.initMatch(1, 4).lengthEncoding();
|
||||
try expect(c.code == 258);
|
||||
try expect(c.extra_bits == 0);
|
||||
try expect(c.extra_length == 0);
|
||||
|
||||
c = Token.initMatch(1, 11).lengthEncoding();
|
||||
try expect(c.code == 265);
|
||||
try expect(c.extra_bits == 1);
|
||||
try expect(c.extra_length == 0);
|
||||
|
||||
c = Token.initMatch(1, 12).lengthEncoding();
|
||||
try expect(c.code == 265);
|
||||
try expect(c.extra_bits == 1);
|
||||
try expect(c.extra_length == 1);
|
||||
|
||||
c = Token.initMatch(1, 130).lengthEncoding();
|
||||
try expect(c.code == 280);
|
||||
try expect(c.extra_bits == 4);
|
||||
try expect(c.extra_length == 130 - 115);
|
||||
}
|
||||
|
||||
test "MatchDistance" {
|
||||
var c = Token.initMatch(1, 4).distanceEncoding();
|
||||
try expect(c.code == 0);
|
||||
try expect(c.extra_bits == 0);
|
||||
try expect(c.extra_distance == 0);
|
||||
|
||||
c = Token.initMatch(192, 4).distanceEncoding();
|
||||
try expect(c.code == 14);
|
||||
try expect(c.extra_bits == 6);
|
||||
try expect(c.extra_distance == 192 - 129);
|
||||
}
|
||||
|
||||
test "match_lengths" {
|
||||
for (match_lengths, 0..) |ml, i| {
|
||||
try expect(@as(u16, ml.base_scaled) + 3 == ml.base);
|
||||
try expect(i + 257 == ml.code);
|
||||
}
|
||||
|
||||
for (match_distances, 0..) |mo, i| {
|
||||
try expect(mo.base_scaled + 1 == mo.base);
|
||||
try expect(i == mo.code);
|
||||
}
|
||||
}
|
||||
286 lib/std/compress/flate/token.zig Normal file
@ -0,0 +1,286 @@
const std = @import("std");
const builtin = @import("builtin");

pub const min_length = 3;
pub const max_length = 258;

pub const min_distance = 1;
pub const max_distance = std.compress.flate.history_len;

pub const codegen_order: [19]u8 = .{
    16, 17, 18,
    0,  8, //
    7,  9,
    6,  10,
    5,  11,
    4,  12,
    3,  13,
    2,  14,
    1,  15,
};

pub const fixed_lit_codes = fixed_lit[0];
pub const fixed_lit_bits = fixed_lit[1];
const fixed_lit = blk: {
    var codes: [286]u16 = undefined;
    var bits: [286]u4 = undefined;

    for (0..143 + 1, 0b00110000..0b10111111 + 1) |i, v| {
        codes[i] = @bitReverse(@as(u8, v));
        bits[i] = 8;
    }
    for (144..255 + 1, 0b110010000..0b111111111 + 1) |i, v| {
        codes[i] = @bitReverse(@as(u9, v));
        bits[i] = 9;
    }
    for (256..279 + 1, 0b0000000..0b0010111 + 1) |i, v| {
        codes[i] = @bitReverse(@as(u7, v));
        bits[i] = 7;
    }
    for (280..287 - 2 + 1, 0b11000000..0b11000111 - 2 + 1) |i, v| {
        codes[i] = @bitReverse(@as(u8, v));
        bits[i] = 8;
    }
    break :blk .{ codes, bits };
};

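// Illustration (not part of the original file): spot-check the comptime fixed
// literal table against RFC 1951 3.2.6. Symbol 0 is the 8-bit code 0b00110000
// and symbol 256 (end of block) is the 7-bit code 0b0000000, both stored
// bit-reversed for LSB-first emission.
test "fixed literal table spot checks" {
    try std.testing.expectEqual(@as(u4, 8), fixed_lit_bits[0]);
    try std.testing.expectEqual(@as(u16, @bitReverse(@as(u8, 0b00110000))), fixed_lit_codes[0]);
    try std.testing.expectEqual(@as(u4, 7), fixed_lit_bits[256]);
    try std.testing.expectEqual(@as(u16, 0), fixed_lit_codes[256]);
}
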
pub const fixed_dist_codes = fixed_dist[0];
pub const fixed_dist_bits = fixed_dist[1];
const fixed_dist = blk: {
    var codes: [30]u16 = undefined;
    const bits: [30]u4 = @splat(5);

    for (0..30) |i| {
        codes[i] = @bitReverse(@as(u5, i));
    }
    break :blk .{ codes, bits };
};

// All parameters of codes can be derived mathematically; however, some are
// faster to obtain via a lookup table (except on ReleaseSmall, where everything
// is derived mathematically to save space).
pub const LenCode = if (builtin.mode != .ReleaseSmall) LookupLenCode else ShortLenCode;
pub const DistCode = if (builtin.mode != .ReleaseSmall) LookupDistCode else ShortDistCode;
const ShortLenCode = ShortCode(u8, u2, u3, true);
const ShortDistCode = ShortCode(u15, u1, u4, false);
/// Length and distance codes have this format.
///
/// For example, length code 0b1101 (13 or literal 270) has high_bits=0b01 and high_log2=3
/// and is 1_01_xx (2 extra bits). It is then offset by the min length of 3.
///        ^ bit 4 = 2 + high_log2 - 1
///
/// An exception is length codes, where value 255 is assigned the special zero-bit code 28 or
/// literal 285.
fn ShortCode(Value: type, HighBits: type, HighLog2: type, len_special: bool) type {
    return packed struct(u5) {
        /// Bits preceding the high bit, or the start if there is none
        high_bits: HighBits,
        /// High bit; 0 means none, otherwise it is at bit `@bitSizeOf(HighBits) + high_log2 - 1`
        high_log2: HighLog2,

        pub fn fromVal(v: Value) @This() {
            if (len_special and v == 255) return .fromInt(28);
            const high_bits = @bitSizeOf(HighBits) + 1;
            const bits = @bitSizeOf(Value) - @clz(v);
            if (bits <= high_bits) return @bitCast(@as(u5, @intCast(v)));
            const high = v >> @intCast(bits - high_bits);
            return .{ .high_bits = @truncate(high), .high_log2 = @intCast(bits - high_bits + 1) };
        }

        /// `@ctz(return) >= extraBits()`
        pub fn base(c: @This()) Value {
            if (len_special and c.toInt() == 28) return 255;
            if (c.high_log2 <= 1) return @as(u5, @bitCast(c));
            const high_value = (@as(Value, @intFromBool(c.high_log2 != 0)) << @bitSizeOf(HighBits)) | c.high_bits;
            const high_start = @as(std.math.Log2Int(Value), c.high_log2 - 1);
            return @shlExact(high_value, high_start);
        }

        const max_extra = @bitSizeOf(Value) - (1 + @bitSizeOf(HighLog2));
        pub fn extraBits(c: @This()) std.math.IntFittingRange(0, max_extra) {
            if (len_special and c.toInt() == 28) return 0;
            return @intCast(c.high_log2 -| 1);
        }

        pub fn toInt(c: @This()) u5 {
            return @bitCast(c);
        }

        pub fn fromInt(x: u5) @This() {
            return @bitCast(x);
        }
    };
}

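// Illustration (not part of the original file): a worked example of the layout
// described above. A match length of 13 is stored as len_lit = 10 (0b1010);
// its top three bits 0b101 split into high_log2 = 2 and high_bits = 0b01,
// giving length code 9 (literal 266 in RFC 1951) with base 10 and 1 extra bit.
test "ShortLenCode worked example" {
    const c: ShortLenCode = .fromVal(13 - min_length);
    try std.testing.expectEqual(@as(u5, 9), c.toInt());
    try std.testing.expectEqual(@as(u8, 10), c.base());
    try std.testing.expectEqual(@as(u3, 1), c.extraBits());
}
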
const LookupLenCode = packed struct(u5) {
    code: ShortLenCode,

    const code_table = table: {
        var codes: [256]ShortLenCode = undefined;
        for (0.., &codes) |v, *c| {
            c.* = .fromVal(v);
        }
        break :table codes;
    };

    const base_table = table: {
        var bases: [29]u8 = undefined;
        for (0.., &bases) |c, *b| {
            b.* = ShortLenCode.fromInt(c).base();
        }
        break :table bases;
    };

    pub fn fromVal(v: u8) LookupLenCode {
        return .{ .code = code_table[v] };
    }

    /// `@ctz(return) >= extraBits()`
    pub fn base(c: LookupLenCode) u8 {
        return base_table[c.toInt()];
    }

    pub fn extraBits(c: LookupLenCode) u3 {
        return c.code.extraBits();
    }

    pub fn toInt(c: LookupLenCode) u5 {
        return @bitCast(c);
    }

    pub fn fromInt(x: u5) LookupLenCode {
        return @bitCast(x);
    }
};

const LookupDistCode = packed struct(u5) {
    code: ShortDistCode,

    const base_table = table: {
        var bases: [30]u15 = undefined;
        for (0.., &bases) |c, *b| {
            b.* = ShortDistCode.fromInt(c).base();
        }
        break :table bases;
    };

    pub fn fromVal(v: u15) LookupDistCode {
        return .{ .code = .fromVal(v) };
    }

    /// `@ctz(return) >= extraBits()`
    pub fn base(c: LookupDistCode) u15 {
        return base_table[c.toInt()];
    }

    pub fn extraBits(c: LookupDistCode) u4 {
        return c.code.extraBits();
    }

    pub fn toInt(c: LookupDistCode) u5 {
        return @bitCast(c);
    }

    pub fn fromInt(x: u5) LookupDistCode {
        return @bitCast(x);
    }
};

test LenCode {
    inline for ([_]type{ ShortLenCode, LookupLenCode }) |Code| {
        // Check against the RFC 1951 table
        for (0.., [_]struct {
            base: u8,
            extra_bits: u4,
        }{
            // zig fmt: off
            .{ .base = 3   - min_length, .extra_bits = 0 },
            .{ .base = 4   - min_length, .extra_bits = 0 },
            .{ .base = 5   - min_length, .extra_bits = 0 },
            .{ .base = 6   - min_length, .extra_bits = 0 },
            .{ .base = 7   - min_length, .extra_bits = 0 },
            .{ .base = 8   - min_length, .extra_bits = 0 },
            .{ .base = 9   - min_length, .extra_bits = 0 },
            .{ .base = 10  - min_length, .extra_bits = 0 },
            .{ .base = 11  - min_length, .extra_bits = 1 },
            .{ .base = 13  - min_length, .extra_bits = 1 },
            .{ .base = 15  - min_length, .extra_bits = 1 },
            .{ .base = 17  - min_length, .extra_bits = 1 },
            .{ .base = 19  - min_length, .extra_bits = 2 },
            .{ .base = 23  - min_length, .extra_bits = 2 },
            .{ .base = 27  - min_length, .extra_bits = 2 },
            .{ .base = 31  - min_length, .extra_bits = 2 },
            .{ .base = 35  - min_length, .extra_bits = 3 },
            .{ .base = 43  - min_length, .extra_bits = 3 },
            .{ .base = 51  - min_length, .extra_bits = 3 },
            .{ .base = 59  - min_length, .extra_bits = 3 },
            .{ .base = 67  - min_length, .extra_bits = 4 },
            .{ .base = 83  - min_length, .extra_bits = 4 },
            .{ .base = 99  - min_length, .extra_bits = 4 },
            .{ .base = 115 - min_length, .extra_bits = 4 },
            .{ .base = 131 - min_length, .extra_bits = 5 },
            .{ .base = 163 - min_length, .extra_bits = 5 },
            .{ .base = 195 - min_length, .extra_bits = 5 },
            .{ .base = 227 - min_length, .extra_bits = 5 },
            .{ .base = 258 - min_length, .extra_bits = 0 },
        }) |code, params| {
            // zig fmt: on
            const c: u5 = @intCast(code);
            try std.testing.expectEqual(params.extra_bits, Code.extraBits(.fromInt(@intCast(c))));
            try std.testing.expectEqual(params.base, Code.base(.fromInt(@intCast(c))));
            for (params.base..params.base + @shlExact(@as(u16, 1), params.extra_bits) -
                @intFromBool(c == 27)) |v|
            {
                try std.testing.expectEqual(c, Code.fromVal(@intCast(v)).toInt());
            }
        }
    }
}

test DistCode {
    inline for ([_]type{ ShortDistCode, LookupDistCode }) |Code| {
        for (0.., [_]struct {
            base: u15,
            extra_bits: u4,
        }{
            // zig fmt: off
            .{ .base = 1     - min_distance, .extra_bits = 0 },
            .{ .base = 2     - min_distance, .extra_bits = 0 },
            .{ .base = 3     - min_distance, .extra_bits = 0 },
            .{ .base = 4     - min_distance, .extra_bits = 0 },
            .{ .base = 5     - min_distance, .extra_bits = 1 },
            .{ .base = 7     - min_distance, .extra_bits = 1 },
            .{ .base = 9     - min_distance, .extra_bits = 2 },
            .{ .base = 13    - min_distance, .extra_bits = 2 },
            .{ .base = 17    - min_distance, .extra_bits = 3 },
            .{ .base = 25    - min_distance, .extra_bits = 3 },
            .{ .base = 33    - min_distance, .extra_bits = 4 },
            .{ .base = 49    - min_distance, .extra_bits = 4 },
            .{ .base = 65    - min_distance, .extra_bits = 5 },
            .{ .base = 97    - min_distance, .extra_bits = 5 },
            .{ .base = 129   - min_distance, .extra_bits = 6 },
            .{ .base = 193   - min_distance, .extra_bits = 6 },
            .{ .base = 257   - min_distance, .extra_bits = 7 },
            .{ .base = 385   - min_distance, .extra_bits = 7 },
            .{ .base = 513   - min_distance, .extra_bits = 8 },
            .{ .base = 769   - min_distance, .extra_bits = 8 },
            .{ .base = 1025  - min_distance, .extra_bits = 9 },
            .{ .base = 1537  - min_distance, .extra_bits = 9 },
            .{ .base = 2049  - min_distance, .extra_bits = 10 },
            .{ .base = 3073  - min_distance, .extra_bits = 10 },
            .{ .base = 4097  - min_distance, .extra_bits = 11 },
            .{ .base = 6145  - min_distance, .extra_bits = 11 },
            .{ .base = 8193  - min_distance, .extra_bits = 12 },
            .{ .base = 12289 - min_distance, .extra_bits = 12 },
            .{ .base = 16385 - min_distance, .extra_bits = 13 },
            .{ .base = 24577 - min_distance, .extra_bits = 13 },
        }) |code, params| {
            // zig fmt: on
            const c: u5 = @intCast(code);
            try std.testing.expectEqual(params.extra_bits, Code.extraBits(.fromInt(@intCast(c))));
            try std.testing.expectEqual(params.base, Code.base(.fromInt(@intCast(c))));
            for (params.base..params.base + @shlExact(@as(u16, 1), params.extra_bits)) |v| {
                try std.testing.expectEqual(c, Code.fromVal(@intCast(v)).toInt());
            }
        }
    }
}
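
// Illustration (not part of the original file): deriving the literal/length
// symbol and extra bits that a block writer would emit for a match of length
// 100. Length 100 is stored as len_lit = 97; LenCode maps it to code 22
// (literal 279, base 99, 4 extra bits), so the extra-bits payload is 1.
test "LenCode emission example" {
    const code: LenCode = .fromVal(100 - min_length);
    try std.testing.expectEqual(@as(u16, 279), @as(u16, code.toInt()) + 257);
    try std.testing.expectEqual(@as(u3, 4), code.extraBits());
    try std.testing.expectEqual(@as(u8, 1), (100 - min_length) - code.base());
}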