zig/lib/std/compress/deflate/compressor.zig
2023-11-19 09:55:07 +00:00

1111 lines
41 KiB
Zig

const std = @import("std");
const assert = std.debug.assert;
const fmt = std.fmt;
const io = std.io;
const math = std.math;
const mem = std.mem;
const Allocator = std.mem.Allocator;
const deflate_const = @import("deflate_const.zig");
const fast = @import("deflate_fast.zig");
const hm_bw = @import("huffman_bit_writer.zig");
const token = @import("token.zig");
pub const Compression = enum(i5) {
/// huffman_only disables Lempel-Ziv match searching and only performs Huffman
/// entropy encoding. This mode is useful in compressing data that has
/// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
/// that lacks an entropy encoder. Compression gains are achieved when
/// certain bytes in the input stream occur more frequently than others.
///
/// Note that huffman_only produces a compressed output that is
/// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
/// continue to be able to decompress this output.
huffman_only = -2,
/// Same as level_6
default_compression = -1,
/// Does not attempt any compression; only adds the necessary DEFLATE framing.
no_compression = 0,
/// Prioritizes speed over output size, based on Snappy's LZ77-style encoder
best_speed = 1,
level_2 = 2,
level_3 = 3,
level_4 = 4,
level_5 = 5,
level_6 = 6,
level_7 = 7,
level_8 = 8,
/// Prioritizes smaller output size over speed
best_compression = 9,
};
const log_window_size = 15;
const window_size = 1 << log_window_size;
const window_mask = window_size - 1;
// The LZ77 step produces a sequence of literal tokens and <length, offset>
// pair tokens. The offset is also known as distance. The underlying wire
// format limits the range of lengths and offsets. For example, there are
// 256 legitimate lengths: those in the range [3, 258]. This package's
// compressor uses a higher minimum match length, enabling optimizations
// such as finding matches via 32-bit loads and compares.
const base_match_length = deflate_const.base_match_length; // The smallest match length per the RFC section 3.2.5
const min_match_length = 4; // The smallest match length that the compressor actually emits
const max_match_length = deflate_const.max_match_length;
const base_match_offset = deflate_const.base_match_offset; // The smallest match offset
const max_match_offset = deflate_const.max_match_offset; // The largest match offset
// The maximum number of tokens we put into a single flate block, just to
// stop things from getting too large.
const max_flate_block_tokens = 1 << 14;
const max_store_block_size = deflate_const.max_store_block_size;
const hash_bits = 17; // After 17 performance degrades
const hash_size = 1 << hash_bits;
const hash_mask = (1 << hash_bits) - 1;
const max_hash_offset = 1 << 24;
const skip_never = math.maxInt(u32);
const CompressionLevel = struct {
good: u16,
lazy: u16,
nice: u16,
chain: u16,
fast_skip_hashshing: u32,
};
fn levels(compression: Compression) CompressionLevel {
switch (compression) {
.no_compression,
.best_speed, // best_speed uses a custom algorithm; see deflate_fast.zig
.huffman_only,
=> return .{
.good = 0,
.lazy = 0,
.nice = 0,
.chain = 0,
.fast_skip_hashshing = 0,
},
// For levels 2-3 we don't bother trying with lazy matches.
.level_2 => return .{
.good = 4,
.lazy = 0,
.nice = 16,
.chain = 8,
.fast_skip_hashshing = 5,
},
.level_3 => return .{
.good = 4,
.lazy = 0,
.nice = 32,
.chain = 32,
.fast_skip_hashshing = 6,
},
// Levels 4-9 use increasingly more lazy matching and increasingly stringent conditions for
// "good enough".
.level_4 => return .{
.good = 4,
.lazy = 4,
.nice = 16,
.chain = 16,
.fast_skip_hashshing = skip_never,
},
.level_5 => return .{
.good = 8,
.lazy = 16,
.nice = 32,
.chain = 32,
.fast_skip_hashshing = skip_never,
},
.default_compression,
.level_6,
=> return .{
.good = 8,
.lazy = 16,
.nice = 128,
.chain = 128,
.fast_skip_hashshing = skip_never,
},
.level_7 => return .{
.good = 8,
.lazy = 32,
.nice = 128,
.chain = 256,
.fast_skip_hashshing = skip_never,
},
.level_8 => return .{
.good = 32,
.lazy = 128,
.nice = 258,
.chain = 1024,
.fast_skip_hashshing = skip_never,
},
.best_compression => return .{
.good = 32,
.lazy = 258,
.nice = 258,
.chain = 4096,
.fast_skip_hashshing = skip_never,
},
}
}
// matchLen returns the number of matching bytes in a and b
// up to length 'max'. Both slices must be at least 'max'
// bytes in size.
fn matchLen(a: []u8, b: []u8, max: u32) u32 {
const bounded_a = a[0..max];
const bounded_b = b[0..max];
for (bounded_a, 0..) |av, i| {
if (bounded_b[i] != av) {
return @as(u32, @intCast(i));
}
}
return max;
}
const hash_mul = 0x1e35a7bd;
// hash4 returns a hash representation of the first 4 bytes
// of the supplied slice.
// The caller must ensure that b.len >= 4.
fn hash4(b: []u8) u32 {
return ((@as(u32, b[3]) |
@as(u32, b[2]) << 8 |
@as(u32, b[1]) << 16 |
@as(u32, b[0]) << 24) *% hash_mul) >> (32 - hash_bits);
}
// bulkHash4 will compute hashes using the same
// algorithm as hash4
fn bulkHash4(b: []u8, dst: []u32) u32 {
if (b.len < min_match_length) {
return 0;
}
var hb =
@as(u32, b[3]) |
@as(u32, b[2]) << 8 |
@as(u32, b[1]) << 16 |
@as(u32, b[0]) << 24;
dst[0] = (hb *% hash_mul) >> (32 - hash_bits);
const end = b.len - min_match_length + 1;
var i: u32 = 1;
while (i < end) : (i += 1) {
hb = (hb << 8) | @as(u32, b[i + 3]);
dst[i] = (hb *% hash_mul) >> (32 - hash_bits);
}
return hb;
}
pub const CompressorOptions = struct {
level: Compression = .default_compression,
dictionary: ?[]const u8 = null,
};
/// Returns a new Compressor compressing data at the given level.
/// Following zlib, levels range from 1 (best_speed) to 9 (best_compression);
/// higher levels typically run slower but compress more. Level 0
/// (no_compression) does not attempt any compression; it only adds the
/// necessary DEFLATE framing.
/// Level -1 (default_compression) uses the default compression level.
/// Level -2 (huffman_only) will use Huffman compression only, giving
/// a very fast compression for all types of input, but sacrificing considerable
/// compression efficiency.
///
/// `dictionary` is optional and initializes the new `Compressor` with a preset dictionary.
/// The returned Compressor behaves as if the dictionary had been written to it without producing
/// any compressed output. The compressed data written to hm_bw can only be decompressed by a
/// Decompressor initialized with the same dictionary.
///
/// The compressed data will be passed to the provided `writer`, see `writer()` and `write()`.
pub fn compressor(
allocator: Allocator,
writer: anytype,
options: CompressorOptions,
) !Compressor(@TypeOf(writer)) {
return Compressor(@TypeOf(writer)).init(allocator, writer, options);
}
pub fn Compressor(comptime WriterType: anytype) type {
return struct {
const Self = @This();
/// A Writer takes data written to it and writes the compressed
/// form of that data to an underlying writer.
pub const Writer = io.Writer(*Self, Error, write);
/// Returns a Writer that takes data written to it and writes the compressed
/// form of that data to an underlying writer.
pub fn writer(self: *Self) Writer {
return .{ .context = self };
}
pub const Error = WriterType.Error;
allocator: Allocator,
compression: Compression,
compression_level: CompressionLevel,
// Inner writer wrapped in a HuffmanBitWriter
hm_bw: hm_bw.HuffmanBitWriter(WriterType) = undefined,
bulk_hasher: *const fn ([]u8, []u32) u32,
sync: bool, // requesting flush
best_speed_enc: *fast.DeflateFast, // Encoder for best_speed
// Input hash chains
// hash_head[hashValue] contains the largest inputIndex with the specified hash value
// If hash_head[hashValue] is within the current window, then
// hash_prev[hash_head[hashValue] & window_mask] contains the previous index
// with the same hash value.
chain_head: u32,
hash_head: []u32, // [hash_size]u32,
hash_prev: []u32, // [window_size]u32,
hash_offset: u32,
// input window: unprocessed data is window[index..window_end]
index: u32,
window: []u8,
window_end: usize,
block_start: usize, // window index where current tokens start
byte_available: bool, // if true, still need to process window[index-1].
// queued output tokens
tokens: []token.Token,
tokens_count: u16,
// deflate state
length: u32,
offset: u32,
hash: u32,
max_insert_index: usize,
err: bool,
// hash_match must be able to contain hashes for the maximum match length.
hash_match: []u32, // [max_match_length - 1]u32,
// dictionary
dictionary: ?[]const u8,
fn fillDeflate(self: *Self, b: []const u8) u32 {
if (self.index >= 2 * window_size - (min_match_length + max_match_length)) {
// shift the window by window_size
mem.copyForwards(u8, self.window, self.window[window_size .. 2 * window_size]);
self.index -= window_size;
self.window_end -= window_size;
if (self.block_start >= window_size) {
self.block_start -= window_size;
} else {
self.block_start = math.maxInt(u32);
}
self.hash_offset += window_size;
if (self.hash_offset > max_hash_offset) {
const delta = self.hash_offset - 1;
self.hash_offset -= delta;
self.chain_head -|= delta;
// Iterate over slices instead of arrays to avoid copying
// the entire table onto the stack (https://golang.org/issue/18625).
for (self.hash_prev, 0..) |v, i| {
if (v > delta) {
self.hash_prev[i] = @as(u32, @intCast(v - delta));
} else {
self.hash_prev[i] = 0;
}
}
for (self.hash_head, 0..) |v, i| {
if (v > delta) {
self.hash_head[i] = @as(u32, @intCast(v - delta));
} else {
self.hash_head[i] = 0;
}
}
}
}
const n = std.compress.deflate.copy(self.window[self.window_end..], b);
self.window_end += n;
return @as(u32, @intCast(n));
}
fn writeBlock(self: *Self, tokens: []token.Token, index: usize) !void {
if (index > 0) {
var window: ?[]u8 = null;
if (self.block_start <= index) {
window = self.window[self.block_start..index];
}
self.block_start = index;
try self.hm_bw.writeBlock(tokens, false, window);
return;
}
return;
}
// fillWindow will fill the current window with the supplied
// dictionary and calculate all hashes.
// This is much faster than doing a full encode.
// Should only be used after a reset.
fn fillWindow(self: *Self, in_b: []const u8) void {
var b = in_b;
// Do not fill window if we are in store-only mode (look at the fill() function to see
// Compressions which use fillStore() instead of fillDeflate()).
if (self.compression == .no_compression or
self.compression == .huffman_only or
self.compression == .best_speed)
{
return;
}
// fillWindow() must not be called with stale data
assert(self.index == 0 and self.window_end == 0);
// If we are given too much, cut it.
if (b.len > window_size) {
b = b[b.len - window_size ..];
}
// Add all to window.
@memcpy(self.window[0..b.len], b);
const n = b.len;
// Calculate 256 hashes at the time (more L1 cache hits)
const loops = (n + 256 - min_match_length) / 256;
var j: usize = 0;
while (j < loops) : (j += 1) {
const index = j * 256;
var end = index + 256 + min_match_length - 1;
if (end > n) {
end = n;
}
const to_check = self.window[index..end];
const dst_size = to_check.len - min_match_length + 1;
if (dst_size <= 0) {
continue;
}
const dst = self.hash_match[0..dst_size];
_ = self.bulk_hasher(to_check, dst);
var new_h: u32 = 0;
for (dst, 0..) |val, i| {
const di = i + index;
new_h = val;
const hh = &self.hash_head[new_h & hash_mask];
// Get previous value with the same hash.
// Our chain should point to the previous value.
self.hash_prev[di & window_mask] = hh.*;
// Set the head of the hash chain to us.
hh.* = @as(u32, @intCast(di + self.hash_offset));
}
self.hash = new_h;
}
// Update window information.
self.window_end = n;
self.index = @as(u32, @intCast(n));
}
const Match = struct {
length: u32,
offset: u32,
ok: bool,
};
// Try to find a match starting at pos whose length is greater than prev_length.
// We only look at self.compression_level.chain possibilities before giving up.
fn findMatch(
self: *Self,
pos: u32,
prev_head: u32,
prev_length: u32,
lookahead: u32,
) Match {
var length: u32 = 0;
var offset: u32 = 0;
var ok: bool = false;
var min_match_look: u32 = max_match_length;
if (lookahead < min_match_look) {
min_match_look = lookahead;
}
var win = self.window[0 .. pos + min_match_look];
// We quit when we get a match that's at least nice long
var nice = win.len - pos;
if (self.compression_level.nice < nice) {
nice = self.compression_level.nice;
}
// If we've got a match that's good enough, only look in 1/4 the chain.
var tries = self.compression_level.chain;
length = prev_length;
if (length >= self.compression_level.good) {
tries >>= 2;
}
var w_end = win[pos + length];
const w_pos = win[pos..];
const min_index = pos -| window_size;
var i = prev_head;
while (tries > 0) : (tries -= 1) {
if (w_end == win[i + length]) {
const n = matchLen(win[i..], w_pos, min_match_look);
if (n > length and (n > min_match_length or pos - i <= 4096)) {
length = n;
offset = pos - i;
ok = true;
if (n >= nice) {
// The match is good enough that we don't try to find a better one.
break;
}
w_end = win[pos + n];
}
}
if (i == min_index) {
// hash_prev[i & window_mask] has already been overwritten, so stop now.
break;
}
if (@as(u32, @intCast(self.hash_prev[i & window_mask])) < self.hash_offset) {
break;
}
i = @as(u32, @intCast(self.hash_prev[i & window_mask])) - self.hash_offset;
if (i < min_index) {
break;
}
}
return Match{ .length = length, .offset = offset, .ok = ok };
}
fn writeStoredBlock(self: *Self, buf: []u8) !void {
try self.hm_bw.writeStoredHeader(buf.len, false);
try self.hm_bw.writeBytes(buf);
}
// encSpeed will compress and store the currently added data,
// if enough has been accumulated or we at the end of the stream.
fn encSpeed(self: *Self) !void {
// We only compress if we have max_store_block_size.
if (self.window_end < max_store_block_size) {
if (!self.sync) {
return;
}
// Handle small sizes.
if (self.window_end < 128) {
switch (self.window_end) {
0 => return,
1...16 => {
try self.writeStoredBlock(self.window[0..self.window_end]);
},
else => {
try self.hm_bw.writeBlockHuff(false, self.window[0..self.window_end]);
self.err = self.hm_bw.err;
},
}
self.window_end = 0;
self.best_speed_enc.reset();
return;
}
}
// Encode the block.
self.tokens_count = 0;
self.best_speed_enc.encode(
self.tokens,
&self.tokens_count,
self.window[0..self.window_end],
);
// If we removed less than 1/16th, Huffman compress the block.
if (self.tokens_count > self.window_end - (self.window_end >> 4)) {
try self.hm_bw.writeBlockHuff(false, self.window[0..self.window_end]);
} else {
try self.hm_bw.writeBlockDynamic(
self.tokens[0..self.tokens_count],
false,
self.window[0..self.window_end],
);
}
self.err = self.hm_bw.err;
self.window_end = 0;
}
fn initDeflate(self: *Self) !void {
self.window = try self.allocator.alloc(u8, 2 * window_size);
self.hash_offset = 1;
self.tokens = try self.allocator.alloc(token.Token, max_flate_block_tokens);
self.tokens_count = 0;
@memset(self.tokens, 0);
self.length = min_match_length - 1;
self.offset = 0;
self.byte_available = false;
self.index = 0;
self.hash = 0;
self.chain_head = 0;
self.bulk_hasher = bulkHash4;
}
fn deflate(self: *Self) !void {
if (self.window_end - self.index < min_match_length + max_match_length and !self.sync) {
return;
}
self.max_insert_index = self.window_end -| (min_match_length - 1);
if (self.index < self.max_insert_index) {
self.hash = hash4(self.window[self.index .. self.index + min_match_length]);
}
while (true) {
assert(self.index <= self.window_end);
const lookahead = self.window_end -| self.index;
if (lookahead < min_match_length + max_match_length) {
if (!self.sync) {
break;
}
assert(self.index <= self.window_end);
if (lookahead == 0) {
// Flush current output block if any.
if (self.byte_available) {
// There is still one pending token that needs to be flushed
self.tokens[self.tokens_count] = token.literalToken(@as(u32, @intCast(self.window[self.index - 1])));
self.tokens_count += 1;
self.byte_available = false;
}
if (self.tokens.len > 0) {
try self.writeBlock(self.tokens[0..self.tokens_count], self.index);
self.tokens_count = 0;
}
break;
}
}
if (self.index < self.max_insert_index) {
// Update the hash
self.hash = hash4(self.window[self.index .. self.index + min_match_length]);
const hh = &self.hash_head[self.hash & hash_mask];
self.chain_head = @as(u32, @intCast(hh.*));
self.hash_prev[self.index & window_mask] = @as(u32, @intCast(self.chain_head));
hh.* = @as(u32, @intCast(self.index + self.hash_offset));
}
const prev_length = self.length;
const prev_offset = self.offset;
self.length = min_match_length - 1;
self.offset = 0;
const min_index = self.index -| window_size;
if (self.hash_offset <= self.chain_head and
self.chain_head - self.hash_offset >= min_index and
(self.compression_level.fast_skip_hashshing != skip_never and
lookahead > min_match_length - 1 or
self.compression_level.fast_skip_hashshing == skip_never and
lookahead > prev_length and
prev_length < self.compression_level.lazy))
{
{
const fmatch = self.findMatch(
self.index,
self.chain_head -| self.hash_offset,
min_match_length - 1,
@as(u32, @intCast(lookahead)),
);
if (fmatch.ok) {
self.length = fmatch.length;
self.offset = fmatch.offset;
}
}
}
if (self.compression_level.fast_skip_hashshing != skip_never and
self.length >= min_match_length or
self.compression_level.fast_skip_hashshing == skip_never and
prev_length >= min_match_length and
self.length <= prev_length)
{
// There was a match at the previous step, and the current match is
// not better. Output the previous match.
if (self.compression_level.fast_skip_hashshing != skip_never) {
self.tokens[self.tokens_count] = token.matchToken(@as(u32, @intCast(self.length - base_match_length)), @as(u32, @intCast(self.offset - base_match_offset)));
self.tokens_count += 1;
} else {
self.tokens[self.tokens_count] = token.matchToken(
@as(u32, @intCast(prev_length - base_match_length)),
@as(u32, @intCast(prev_offset -| base_match_offset)),
);
self.tokens_count += 1;
}
// Insert in the hash table all strings up to the end of the match.
// index and index-1 are already inserted. If there is not enough
// lookahead, the last two strings are not inserted into the hash
// table.
if (self.length <= self.compression_level.fast_skip_hashshing) {
var newIndex: u32 = 0;
if (self.compression_level.fast_skip_hashshing != skip_never) {
newIndex = self.index + self.length;
} else {
newIndex = self.index + prev_length - 1;
}
var index = self.index;
index += 1;
while (index < newIndex) : (index += 1) {
if (index < self.max_insert_index) {
self.hash = hash4(self.window[index .. index + min_match_length]);
// Get previous value with the same hash.
// Our chain should point to the previous value.
const hh = &self.hash_head[self.hash & hash_mask];
self.hash_prev[index & window_mask] = hh.*;
// Set the head of the hash chain to us.
hh.* = @as(u32, @intCast(index + self.hash_offset));
}
}
self.index = index;
if (self.compression_level.fast_skip_hashshing == skip_never) {
self.byte_available = false;
self.length = min_match_length - 1;
}
} else {
// For matches this long, we don't bother inserting each individual
// item into the table.
self.index += self.length;
if (self.index < self.max_insert_index) {
self.hash = hash4(self.window[self.index .. self.index + min_match_length]);
}
}
if (self.tokens_count == max_flate_block_tokens) {
// The block includes the current character
try self.writeBlock(self.tokens[0..self.tokens_count], self.index);
self.tokens_count = 0;
}
} else {
if (self.compression_level.fast_skip_hashshing != skip_never or self.byte_available) {
var i = self.index -| 1;
if (self.compression_level.fast_skip_hashshing != skip_never) {
i = self.index;
}
self.tokens[self.tokens_count] = token.literalToken(@as(u32, @intCast(self.window[i])));
self.tokens_count += 1;
if (self.tokens_count == max_flate_block_tokens) {
try self.writeBlock(self.tokens[0..self.tokens_count], i + 1);
self.tokens_count = 0;
}
}
self.index += 1;
if (self.compression_level.fast_skip_hashshing == skip_never) {
self.byte_available = true;
}
}
}
}
fn fillStore(self: *Self, b: []const u8) u32 {
const n = std.compress.deflate.copy(self.window[self.window_end..], b);
self.window_end += n;
return @as(u32, @intCast(n));
}
fn store(self: *Self) !void {
if (self.window_end > 0 and (self.window_end == max_store_block_size or self.sync)) {
try self.writeStoredBlock(self.window[0..self.window_end]);
self.window_end = 0;
}
}
// storeHuff compresses and stores the currently added data
// when the self.window is full or we are at the end of the stream.
fn storeHuff(self: *Self) !void {
if (self.window_end < self.window.len and !self.sync or self.window_end == 0) {
return;
}
try self.hm_bw.writeBlockHuff(false, self.window[0..self.window_end]);
self.err = self.hm_bw.err;
self.window_end = 0;
}
pub fn bytesWritten(self: *Self) usize {
return self.hm_bw.bytes_written;
}
/// Writes the compressed form of `input` to the underlying writer.
pub fn write(self: *Self, input: []const u8) !usize {
var buf = input;
// writes data to hm_bw, which will eventually write the
// compressed form of data to its underlying writer.
while (buf.len > 0) {
try self.step();
const filled = self.fill(buf);
buf = buf[filled..];
}
return input.len;
}
/// Flushes any pending data to the underlying writer.
/// It is useful mainly in compressed network protocols, to ensure that
/// a remote reader has enough data to reconstruct a packet.
/// Flush does not return until the data has been written.
/// Calling `flush()` when there is no pending data still causes the Writer
/// to emit a sync marker of at least 4 bytes.
/// If the underlying writer returns an error, `flush()` returns that error.
///
/// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
pub fn flush(self: *Self) !void {
self.sync = true;
try self.step();
try self.hm_bw.writeStoredHeader(0, false);
try self.hm_bw.flush();
self.sync = false;
return;
}
fn step(self: *Self) !void {
switch (self.compression) {
.no_compression => return self.store(),
.huffman_only => return self.storeHuff(),
.best_speed => return self.encSpeed(),
.default_compression,
.level_2,
.level_3,
.level_4,
.level_5,
.level_6,
.level_7,
.level_8,
.best_compression,
=> return self.deflate(),
}
}
fn fill(self: *Self, b: []const u8) u32 {
switch (self.compression) {
.no_compression => return self.fillStore(b),
.huffman_only => return self.fillStore(b),
.best_speed => return self.fillStore(b),
.default_compression,
.level_2,
.level_3,
.level_4,
.level_5,
.level_6,
.level_7,
.level_8,
.best_compression,
=> return self.fillDeflate(b),
}
}
fn init(
allocator: Allocator,
in_writer: WriterType,
options: CompressorOptions,
) !Self {
var s = Self{
.allocator = undefined,
.compression = undefined,
.compression_level = undefined,
.hm_bw = undefined, // HuffmanBitWriter
.bulk_hasher = undefined,
.sync = false,
.best_speed_enc = undefined, // Best speed encoder
.chain_head = 0,
.hash_head = undefined,
.hash_prev = undefined, // previous hash
.hash_offset = 0,
.index = 0,
.window = undefined,
.window_end = 0,
.block_start = 0,
.byte_available = false,
.tokens = undefined,
.tokens_count = 0,
.length = 0,
.offset = 0,
.hash = 0,
.max_insert_index = 0,
.err = false, // Error
.hash_match = undefined,
.dictionary = options.dictionary,
};
s.hm_bw = try hm_bw.huffmanBitWriter(allocator, in_writer);
s.allocator = allocator;
s.hash_head = try allocator.alloc(u32, hash_size);
s.hash_prev = try allocator.alloc(u32, window_size);
s.hash_match = try allocator.alloc(u32, max_match_length - 1);
@memset(s.hash_head, 0);
@memset(s.hash_prev, 0);
@memset(s.hash_match, 0);
switch (options.level) {
.no_compression => {
s.compression = options.level;
s.compression_level = levels(options.level);
s.window = try allocator.alloc(u8, max_store_block_size);
s.tokens = try allocator.alloc(token.Token, 0);
},
.huffman_only => {
s.compression = options.level;
s.compression_level = levels(options.level);
s.window = try allocator.alloc(u8, max_store_block_size);
s.tokens = try allocator.alloc(token.Token, 0);
},
.best_speed => {
s.compression = options.level;
s.compression_level = levels(options.level);
s.window = try allocator.alloc(u8, max_store_block_size);
s.tokens = try allocator.alloc(token.Token, max_store_block_size);
s.best_speed_enc = try allocator.create(fast.DeflateFast);
s.best_speed_enc.* = fast.deflateFast();
try s.best_speed_enc.init(allocator);
},
.default_compression => {
s.compression = .level_6;
s.compression_level = levels(.level_6);
try s.initDeflate();
if (options.dictionary != null) {
s.fillWindow(options.dictionary.?);
}
},
.level_2,
.level_3,
.level_4,
.level_5,
.level_6,
.level_7,
.level_8,
.best_compression,
=> {
s.compression = options.level;
s.compression_level = levels(options.level);
try s.initDeflate();
if (options.dictionary != null) {
s.fillWindow(options.dictionary.?);
}
},
}
return s;
}
/// Release all allocated memory.
pub fn deinit(self: *Self) void {
self.hm_bw.deinit();
self.allocator.free(self.window);
self.allocator.free(self.tokens);
self.allocator.free(self.hash_head);
self.allocator.free(self.hash_prev);
self.allocator.free(self.hash_match);
if (self.compression == .best_speed) {
self.best_speed_enc.deinit();
self.allocator.destroy(self.best_speed_enc);
}
}
/// Reset discards the inner writer's state and replace the inner writer with new_writer.
/// new_writer must be of the same type as the previous writer.
pub fn reset(self: *Self, new_writer: WriterType) void {
self.hm_bw.reset(new_writer);
self.sync = false;
switch (self.compression) {
// Reset window
.no_compression => self.window_end = 0,
// Reset window, tokens, and encoder
.best_speed => {
self.window_end = 0;
self.tokens_count = 0;
self.best_speed_enc.reset();
},
// Reset everything and reinclude the dictionary if there is one
.huffman_only,
.default_compression,
.level_2,
.level_3,
.level_4,
.level_5,
.level_6,
.level_7,
.level_8,
.best_compression,
=> {
self.chain_head = 0;
@memset(self.hash_head, 0);
@memset(self.hash_prev, 0);
self.hash_offset = 1;
self.index = 0;
self.window_end = 0;
self.block_start = 0;
self.byte_available = false;
self.tokens_count = 0;
self.length = min_match_length - 1;
self.offset = 0;
self.hash = 0;
self.max_insert_index = 0;
if (self.dictionary != null) {
self.fillWindow(self.dictionary.?);
}
},
}
}
/// Writes any pending data to the underlying writer.
pub fn close(self: *Self) !void {
self.sync = true;
try self.step();
try self.hm_bw.writeStoredHeader(0, true);
try self.hm_bw.flush();
return;
}
};
}
// tests
const expect = std.testing.expect;
const testing = std.testing;
const ArrayList = std.ArrayList;
const DeflateTest = struct {
in: []const u8,
level: Compression,
out: []const u8,
};
var deflate_tests = [_]DeflateTest{
// Level 0
.{
.in = &[_]u8{},
.level = .no_compression,
.out = &[_]u8{ 1, 0, 0, 255, 255 },
},
// Level -1
.{
.in = &[_]u8{0x11},
.level = .default_compression,
.out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{0x11},
.level = .level_6,
.out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
},
// Level 4
.{
.in = &[_]u8{0x11},
.level = .level_4,
.out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
},
// Level 0
.{
.in = &[_]u8{0x11},
.level = .no_compression,
.out = &[_]u8{ 0, 1, 0, 254, 255, 17, 1, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x12 },
.level = .no_compression,
.out = &[_]u8{ 0, 2, 0, 253, 255, 17, 18, 1, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 },
.level = .no_compression,
.out = &[_]u8{ 0, 8, 0, 247, 255, 17, 17, 17, 17, 17, 17, 17, 17, 1, 0, 0, 255, 255 },
},
// Level 2
.{
.in = &[_]u8{},
.level = .level_2,
.out = &[_]u8{ 1, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{0x11},
.level = .level_2,
.out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x12 },
.level = .level_2,
.out = &[_]u8{ 18, 20, 2, 4, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 },
.level = .level_2,
.out = &[_]u8{ 18, 132, 2, 64, 0, 0, 0, 255, 255 },
},
// Level 9
.{
.in = &[_]u8{},
.level = .best_compression,
.out = &[_]u8{ 1, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{0x11},
.level = .best_compression,
.out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x12 },
.level = .best_compression,
.out = &[_]u8{ 18, 20, 2, 4, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 },
.level = .best_compression,
.out = &[_]u8{ 18, 132, 2, 64, 0, 0, 0, 255, 255 },
},
};
test "deflate" {
for (deflate_tests) |dt| {
var output = ArrayList(u8).init(testing.allocator);
defer output.deinit();
var comp = try compressor(testing.allocator, output.writer(), .{ .level = dt.level });
_ = try comp.write(dt.in);
try comp.close();
comp.deinit();
try testing.expectEqualSlices(u8, dt.out, output.items);
}
}
test "bulkHash4" {
for (deflate_tests) |x| {
if (x.out.len < min_match_length) {
continue;
}
// double the test data
var out = try testing.allocator.alloc(u8, x.out.len * 2);
defer testing.allocator.free(out);
@memcpy(out[0..x.out.len], x.out);
@memcpy(out[x.out.len..], x.out);
var j: usize = 4;
while (j < out.len) : (j += 1) {
var y = out[0..j];
const dst = try testing.allocator.alloc(u32, y.len - min_match_length + 1);
defer testing.allocator.free(dst);
_ = bulkHash4(y, dst);
for (dst, 0..) |got, i| {
const want = hash4(y[i..]);
try testing.expectEqual(want, got);
}
}
}
}