zig/lib/std/compress/deflate/compressor.zig
Igor Anić d645114f7e add deflate implemented from first principles
Zig deflate compression/decompression implementation. It supports compression and decompression of the gzip, zlib and raw deflate formats.

Fixes #18062.

This PR replaces the current compress/gzip and compress/zlib packages. The deflate package is renamed to flate; flate is the common name for deflate/inflate, where deflate is compression and inflate is decompression.

There are breaking changes. Method signatures changed because the allocator was removed, and I also unified the API across all three namespaces (flate, gzip, zlib).

For now I have put the old packages under a v1 namespace, so they are still available as compress/v1/gzip, compress/v1/zlib and compress/v1/deflate. The idea is to give users of the current API a little time before they have to work out what to change, although that raises the question of when it is safe to remove the v1 namespace.

Here is the current API in the compress package:

```Zig
// deflate
    fn compressor(allocator, writer, options) !Compressor(@TypeOf(writer))
    fn Compressor(comptime WriterType) type

    fn decompressor(allocator, reader, dictionary) !Decompressor(@TypeOf(reader))
    fn Decompressor(comptime ReaderType: type) type

// gzip
    fn compress(allocator, writer, options) !Compress(@TypeOf(writer))
    fn Compress(comptime WriterType: type) type

    fn decompress(allocator, reader) !Decompress(@TypeOf(reader))
    fn Decompress(comptime ReaderType: type) type

// zlib
    fn compressStream(allocator, writer, options) !CompressStream(@TypeOf(writer))
    fn CompressStream(comptime WriterType: type) type

    fn decompressStream(allocator, reader) !DecompressStream(@TypeOf(reader))
    fn DecompressStream(comptime ReaderType: type) type

// xz
   fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader))
   fn Decompress(comptime ReaderType: type) type

// lzma
    fn decompress(allocator, reader) !Decompress(@TypeOf(reader))
    fn Decompress(comptime ReaderType: type) type

// lzma2
    fn decompress(allocator, reader, writer) !void

// zstandard:
    fn DecompressStream(ReaderType, options) type
    fn decompressStream(allocator, reader) DecompressStream(@TypeOf(reader), .{})
    struct decompress
```

The proposed naming convention:
 - Compressor/Decompressor for functions which return a type, like Reader/Writer/GeneralPurposeAllocator
 - compressor/decompressor for functions which initialize that type, like reader/writer/allocator
 - compress/decompress for one-shot operations which accept a reader/writer pair, like read/write/alloc

```Zig
/// Compress from reader and write compressed data to the writer.
fn compress(reader: anytype, writer: anytype, options: Options) !void

/// Create a Compressor which writes compressed data to the writer.
fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer))

/// Compressor type
fn Compressor(comptime WriterType: type) type

/// Decompress from reader and write plain data to the writer.
fn decompress(reader: anytype, writer: anytype) !void

/// Create Decompressor which reads from reader.
fn decompressor(reader: anytype) Decompressor(@TypeOf(reader))

/// Decompressor type
fn Decompressor(comptime ReaderType: type) type

```
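
To illustrate the convention, here is a minimal sketch of a round trip through the one-shot pair (assuming the flate module lands with exactly the signatures above and that passing `.{}` selects default `Options`):

```Zig
const std = @import("std");

// Sketch only: assumes std.compress.flate exposes the proposed one-shot
// compress/decompress signatures and that `.{}` gives default options.
test "one-shot round trip" {
    const input = "a man, a plan, a canal: panama";

    // compress: read plain data from a reader, write deflate data to a writer,
    // no allocator involved
    var compressed_buf: [256]u8 = undefined;
    var compressed = std.io.fixedBufferStream(&compressed_buf);
    var plain_in = std.io.fixedBufferStream(input);
    try std.compress.flate.compress(plain_in.reader(), compressed.writer(), .{});

    // decompress: read deflate data from a reader, write plain data to a writer
    var plain_buf: [256]u8 = undefined;
    var plain_out = std.io.fixedBufferStream(&plain_buf);
    var compressed_in = std.io.fixedBufferStream(compressed.getWritten());
    try std.compress.flate.decompress(compressed_in.reader(), plain_out.writer());

    try std.testing.expectEqualSlices(u8, input, plain_out.getWritten());
}
```

The same shape would apply to gzip and zlib, since all three namespaces share the API.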

Comparing this implementation with the one we currently have in Zig's standard library (std):
std is roughly 1.2-1.4 times slower in decompression and 1.1-1.2 times slower in compression. Compressed sizes are pretty much the same in both cases.
More results are in [this](https://github.com/ianic/flate) repo.

This library uses static allocation for all structures and doesn't require an allocator. That makes the most sense for deflate, where all structures and internal buffers are allocated to their full size up front. It matters a little less for inflate, where the std version uses less memory by not preallocating arrays to their theoretical maximum size, since they are usually not fully used.

For deflate this library allocates 395K while std allocates 779K.
For inflate this library allocates 74.5K while std allocates around 36K.

The inflate difference is because this library uses a 64K history window instead of the 32K used in std.

If merged, existing usage of compress gzip/zlib/deflate needs some changes. Here is an example with the necessary changes noted in comments:

```Zig

const std = @import("std");

// To get this file:
// wget -nc -O war_and_peace.txt https://www.gutenberg.org/ebooks/2600.txt.utf-8
const data = @embedFile("war_and_peace.txt");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa.deinit() == .ok);
    const allocator = gpa.allocator();

    try oldDeflate(allocator);
    try new(std.compress.flate, allocator);

    try oldZlib(allocator);
    try new(std.compress.zlib, allocator);

    try oldGzip(allocator);
    try new(std.compress.gzip, allocator);
}

pub fn new(comptime pkg: type, allocator: std.mem.Allocator) !void {
    var buf = std.ArrayList(u8).init(allocator);
    defer buf.deinit();

    // Compressor
    var cmp = try pkg.compressor(buf.writer(), .{});
    _ = try cmp.write(data);
    try cmp.finish();

    var fbs = std.io.fixedBufferStream(buf.items);
    // Decompressor
    var dcp = pkg.decompressor(fbs.reader());

    const plain = try dcp.reader().readAllAlloc(allocator, std.math.maxInt(usize));
    defer allocator.free(plain);
    try std.testing.expectEqualSlices(u8, data, plain);
}

pub fn oldDeflate(allocator: std.mem.Allocator) !void {
    const deflate = std.compress.v1.deflate;

    // Compressor
    var buf = std.ArrayList(u8).init(allocator);
    defer buf.deinit();
    // Remove allocator
    // Rename deflate -> flate
    var cmp = try deflate.compressor(allocator, buf.writer(), .{});
    _ = try cmp.write(data);
    try cmp.close(); // Rename to finish
    cmp.deinit(); // Remove

    // Decompressor
    var fbs = std.io.fixedBufferStream(buf.items);
    // Remove allocator and last param
    // Rename deflate -> flate
    // Remove try
    var dcp = try deflate.decompressor(allocator, fbs.reader(), null);
    defer dcp.deinit(); // Remove

    const plain = try dcp.reader().readAllAlloc(allocator, std.math.maxInt(usize));
    defer allocator.free(plain);
    try std.testing.expectEqualSlices(u8, data, plain);
}

pub fn oldZlib(allocator: std.mem.Allocator) !void {
    const zlib = std.compress.v1.zlib;

    var buf = std.ArrayList(u8).init(allocator);
    defer buf.deinit();

    // Compressor
    // Rename compressStream => compressor
    // Remove allocator
    var cmp = try zlib.compressStream(allocator, buf.writer(), .{});
    _ = try cmp.write(data);
    try cmp.finish();
    cmp.deinit(); // Remove

    var fbs = std.io.fixedBufferStream(buf.items);
    // Decompressor
    // decompressStream => decompressor
    // Remove allocator
    // Remove try
    var dcp = try zlib.decompressStream(allocator, fbs.reader());
    defer dcp.deinit(); // Remove

    const plain = try dcp.reader().readAllAlloc(allocator, std.math.maxInt(usize));
    defer allocator.free(plain);
    try std.testing.expectEqualSlices(u8, data, plain);
}

pub fn oldGzip(allocator: std.mem.Allocator) !void {
    const gzip = std.compress.v1.gzip;

    var buf = std.ArrayList(u8).init(allocator);
    defer buf.deinit();

    // Compressor
    // Rename compress => compressor
    // Remove allocator
    var cmp = try gzip.compress(allocator, buf.writer(), .{});
    _ = try cmp.write(data);
    try cmp.close(); // Rename to finish
    cmp.deinit(); // Remove

    var fbs = std.io.fixedBufferStream(buf.items);
    // Decompressor
    // Rename decompress => decompressor
    // Remove allocator
    // Remove try
    var dcp = try gzip.decompress(allocator, fbs.reader());
    defer dcp.deinit(); // Remove

    const plain = try dcp.reader().readAllAlloc(allocator, std.math.maxInt(usize));
    defer allocator.free(plain);
    try std.testing.expectEqualSlices(u8, data, plain);
}

```

const std = @import("std");
const assert = std.debug.assert;
const fmt = std.fmt;
const io = std.io;
const math = std.math;
const mem = std.mem;
const Allocator = std.mem.Allocator;
const deflate_const = @import("deflate_const.zig");
const fast = @import("deflate_fast.zig");
const hm_bw = @import("huffman_bit_writer.zig");
const token = @import("token.zig");
pub const Compression = enum(i5) {
/// huffman_only disables Lempel-Ziv match searching and only performs Huffman
/// entropy encoding. This mode is useful in compressing data that has
/// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
/// that lacks an entropy encoder. Compression gains are achieved when
/// certain bytes in the input stream occur more frequently than others.
///
/// Note that huffman_only produces a compressed output that is
/// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
/// continue to be able to decompress this output.
huffman_only = -2,
/// Same as level_6
default_compression = -1,
/// Does not attempt any compression; only adds the necessary DEFLATE framing.
no_compression = 0,
/// Prioritizes speed over output size, based on Snappy's LZ77-style encoder
best_speed = 1,
level_2 = 2,
level_3 = 3,
level_4 = 4,
level_5 = 5,
level_6 = 6,
level_7 = 7,
level_8 = 8,
/// Prioritizes smaller output size over speed
best_compression = 9,
};
const log_window_size = 15;
const window_size = 1 << log_window_size;
const window_mask = window_size - 1;
// The LZ77 step produces a sequence of literal tokens and <length, offset>
// pair tokens. The offset is also known as distance. The underlying wire
// format limits the range of lengths and offsets. For example, there are
// 256 legitimate lengths: those in the range [3, 258]. This package's
// compressor uses a higher minimum match length, enabling optimizations
// such as finding matches via 32-bit loads and compares.
const base_match_length = deflate_const.base_match_length; // The smallest match length per the RFC section 3.2.5
const min_match_length = 4; // The smallest match length that the compressor actually emits
const max_match_length = deflate_const.max_match_length;
const base_match_offset = deflate_const.base_match_offset; // The smallest match offset
const max_match_offset = deflate_const.max_match_offset; // The largest match offset
// The maximum number of tokens we put into a single flate block, just to
// stop things from getting too large.
const max_flate_block_tokens = 1 << 14;
const max_store_block_size = deflate_const.max_store_block_size;
const hash_bits = 17; // After 17 performance degrades
const hash_size = 1 << hash_bits;
const hash_mask = (1 << hash_bits) - 1;
const max_hash_offset = 1 << 24;
const skip_never = math.maxInt(u32);
const CompressionLevel = struct {
good: u16,
lazy: u16,
nice: u16,
chain: u16,
fast_skip_hashshing: u32,
};
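// Returns the per-level tuning parameters (good/lazy/nice match lengths and
// hash chain limit) used by the match-finding loop for the given compression level.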
fn levels(compression: Compression) CompressionLevel {
switch (compression) {
.no_compression,
.best_speed, // best_speed uses a custom algorithm; see deflate_fast.zig
.huffman_only,
=> return .{
.good = 0,
.lazy = 0,
.nice = 0,
.chain = 0,
.fast_skip_hashshing = 0,
},
// For levels 2-3 we don't bother trying with lazy matches.
.level_2 => return .{
.good = 4,
.lazy = 0,
.nice = 16,
.chain = 8,
.fast_skip_hashshing = 5,
},
.level_3 => return .{
.good = 4,
.lazy = 0,
.nice = 32,
.chain = 32,
.fast_skip_hashshing = 6,
},
// Levels 4-9 use increasingly more lazy matching and increasingly stringent conditions for
// "good enough".
.level_4 => return .{
.good = 4,
.lazy = 4,
.nice = 16,
.chain = 16,
.fast_skip_hashshing = skip_never,
},
.level_5 => return .{
.good = 8,
.lazy = 16,
.nice = 32,
.chain = 32,
.fast_skip_hashshing = skip_never,
},
.default_compression,
.level_6,
=> return .{
.good = 8,
.lazy = 16,
.nice = 128,
.chain = 128,
.fast_skip_hashshing = skip_never,
},
.level_7 => return .{
.good = 8,
.lazy = 32,
.nice = 128,
.chain = 256,
.fast_skip_hashshing = skip_never,
},
.level_8 => return .{
.good = 32,
.lazy = 128,
.nice = 258,
.chain = 1024,
.fast_skip_hashshing = skip_never,
},
.best_compression => return .{
.good = 32,
.lazy = 258,
.nice = 258,
.chain = 4096,
.fast_skip_hashshing = skip_never,
},
}
}
// matchLen returns the number of matching bytes in a and b
// up to length 'max'. Both slices must be at least 'max'
// bytes in size.
fn matchLen(a: []u8, b: []u8, max: u32) u32 {
const bounded_a = a[0..max];
const bounded_b = b[0..max];
for (bounded_a, 0..) |av, i| {
if (bounded_b[i] != av) {
return @as(u32, @intCast(i));
}
}
return max;
}
const hash_mul = 0x1e35a7bd;
// hash4 returns a hash representation of the first 4 bytes
// of the supplied slice.
// The caller must ensure that b.len >= 4.
fn hash4(b: []u8) u32 {
return ((@as(u32, b[3]) |
@as(u32, b[2]) << 8 |
@as(u32, b[1]) << 16 |
@as(u32, b[0]) << 24) *% hash_mul) >> (32 - hash_bits);
}
// bulkHash4 will compute hashes using the same
// algorithm as hash4
fn bulkHash4(b: []u8, dst: []u32) u32 {
if (b.len < min_match_length) {
return 0;
}
var hb =
@as(u32, b[3]) |
@as(u32, b[2]) << 8 |
@as(u32, b[1]) << 16 |
@as(u32, b[0]) << 24;
dst[0] = (hb *% hash_mul) >> (32 - hash_bits);
const end = b.len - min_match_length + 1;
var i: u32 = 1;
while (i < end) : (i += 1) {
hb = (hb << 8) | @as(u32, b[i + 3]);
dst[i] = (hb *% hash_mul) >> (32 - hash_bits);
}
return hb;
}
pub const CompressorOptions = struct {
level: Compression = .default_compression,
dictionary: ?[]const u8 = null,
};
/// Returns a new Compressor compressing data at the given level.
/// Following zlib, levels range from 1 (best_speed) to 9 (best_compression);
/// higher levels typically run slower but compress more. Level 0
/// (no_compression) does not attempt any compression; it only adds the
/// necessary DEFLATE framing.
/// Level -1 (default_compression) uses the default compression level.
/// Level -2 (huffman_only) will use Huffman compression only, giving
/// a very fast compression for all types of input, but sacrificing considerable
/// compression efficiency.
///
/// `dictionary` is optional and initializes the new `Compressor` with a preset dictionary.
/// The returned Compressor behaves as if the dictionary had been written to it without producing
/// any compressed output. The compressed data written to hm_bw can only be decompressed by a
/// Decompressor initialized with the same dictionary.
///
/// The compressed data will be passed to the provided `writer`, see `writer()` and `write()`.
pub fn compressor(
allocator: Allocator,
writer: anytype,
options: CompressorOptions,
) !Compressor(@TypeOf(writer)) {
return Compressor(@TypeOf(writer)).init(allocator, writer, options);
}
pub fn Compressor(comptime WriterType: anytype) type {
return struct {
const Self = @This();
/// A Writer takes data written to it and writes the compressed
/// form of that data to an underlying writer.
pub const Writer = io.Writer(*Self, Error, write);
/// Returns a Writer that takes data written to it and writes the compressed
/// form of that data to an underlying writer.
pub fn writer(self: *Self) Writer {
return .{ .context = self };
}
pub const Error = WriterType.Error;
allocator: Allocator,
compression: Compression,
compression_level: CompressionLevel,
// Inner writer wrapped in a HuffmanBitWriter
hm_bw: hm_bw.HuffmanBitWriter(WriterType) = undefined,
bulk_hasher: *const fn ([]u8, []u32) u32,
sync: bool, // requesting flush
best_speed_enc: *fast.DeflateFast, // Encoder for best_speed
// Input hash chains
// hash_head[hashValue] contains the largest inputIndex with the specified hash value
// If hash_head[hashValue] is within the current window, then
// hash_prev[hash_head[hashValue] & window_mask] contains the previous index
// with the same hash value.
chain_head: u32,
hash_head: []u32, // [hash_size]u32,
hash_prev: []u32, // [window_size]u32,
hash_offset: u32,
// input window: unprocessed data is window[index..window_end]
index: u32,
window: []u8,
window_end: usize,
block_start: usize, // window index where current tokens start
byte_available: bool, // if true, still need to process window[index-1].
// queued output tokens
tokens: []token.Token,
tokens_count: u16,
// deflate state
length: u32,
offset: u32,
hash: u32,
max_insert_index: usize,
err: bool,
// hash_match must be able to contain hashes for the maximum match length.
hash_match: []u32, // [max_match_length - 1]u32,
// dictionary
dictionary: ?[]const u8,
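// Copies as much of b as fits into the input window, shifting the window by
// window_size when it fills up, and returns the number of bytes consumed.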
fn fillDeflate(self: *Self, b: []const u8) u32 {
if (self.index >= 2 * window_size - (min_match_length + max_match_length)) {
// shift the window by window_size
mem.copyForwards(u8, self.window, self.window[window_size .. 2 * window_size]);
self.index -= window_size;
self.window_end -= window_size;
if (self.block_start >= window_size) {
self.block_start -= window_size;
} else {
self.block_start = math.maxInt(u32);
}
self.hash_offset += window_size;
if (self.hash_offset > max_hash_offset) {
const delta = self.hash_offset - 1;
self.hash_offset -= delta;
self.chain_head -|= delta;
// Iterate over slices instead of arrays to avoid copying
// the entire table onto the stack (https://golang.org/issue/18625).
for (self.hash_prev, 0..) |v, i| {
if (v > delta) {
self.hash_prev[i] = @as(u32, @intCast(v - delta));
} else {
self.hash_prev[i] = 0;
}
}
for (self.hash_head, 0..) |v, i| {
if (v > delta) {
self.hash_head[i] = @as(u32, @intCast(v - delta));
} else {
self.hash_head[i] = 0;
}
}
}
}
const n = std.compress.v1.deflate.copy(self.window[self.window_end..], b);
self.window_end += n;
return @as(u32, @intCast(n));
}
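// Emits the queued tokens as a non-final deflate block through the Huffman bit
// writer, handing it window[block_start..index] (the bytes those tokens cover)
// and advancing block_start. Does nothing when index == 0.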
fn writeBlock(self: *Self, tokens: []token.Token, index: usize) !void {
if (index > 0) {
var window: ?[]u8 = null;
if (self.block_start <= index) {
window = self.window[self.block_start..index];
}
self.block_start = index;
try self.hm_bw.writeBlock(tokens, false, window);
return;
}
return;
}
// fillWindow will fill the current window with the supplied
// dictionary and calculate all hashes.
// This is much faster than doing a full encode.
// Should only be used after a reset.
fn fillWindow(self: *Self, in_b: []const u8) void {
var b = in_b;
// Do not fill the window if we are in store-only mode (see fill() for which
// compression modes use fillStore() instead of fillDeflate()).
if (self.compression == .no_compression or
self.compression == .huffman_only or
self.compression == .best_speed)
{
return;
}
// fillWindow() must not be called with stale data
assert(self.index == 0 and self.window_end == 0);
// If we are given too much, cut it.
if (b.len > window_size) {
b = b[b.len - window_size ..];
}
// Add all to window.
@memcpy(self.window[0..b.len], b);
const n = b.len;
// Calculate 256 hashes at a time (more L1 cache hits)
const loops = (n + 256 - min_match_length) / 256;
var j: usize = 0;
while (j < loops) : (j += 1) {
const index = j * 256;
var end = index + 256 + min_match_length - 1;
if (end > n) {
end = n;
}
const to_check = self.window[index..end];
const dst_size = to_check.len - min_match_length + 1;
if (dst_size <= 0) {
continue;
}
const dst = self.hash_match[0..dst_size];
_ = self.bulk_hasher(to_check, dst);
var new_h: u32 = 0;
for (dst, 0..) |val, i| {
const di = i + index;
new_h = val;
const hh = &self.hash_head[new_h & hash_mask];
// Get previous value with the same hash.
// Our chain should point to the previous value.
self.hash_prev[di & window_mask] = hh.*;
// Set the head of the hash chain to us.
hh.* = @as(u32, @intCast(di + self.hash_offset));
}
self.hash = new_h;
}
// Update window information.
self.window_end = n;
self.index = @as(u32, @intCast(n));
}
const Match = struct {
length: u32,
offset: u32,
ok: bool,
};
// Try to find a match starting at pos whose length is greater than prev_length.
// We only look at self.compression_level.chain possibilities before giving up.
fn findMatch(
self: *Self,
pos: u32,
prev_head: u32,
prev_length: u32,
lookahead: u32,
) Match {
var length: u32 = 0;
var offset: u32 = 0;
var ok: bool = false;
var min_match_look: u32 = max_match_length;
if (lookahead < min_match_look) {
min_match_look = lookahead;
}
var win = self.window[0 .. pos + min_match_look];
// We quit when we get a match that's at least nice long
var nice = win.len - pos;
if (self.compression_level.nice < nice) {
nice = self.compression_level.nice;
}
// If we've got a match that's good enough, only look in 1/4 the chain.
var tries = self.compression_level.chain;
length = prev_length;
if (length >= self.compression_level.good) {
tries >>= 2;
}
var w_end = win[pos + length];
const w_pos = win[pos..];
const min_index = pos -| window_size;
var i = prev_head;
while (tries > 0) : (tries -= 1) {
if (w_end == win[i + length]) {
const n = matchLen(win[i..], w_pos, min_match_look);
if (n > length and (n > min_match_length or pos - i <= 4096)) {
length = n;
offset = pos - i;
ok = true;
if (n >= nice) {
// The match is good enough that we don't try to find a better one.
break;
}
w_end = win[pos + n];
}
}
if (i == min_index) {
// hash_prev[i & window_mask] has already been overwritten, so stop now.
break;
}
if (@as(u32, @intCast(self.hash_prev[i & window_mask])) < self.hash_offset) {
break;
}
i = @as(u32, @intCast(self.hash_prev[i & window_mask])) - self.hash_offset;
if (i < min_index) {
break;
}
}
return Match{ .length = length, .offset = offset, .ok = ok };
}
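// Writes buf as a stored (uncompressed) block: a non-final stored-block header
// followed by the raw bytes.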
fn writeStoredBlock(self: *Self, buf: []u8) !void {
try self.hm_bw.writeStoredHeader(buf.len, false);
try self.hm_bw.writeBytes(buf);
}
// encSpeed will compress and store the currently added data,
// if enough has been accumulated or we at the end of the stream.
fn encSpeed(self: *Self) !void {
// We only compress if we have max_store_block_size.
if (self.window_end < max_store_block_size) {
if (!self.sync) {
return;
}
// Handle small sizes.
if (self.window_end < 128) {
switch (self.window_end) {
0 => return,
1...16 => {
try self.writeStoredBlock(self.window[0..self.window_end]);
},
else => {
try self.hm_bw.writeBlockHuff(false, self.window[0..self.window_end]);
self.err = self.hm_bw.err;
},
}
self.window_end = 0;
self.best_speed_enc.reset();
return;
}
}
// Encode the block.
self.tokens_count = 0;
self.best_speed_enc.encode(
self.tokens,
&self.tokens_count,
self.window[0..self.window_end],
);
// If we removed less than 1/16th, Huffman compress the block.
if (self.tokens_count > self.window_end - (self.window_end >> 4)) {
try self.hm_bw.writeBlockHuff(false, self.window[0..self.window_end]);
} else {
try self.hm_bw.writeBlockDynamic(
self.tokens[0..self.tokens_count],
false,
self.window[0..self.window_end],
);
}
self.err = self.hm_bw.err;
self.window_end = 0;
}
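// Allocates the window and token buffer and resets the hashing state for the
// levels that are driven by deflate().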
fn initDeflate(self: *Self) !void {
self.window = try self.allocator.alloc(u8, 2 * window_size);
self.hash_offset = 1;
self.tokens = try self.allocator.alloc(token.Token, max_flate_block_tokens);
self.tokens_count = 0;
@memset(self.tokens, 0);
self.length = min_match_length - 1;
self.offset = 0;
self.byte_available = false;
self.index = 0;
self.hash = 0;
self.chain_head = 0;
self.bulk_hasher = bulkHash4;
}
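// The main LZ77 loop used by levels 2-9: hashes the window, searches the hash
// chains for matches, emits literal and <length, offset> tokens, and writes a
// block whenever max_flate_block_tokens tokens are queued or a sync is requested.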
fn deflate(self: *Self) !void {
if (self.window_end - self.index < min_match_length + max_match_length and !self.sync) {
return;
}
self.max_insert_index = self.window_end -| (min_match_length - 1);
if (self.index < self.max_insert_index) {
self.hash = hash4(self.window[self.index .. self.index + min_match_length]);
}
while (true) {
assert(self.index <= self.window_end);
const lookahead = self.window_end -| self.index;
if (lookahead < min_match_length + max_match_length) {
if (!self.sync) {
break;
}
assert(self.index <= self.window_end);
if (lookahead == 0) {
// Flush current output block if any.
if (self.byte_available) {
// There is still one pending token that needs to be flushed
self.tokens[self.tokens_count] = token.literalToken(@as(u32, @intCast(self.window[self.index - 1])));
self.tokens_count += 1;
self.byte_available = false;
}
if (self.tokens.len > 0) {
try self.writeBlock(self.tokens[0..self.tokens_count], self.index);
self.tokens_count = 0;
}
break;
}
}
if (self.index < self.max_insert_index) {
// Update the hash
self.hash = hash4(self.window[self.index .. self.index + min_match_length]);
const hh = &self.hash_head[self.hash & hash_mask];
self.chain_head = @as(u32, @intCast(hh.*));
self.hash_prev[self.index & window_mask] = @as(u32, @intCast(self.chain_head));
hh.* = @as(u32, @intCast(self.index + self.hash_offset));
}
const prev_length = self.length;
const prev_offset = self.offset;
self.length = min_match_length - 1;
self.offset = 0;
const min_index = self.index -| window_size;
if (self.hash_offset <= self.chain_head and
self.chain_head - self.hash_offset >= min_index and
(self.compression_level.fast_skip_hashshing != skip_never and
lookahead > min_match_length - 1 or
self.compression_level.fast_skip_hashshing == skip_never and
lookahead > prev_length and
prev_length < self.compression_level.lazy))
{
{
const fmatch = self.findMatch(
self.index,
self.chain_head -| self.hash_offset,
min_match_length - 1,
@as(u32, @intCast(lookahead)),
);
if (fmatch.ok) {
self.length = fmatch.length;
self.offset = fmatch.offset;
}
}
}
if (self.compression_level.fast_skip_hashshing != skip_never and
self.length >= min_match_length or
self.compression_level.fast_skip_hashshing == skip_never and
prev_length >= min_match_length and
self.length <= prev_length)
{
// There was a match at the previous step, and the current match is
// not better. Output the previous match.
if (self.compression_level.fast_skip_hashshing != skip_never) {
self.tokens[self.tokens_count] = token.matchToken(@as(u32, @intCast(self.length - base_match_length)), @as(u32, @intCast(self.offset - base_match_offset)));
self.tokens_count += 1;
} else {
self.tokens[self.tokens_count] = token.matchToken(
@as(u32, @intCast(prev_length - base_match_length)),
@as(u32, @intCast(prev_offset -| base_match_offset)),
);
self.tokens_count += 1;
}
// Insert in the hash table all strings up to the end of the match.
// index and index-1 are already inserted. If there is not enough
// lookahead, the last two strings are not inserted into the hash
// table.
if (self.length <= self.compression_level.fast_skip_hashshing) {
var newIndex: u32 = 0;
if (self.compression_level.fast_skip_hashshing != skip_never) {
newIndex = self.index + self.length;
} else {
newIndex = self.index + prev_length - 1;
}
var index = self.index;
index += 1;
while (index < newIndex) : (index += 1) {
if (index < self.max_insert_index) {
self.hash = hash4(self.window[index .. index + min_match_length]);
// Get previous value with the same hash.
// Our chain should point to the previous value.
const hh = &self.hash_head[self.hash & hash_mask];
self.hash_prev[index & window_mask] = hh.*;
// Set the head of the hash chain to us.
hh.* = @as(u32, @intCast(index + self.hash_offset));
}
}
self.index = index;
if (self.compression_level.fast_skip_hashshing == skip_never) {
self.byte_available = false;
self.length = min_match_length - 1;
}
} else {
// For matches this long, we don't bother inserting each individual
// item into the table.
self.index += self.length;
if (self.index < self.max_insert_index) {
self.hash = hash4(self.window[self.index .. self.index + min_match_length]);
}
}
if (self.tokens_count == max_flate_block_tokens) {
// The block includes the current character
try self.writeBlock(self.tokens[0..self.tokens_count], self.index);
self.tokens_count = 0;
}
} else {
if (self.compression_level.fast_skip_hashshing != skip_never or self.byte_available) {
var i = self.index -| 1;
if (self.compression_level.fast_skip_hashshing != skip_never) {
i = self.index;
}
self.tokens[self.tokens_count] = token.literalToken(@as(u32, @intCast(self.window[i])));
self.tokens_count += 1;
if (self.tokens_count == max_flate_block_tokens) {
try self.writeBlock(self.tokens[0..self.tokens_count], i + 1);
self.tokens_count = 0;
}
}
self.index += 1;
if (self.compression_level.fast_skip_hashshing == skip_never) {
self.byte_available = true;
}
}
}
}
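// Copies as much of b as fits into the window without any match searching;
// used by the no_compression, huffman_only and best_speed modes.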
fn fillStore(self: *Self, b: []const u8) u32 {
const n = std.compress.v1.deflate.copy(self.window[self.window_end..], b);
self.window_end += n;
return @as(u32, @intCast(n));
}
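// Writes the window as a stored block once it is full (max_store_block_size)
// or a flush/close has been requested (self.sync).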
fn store(self: *Self) !void {
if (self.window_end > 0 and (self.window_end == max_store_block_size or self.sync)) {
try self.writeStoredBlock(self.window[0..self.window_end]);
self.window_end = 0;
}
}
// storeHuff compresses and stores the currently added data
// when the self.window is full or we are at the end of the stream.
fn storeHuff(self: *Self) !void {
if (self.window_end < self.window.len and !self.sync or self.window_end == 0) {
return;
}
try self.hm_bw.writeBlockHuff(false, self.window[0..self.window_end]);
self.err = self.hm_bw.err;
self.window_end = 0;
}
pub fn bytesWritten(self: *Self) usize {
return self.hm_bw.bytes_written;
}
/// Writes the compressed form of `input` to the underlying writer.
pub fn write(self: *Self, input: []const u8) Error!usize {
var buf = input;
// writes data to hm_bw, which will eventually write the
// compressed form of data to its underlying writer.
while (buf.len > 0) {
try self.step();
const filled = self.fill(buf);
buf = buf[filled..];
}
return input.len;
}
/// Flushes any pending data to the underlying writer.
/// It is useful mainly in compressed network protocols, to ensure that
/// a remote reader has enough data to reconstruct a packet.
/// Flush does not return until the data has been written.
/// Calling `flush()` when there is no pending data still causes the Writer
/// to emit a sync marker of at least 4 bytes.
/// If the underlying writer returns an error, `flush()` returns that error.
///
/// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
pub fn flush(self: *Self) Error!void {
self.sync = true;
try self.step();
try self.hm_bw.writeStoredHeader(0, false);
try self.hm_bw.flush();
self.sync = false;
return;
}
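// Runs one unit of work for the configured compression mode.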
fn step(self: *Self) !void {
switch (self.compression) {
.no_compression => return self.store(),
.huffman_only => return self.storeHuff(),
.best_speed => return self.encSpeed(),
.default_compression,
.level_2,
.level_3,
.level_4,
.level_5,
.level_6,
.level_7,
.level_8,
.best_compression,
=> return self.deflate(),
}
}
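// Copies input bytes into the window using the fill strategy that matches the
// configured compression mode.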
fn fill(self: *Self, b: []const u8) u32 {
switch (self.compression) {
.no_compression => return self.fillStore(b),
.huffman_only => return self.fillStore(b),
.best_speed => return self.fillStore(b),
.default_compression,
.level_2,
.level_3,
.level_4,
.level_5,
.level_6,
.level_7,
.level_8,
.best_compression,
=> return self.fillDeflate(b),
}
}
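// Allocates all compressor state for the requested level and, for the deflate
// levels, preloads the window with the preset dictionary when one is supplied.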
fn init(
allocator: Allocator,
in_writer: WriterType,
options: CompressorOptions,
) !Self {
var s = Self{
.allocator = undefined,
.compression = undefined,
.compression_level = undefined,
.hm_bw = undefined, // HuffmanBitWriter
.bulk_hasher = undefined,
.sync = false,
.best_speed_enc = undefined, // Best speed encoder
.chain_head = 0,
.hash_head = undefined,
.hash_prev = undefined, // previous hash
.hash_offset = 0,
.index = 0,
.window = undefined,
.window_end = 0,
.block_start = 0,
.byte_available = false,
.tokens = undefined,
.tokens_count = 0,
.length = 0,
.offset = 0,
.hash = 0,
.max_insert_index = 0,
.err = false, // Error
.hash_match = undefined,
.dictionary = options.dictionary,
};
s.hm_bw = try hm_bw.huffmanBitWriter(allocator, in_writer);
s.allocator = allocator;
s.hash_head = try allocator.alloc(u32, hash_size);
s.hash_prev = try allocator.alloc(u32, window_size);
s.hash_match = try allocator.alloc(u32, max_match_length - 1);
@memset(s.hash_head, 0);
@memset(s.hash_prev, 0);
@memset(s.hash_match, 0);
switch (options.level) {
.no_compression => {
s.compression = options.level;
s.compression_level = levels(options.level);
s.window = try allocator.alloc(u8, max_store_block_size);
s.tokens = try allocator.alloc(token.Token, 0);
},
.huffman_only => {
s.compression = options.level;
s.compression_level = levels(options.level);
s.window = try allocator.alloc(u8, max_store_block_size);
s.tokens = try allocator.alloc(token.Token, 0);
},
.best_speed => {
s.compression = options.level;
s.compression_level = levels(options.level);
s.window = try allocator.alloc(u8, max_store_block_size);
s.tokens = try allocator.alloc(token.Token, max_store_block_size);
s.best_speed_enc = try allocator.create(fast.DeflateFast);
s.best_speed_enc.* = fast.deflateFast();
try s.best_speed_enc.init(allocator);
},
.default_compression => {
s.compression = .level_6;
s.compression_level = levels(.level_6);
try s.initDeflate();
if (options.dictionary != null) {
s.fillWindow(options.dictionary.?);
}
},
.level_2,
.level_3,
.level_4,
.level_5,
.level_6,
.level_7,
.level_8,
.best_compression,
=> {
s.compression = options.level;
s.compression_level = levels(options.level);
try s.initDeflate();
if (options.dictionary != null) {
s.fillWindow(options.dictionary.?);
}
},
}
return s;
}
/// Release all allocated memory.
pub fn deinit(self: *Self) void {
self.hm_bw.deinit();
self.allocator.free(self.window);
self.allocator.free(self.tokens);
self.allocator.free(self.hash_head);
self.allocator.free(self.hash_prev);
self.allocator.free(self.hash_match);
if (self.compression == .best_speed) {
self.best_speed_enc.deinit();
self.allocator.destroy(self.best_speed_enc);
}
}
/// Reset discards the inner writer's state and replaces the inner writer with new_writer.
/// new_writer must be of the same type as the previous writer.
pub fn reset(self: *Self, new_writer: WriterType) void {
self.hm_bw.reset(new_writer);
self.sync = false;
switch (self.compression) {
// Reset window
.no_compression => self.window_end = 0,
// Reset window, tokens, and encoder
.best_speed => {
self.window_end = 0;
self.tokens_count = 0;
self.best_speed_enc.reset();
},
// Reset everything and reinclude the dictionary if there is one
.huffman_only,
.default_compression,
.level_2,
.level_3,
.level_4,
.level_5,
.level_6,
.level_7,
.level_8,
.best_compression,
=> {
self.chain_head = 0;
@memset(self.hash_head, 0);
@memset(self.hash_prev, 0);
self.hash_offset = 1;
self.index = 0;
self.window_end = 0;
self.block_start = 0;
self.byte_available = false;
self.tokens_count = 0;
self.length = min_match_length - 1;
self.offset = 0;
self.hash = 0;
self.max_insert_index = 0;
if (self.dictionary != null) {
self.fillWindow(self.dictionary.?);
}
},
}
}
/// Writes any pending data to the underlying writer.
pub fn close(self: *Self) Error!void {
self.sync = true;
try self.step();
try self.hm_bw.writeStoredHeader(0, true);
try self.hm_bw.flush();
return;
}
};
}
// tests
const expect = std.testing.expect;
const testing = std.testing;
const ArrayList = std.ArrayList;
const DeflateTest = struct {
in: []const u8,
level: Compression,
out: []const u8,
};
var deflate_tests = [_]DeflateTest{
// Level 0
.{
.in = &[_]u8{},
.level = .no_compression,
.out = &[_]u8{ 1, 0, 0, 255, 255 },
},
// Level -1
.{
.in = &[_]u8{0x11},
.level = .default_compression,
.out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{0x11},
.level = .level_6,
.out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
},
// Level 4
.{
.in = &[_]u8{0x11},
.level = .level_4,
.out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
},
// Level 0
.{
.in = &[_]u8{0x11},
.level = .no_compression,
.out = &[_]u8{ 0, 1, 0, 254, 255, 17, 1, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x12 },
.level = .no_compression,
.out = &[_]u8{ 0, 2, 0, 253, 255, 17, 18, 1, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 },
.level = .no_compression,
.out = &[_]u8{ 0, 8, 0, 247, 255, 17, 17, 17, 17, 17, 17, 17, 17, 1, 0, 0, 255, 255 },
},
// Level 2
.{
.in = &[_]u8{},
.level = .level_2,
.out = &[_]u8{ 1, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{0x11},
.level = .level_2,
.out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x12 },
.level = .level_2,
.out = &[_]u8{ 18, 20, 2, 4, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 },
.level = .level_2,
.out = &[_]u8{ 18, 132, 2, 64, 0, 0, 0, 255, 255 },
},
// Level 9
.{
.in = &[_]u8{},
.level = .best_compression,
.out = &[_]u8{ 1, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{0x11},
.level = .best_compression,
.out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x12 },
.level = .best_compression,
.out = &[_]u8{ 18, 20, 2, 4, 0, 0, 255, 255 },
},
.{
.in = &[_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 },
.level = .best_compression,
.out = &[_]u8{ 18, 132, 2, 64, 0, 0, 0, 255, 255 },
},
};
test "deflate" {
for (deflate_tests) |dt| {
var output = ArrayList(u8).init(testing.allocator);
defer output.deinit();
var comp = try compressor(testing.allocator, output.writer(), .{ .level = dt.level });
_ = try comp.write(dt.in);
try comp.close();
comp.deinit();
try testing.expectEqualSlices(u8, dt.out, output.items);
}
}
test "bulkHash4" {
for (deflate_tests) |x| {
if (x.out.len < min_match_length) {
continue;
}
// double the test data
var out = try testing.allocator.alloc(u8, x.out.len * 2);
defer testing.allocator.free(out);
@memcpy(out[0..x.out.len], x.out);
@memcpy(out[x.out.len..], x.out);
var j: usize = 4;
while (j < out.len) : (j += 1) {
var y = out[0..j];
const dst = try testing.allocator.alloc(u32, y.len - min_match_length + 1);
defer testing.allocator.free(dst);
_ = bulkHash4(y, dst);
for (dst, 0..) |got, i| {
const want = hash4(y[i..]);
try testing.expectEqual(want, got);
}
}
}
}