mirror of https://github.com/ziglang/zig.git
synced 2025-12-24 07:03:11 +00:00

std.compress.flate: finish reorganizing

This commit is contained in:
parent 73c98ca0e6
commit 824c157e0c
@@ -1,7 +1,23 @@
const builtin = @import("builtin");
const std = @import("../std.zig");
const testing = std.testing;
const Writer = std.Io.Writer;

/// When decompressing, the output buffer is used as the history window, so a
/// buffer smaller than this may result in failure to decompress streams that
/// were compressed with a larger window.
pub const max_window_len = history_len * 2;

pub const history_len = 32768;

/// Deflate is a lossless data compression file format that uses a combination
/// of LZ77 and Huffman coding.
pub const Compress = @import("flate/Compress.zig");

/// Inflate is the decoding process that takes a Deflate bitstream for
/// decompression and correctly produces the original full-size data or file.
pub const Decompress = @import("flate/Decompress.zig");

/// Compression without Lempel-Ziv match searching. Faster compression and
/// lower memory requirements, but bigger compressed sizes.
pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig");
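For orientation, a minimal round-trip sketch of the public API declared above, pieced together from the tests later in this commit (the buffer sizes and `.level_6` are illustrative assumptions, not requirements):

test "round trip sketch" {
    var cmp_buf: [64 * 1024]u8 = undefined;
    var dcm_buf: [64 * 1024]u8 = undefined;
    const data = "a minimal round-trip example";

    // Compress into a fixed buffer; .raw means bare deflate, no gzip/zlib framing.
    var compressed: Writer = .fixed(&cmp_buf);
    var compress: Compress = .init(&compressed, &.{}, .{ .container = .raw, .level = .level_6 });
    try compress.writer.writeAll(data);
    try compress.end();

    // Decompress and compare; &.{} passes no separate window buffer,
    // exactly as the tests below do.
    var in: std.Io.Reader = .fixed(cmp_buf[0..compressed.end]);
    var decompressed: Writer = .fixed(&dcm_buf);
    var decompress: Decompress = .init(&in, .raw, &.{});
    _ = try decompress.reader.streamRemaining(&decompressed);
    try testing.expectEqualSlices(u8, data, decompressed.buffered());
}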
/// Container of the deflate bit stream body. Container adds header before
/// deflate bit stream and footer after. It can be gzip, zlib or raw (no header,
@@ -13,7 +29,6 @@ const Writer = std.Io.Writer;
/// Gzip format is defined in RFC 1952. The header has 10+ bytes; the footer
/// has a 4-byte crc32 checksum and 4 bytes of uncompressed data length.
///
/// rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
/// rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
pub const Container = enum {
@@ -84,7 +99,7 @@ pub const Container = enum {
    pub fn init(container: Container) Hasher {
        return switch (container) {
            .gzip => .{ .gzip = .{} },
            .zlib => .{ .zlib = .init() },
            .zlib => .{ .zlib = .{} },
            .raw => .raw,
        };
    }
@@ -107,7 +122,7 @@ pub const Container = enum {
        }
    }

    pub fn writeFooter(hasher: *Hasher, writer: *Writer) Writer.Error!void {
    pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void {
        var bits: [4]u8 = undefined;
        switch (hasher.*) {
            .gzip => |*gzip| {
@@ -135,484 +150,6 @@ pub const Container = enum {
    };
};

/// When decompressing, the output buffer is used as the history window, so
/// less than this may result in failure to decompress streams that were
/// compressed with a larger window.
pub const max_window_len = 1 << 16;

/// Deflate is a lossless data compression file format that uses a combination
/// of LZ77 and Huffman coding.
pub const Compress = @import("flate/Compress.zig");

/// Inflate is the decoding process that takes a Deflate bitstream for
/// decompression and correctly produces the original full-size data or file.
pub const Decompress = @import("flate/Decompress.zig");

/// Compression without Lempel-Ziv match searching. Faster compression, less
/// memory requirements but bigger compressed sizes.
pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig");

test "compress/decompress" {
    const print = std.debug.print;
    var cmp_buf: [64 * 1024]u8 = undefined; // compressed data buffer
    var dcm_buf: [64 * 1024]u8 = undefined; // decompressed data buffer

    const levels = [_]Compress.Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
    const cases = [_]struct {
        data: []const u8, // uncompressed content
        // compressed data sizes per level 4-9
        gzip_sizes: [levels.len]usize = [_]usize{0} ** levels.len,
        huffman_only_size: usize = 0,
        store_size: usize = 0,
    }{
        .{
            .data = @embedFile("flate/testdata/rfc1951.txt"),
            .gzip_sizes = [_]usize{ 11513, 11217, 11139, 11126, 11122, 11119 },
            .huffman_only_size = 20287,
            .store_size = 36967,
        },
        .{
            .data = @embedFile("flate/testdata/fuzz/roundtrip1.input"),
            .gzip_sizes = [_]usize{ 373, 370, 370, 370, 370, 370 },
            .huffman_only_size = 393,
            .store_size = 393,
        },
        .{
            .data = @embedFile("flate/testdata/fuzz/roundtrip2.input"),
            .gzip_sizes = [_]usize{ 373, 373, 373, 373, 373, 373 },
            .huffman_only_size = 394,
            .store_size = 394,
        },
        .{
            .data = @embedFile("flate/testdata/fuzz/deflate-stream.expect"),
            .gzip_sizes = [_]usize{ 351, 347, 347, 347, 347, 347 },
            .huffman_only_size = 498,
            .store_size = 747,
        },
    };
    for (cases, 0..) |case, case_no| {
        const data = case.data;

        for (levels, 0..) |level, i| {
            for (Container.list) |container| {
                var compressed_size: usize = if (case.gzip_sizes[i] > 0)
                    case.gzip_sizes[i] - Container.gzip.size() + container.size()
                else
                    0;

                // compress original stream to compressed stream
                {
                    var compressed: Writer = .fixed(&cmp_buf);
                    var compress: Compress = .init(&compressed, &.{}, .{ .container = .raw, .level = level });
                    try compress.writer.writeAll(data);
                    try compress.end();

                    if (compressed_size == 0) {
                        if (container == .gzip)
                            print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos });
                        compressed_size = compressed.end;
                    }
                    try testing.expectEqual(compressed_size, compressed.end);
                }
                // decompress compressed stream to decompressed stream
                {
                    var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]);
                    var decompressed: Writer = .fixed(&dcm_buf);
                    var decompress: Decompress = .init(&compressed, container, &.{});
                    _ = try decompress.reader.streamRemaining(&decompressed);
                    try testing.expectEqualSlices(u8, data, decompressed.buffered());
                }

                // compressor writer interface
                {
                    var compressed: Writer = .fixed(&cmp_buf);
                    var cmp = try Compress.init(&compressed, &.{}, .{
                        .level = level,
                        .container = container,
                    });
                    var cmp_wrt = cmp.writer();
                    try cmp_wrt.writeAll(data);
                    try cmp.finish();

                    try testing.expectEqual(compressed_size, compressed.pos);
                }
                // decompressor reader interface
                {
                    var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]);
                    var decompress: Decompress = .init(&compressed, container, &.{});
                    const n = try decompress.reader.readSliceShort(&dcm_buf);
                    try testing.expectEqual(data.len, n);
                    try testing.expectEqualSlices(u8, data, dcm_buf[0..n]);
                }
            }
        }
        // huffman only compression
        {
            for (Container.list) |container| {
                var compressed_size: usize = if (case.huffman_only_size > 0)
                    case.huffman_only_size - Container.gzip.size() + container.size()
                else
                    0;

                // compress original stream to compressed stream
                {
                    var original: std.Io.Reader = .fixed(data);
                    var compressed: Writer = .fixed(&cmp_buf);
                    var cmp = try Compress.Huffman.init(container, &compressed);
                    try cmp.compress(original.reader());
                    try cmp.finish();
                    if (compressed_size == 0) {
                        if (container == .gzip)
                            print("case {d} huffman only compressed size: {d}\n", .{ case_no, compressed.pos });
                        compressed_size = compressed.pos;
                    }
                    try testing.expectEqual(compressed_size, compressed.pos);
                }
                // decompress compressed stream to decompressed stream
                {
                    var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]);
                    var decompress: Decompress = .init(&compressed, container, &.{});
                    var decompressed: Writer = .fixed(&dcm_buf);
                    _ = try decompress.reader.streamRemaining(&decompressed);
                    try testing.expectEqualSlices(u8, data, decompressed.buffered());
                }
            }
        }

        // store only
        {
            for (Container.list) |container| {
                var compressed_size: usize = if (case.store_size > 0)
                    case.store_size - Container.gzip.size() + container.size()
                else
                    0;

                // compress original stream to compressed stream
                {
                    var original: std.Io.Reader = .fixed(data);
                    var compressed: Writer = .fixed(&cmp_buf);
                    var cmp = try Compress.SimpleCompressor(.store, container).init(&compressed);
                    try cmp.compress(original.reader());
                    try cmp.finish();
                    if (compressed_size == 0) {
                        if (container == .gzip)
                            print("case {d} store only compressed size: {d}\n", .{ case_no, compressed.pos });
                        compressed_size = compressed.pos;
                    }

                    try testing.expectEqual(compressed_size, compressed.pos);
                }
                // decompress compressed stream to decompressed stream
                {
                    var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]);
                    var decompress: Decompress = .init(&compressed, container, &.{});
                    var decompressed: Writer = .fixed(&dcm_buf);
                    _ = try decompress.reader.streamRemaining(&decompressed);
                    try testing.expectEqualSlices(u8, data, decompressed.buffered());
                }
            }
        }
    }
}
fn testDecompress(container: Container, compressed: []const u8, expected_plain: []const u8) !void {
    var in: std.Io.Reader = .fixed(compressed);
    var aw: std.Io.Writer.Allocating = .init(testing.allocator);
    defer aw.deinit();

    var decompress: Decompress = .init(&in, container, &.{});
    _ = try decompress.reader.streamRemaining(&aw.writer);
    try testing.expectEqualSlices(u8, expected_plain, aw.getWritten());
}

test "don't read past deflate stream's end" {
    try testDecompress(.zlib, &[_]u8{
        0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
        0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
        0x83, 0x95, 0x0b, 0xf5,
    }, &[_]u8{
        0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
        0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
        0x00, 0x00, 0xff, 0xff, 0xff,
    });
}

test "zlib header" {
    // Truncated header
    try testing.expectError(
        error.EndOfStream,
        testDecompress(.zlib, &[_]u8{0x78}, ""),
    );
    // Wrong CM
    try testing.expectError(
        error.BadZlibHeader,
        testDecompress(.zlib, &[_]u8{ 0x79, 0x94 }, ""),
    );
    // Wrong CINFO
    try testing.expectError(
        error.BadZlibHeader,
        testDecompress(.zlib, &[_]u8{ 0x88, 0x98 }, ""),
    );
    // Wrong checksum
    try testing.expectError(
        error.WrongZlibChecksum,
        testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
    );
    // Truncated checksum
    try testing.expectError(
        error.EndOfStream,
        testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
    );
}
test "gzip header" {
|
||||
// Truncated header
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.gzip, &[_]u8{ 0x1f, 0x8B }, undefined),
|
||||
);
|
||||
// Wrong CM
|
||||
try testing.expectError(
|
||||
error.BadGzipHeader,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03,
|
||||
}, undefined),
|
||||
);
|
||||
|
||||
// Wrong checksum
|
||||
try testing.expectError(
|
||||
error.WrongGzipChecksum,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
}, undefined),
|
||||
);
|
||||
// Truncated checksum
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
|
||||
}, undefined),
|
||||
);
|
||||
// Wrong initial size
|
||||
try testing.expectError(
|
||||
error.WrongGzipSize,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x01,
|
||||
}, undefined),
|
||||
);
|
||||
// Truncated initial size field
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00,
|
||||
}, undefined),
|
||||
);
|
||||
|
||||
try testDecompress(.gzip, &[_]u8{
|
||||
// GZIP header
|
||||
0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00,
|
||||
// header.FHCRC (should cover entire header)
|
||||
0x99, 0xd6,
|
||||
// GZIP data
|
||||
0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
}, "");
|
||||
}
|
||||
|
||||
test "public interface" {
|
||||
const plain_data_buf = [_]u8{ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a };
|
||||
|
||||
// deflate final stored block, header + plain (stored) data
|
||||
const deflate_block = [_]u8{
|
||||
0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen
|
||||
} ++ plain_data_buf;
|
||||
|
||||
const plain_data: []const u8 = &plain_data_buf;
|
||||
const gzip_data: []const u8 = &deflate_block;
|
||||
|
||||
//// gzip header/footer + deflate block
|
||||
//const gzip_data =
|
||||
// [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 } ++ // gzip header (10 bytes)
|
||||
// deflate_block ++
|
||||
// [_]u8{ 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00 }; // gzip footer checksum (4 byte), size (4 bytes)
|
||||
|
||||
//// zlib header/footer + deflate block
|
||||
//const zlib_data = [_]u8{ 0x78, 0b10_0_11100 } ++ // zlib header (2 bytes)}
|
||||
// deflate_block ++
|
||||
// [_]u8{ 0x1c, 0xf2, 0x04, 0x47 }; // zlib footer: checksum
|
||||
|
||||
// TODO
|
||||
//const gzip = @import("gzip.zig");
|
||||
//const zlib = @import("zlib.zig");
|
||||
|
||||
var buffer1: [64]u8 = undefined;
|
||||
var buffer2: [64]u8 = undefined;
|
||||
|
||||
// decompress
|
||||
{
|
||||
var plain: Writer = .fixed(&buffer2);
|
||||
var in: std.Io.Reader = .fixed(gzip_data);
|
||||
var d: Decompress = .init(&in, .raw, &.{});
|
||||
_ = try d.reader.streamRemaining(&plain);
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.buffered());
|
||||
}
|
||||
|
||||
// compress/decompress
|
||||
{
|
||||
var plain: Writer = .fixed(&buffer2);
|
||||
var compressed: Writer = .fixed(&buffer1);
|
||||
|
||||
var cmp: Compress = .init(&compressed, &.{}, .{});
|
||||
try cmp.writer.writeAll(plain_data);
|
||||
try cmp.end();
|
||||
|
||||
var r: std.Io.Reader = .fixed(&buffer1);
|
||||
var d: Decompress = .init(&r, .raw, &.{});
|
||||
_ = try d.reader.streamRemaining(&plain);
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.buffered());
|
||||
}
|
||||
|
||||
// compressor/decompressor
|
||||
{
|
||||
var plain: Writer = .fixed(&buffer2);
|
||||
var compressed: Writer = .fixed(&buffer1);
|
||||
|
||||
var cmp: Compress = .init(&compressed, &.{}, .{});
|
||||
try cmp.writer.writeAll(plain_data);
|
||||
try cmp.end();
|
||||
|
||||
var r: std.Io.Reader = .fixed(&buffer1);
|
||||
var dcp = Decompress(&r);
|
||||
try dcp.decompress(&plain);
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.buffered());
|
||||
}
|
||||
|
||||
// huffman
|
||||
{
|
||||
// huffman compress/decompress
|
||||
{
|
||||
var plain: Writer = .fixed(&buffer2);
|
||||
var compressed: Writer = .fixed(&buffer1);
|
||||
|
||||
var in: std.Io.Reader = .fixed(plain_data);
|
||||
try HuffmanEncoder.compress(&in, &compressed);
|
||||
|
||||
var r: std.Io.Reader = .fixed(&buffer1);
|
||||
var d: Decompress = .init(&r, .raw, &.{});
|
||||
_ = try d.reader.streamRemaining(&plain);
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.buffered());
|
||||
}
|
||||
|
||||
// huffman compressor/decompressor
|
||||
{
|
||||
var plain: Writer = .fixed(&buffer2);
|
||||
var compressed: Writer = .fixed(&buffer1);
|
||||
|
||||
var in: std.Io.Reader = .fixed(plain_data);
|
||||
var cmp = try HuffmanEncoder.Compressor(&compressed);
|
||||
try cmp.compress(&in);
|
||||
try cmp.finish();
|
||||
|
||||
var r: std.Io.Reader = .fixed(&buffer1);
|
||||
var d: Decompress = .init(&r, .raw, &.{});
|
||||
_ = try d.reader.streamRemaining(&plain);
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.buffered());
|
||||
}
|
||||
}
|
||||
|
||||
// TODO
|
||||
//{
|
||||
// // store compress/decompress
|
||||
// {
|
||||
// var plain: Writer = .fixed(&buffer2);
|
||||
// var compressed: Writer = .fixed(&buffer1);
|
||||
|
||||
// var in: std.Io.Reader = .fixed(plain_data);
|
||||
// try store.compress(&in, &compressed);
|
||||
|
||||
// var r: std.Io.Reader = .fixed(&buffer1);
|
||||
// var d: Decompress = .init(&r, .raw, &.{});
|
||||
// _ = try d.reader.streamRemaining(&plain);
|
||||
// try testing.expectEqualSlices(u8, plain_data, plain.buffered());
|
||||
// }
|
||||
|
||||
// // store compressor/decompressor
|
||||
// {
|
||||
// var plain: Writer = .fixed(&buffer2);
|
||||
// var compressed: Writer = .fixed(&buffer1);
|
||||
|
||||
// var in: std.Io.Reader = .fixed(plain_data);
|
||||
// var cmp = try store.compressor(&compressed);
|
||||
// try cmp.compress(&in);
|
||||
// try cmp.finish();
|
||||
|
||||
// var r: std.Io.Reader = .fixed(&buffer1);
|
||||
// var d: Decompress = .init(&r, .raw, &.{});
|
||||
// _ = try d.reader.streamRemaining(&plain);
|
||||
// try testing.expectEqualSlices(u8, plain_data, plain.buffered());
|
||||
// }
|
||||
//}
|
||||
}
|
||||
|
||||
pub const match = struct {
    pub const base_length = 3; // smallest match length per the RFC section 3.2.5
    pub const min_length = 4; // min length used in this algorithm
    pub const max_length = 258;

    pub const min_distance = 1;
    pub const max_distance = 32768;
};

pub const history_len = match.max_distance;

pub const lookup = struct {
    pub const bits = 15;
    pub const len = 1 << bits;
    pub const shift = 32 - bits;
};

test "zlib should not overshoot" {
    // Compressed zlib data with extra 4 bytes at the end.
    const data = [_]u8{
        0x78, 0x9c, 0x73, 0xce, 0x2f, 0xa8, 0x2c, 0xca, 0x4c, 0xcf, 0x28, 0x51, 0x08, 0xcf, 0xcc, 0xc9,
        0x49, 0xcd, 0x55, 0x28, 0x4b, 0xcc, 0x53, 0x08, 0x4e, 0xce, 0x48, 0xcc, 0xcc, 0xd6, 0x51, 0x08,
        0xce, 0xcc, 0x4b, 0x4f, 0x2c, 0xc8, 0x2f, 0x4a, 0x55, 0x30, 0xb4, 0xb4, 0x34, 0xd5, 0xb5, 0x34,
        0x03, 0x00, 0x8b, 0x61, 0x0f, 0xa4, 0x52, 0x5a, 0x94, 0x12,
    };

    var reader: std.Io.Reader = .fixed(&data);

    var decompress: Decompress = .init(&reader, .zlib, &.{});
    var out: [128]u8 = undefined;

    {
        const n = try decompress.reader.readSliceShort(out[0..]);

        // Expected decompressed data
        try std.testing.expectEqual(46, n);
        try std.testing.expectEqualStrings("Copyright Willem van Schaik, Singapore 1995-96", out[0..n]);

        // The decompressor doesn't overshoot the underlying reader;
        // it leaves it at the end of the compressed data chunk.
        try std.testing.expectEqual(data.len - 4, reader.seek);
        // TODO what was this testing, exactly?
        //try std.testing.expectEqual(0, decompress.unreadBytes());
    }

    // 4 bytes after compressed chunk are available in reader.
    const n = try reader.readSliceShort(out[0..]);
    try std.testing.expectEqual(n, 4);
    try std.testing.expectEqualSlices(u8, data[data.len - 4 .. data.len], out[0..n]);
}

test {
    _ = HuffmanEncoder;
    _ = Compress;
@@ -31,7 +31,26 @@ fixed_literal_codes: [HuffmanEncoder.max_num_frequencies]HuffmanEncoder.Code,
fixed_distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,
distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,

pub fn init(bw: *BlockWriter) void {
pub fn init(output: *Writer) BlockWriter {
    return .{
        .output = output,
        .codegen_freq = undefined,
        .literal_freq = undefined,
        .distance_freq = undefined,
        .codegen = undefined,
        .literal_encoding = undefined,
        .distance_encoding = undefined,
        .codegen_encoding = undefined,
        .fixed_literal_encoding = undefined,
        .fixed_distance_encoding = undefined,
        .huff_distance = undefined,
        .fixed_literal_codes = undefined,
        .fixed_distance_codes = undefined,
        .distance_codes = undefined,
    };
}

pub fn initBuffers(bw: *BlockWriter) void {
    bw.fixed_literal_encoding = .fixedLiteralEncoder(&bw.fixed_literal_codes);
    bw.fixed_distance_encoding = .fixedDistanceEncoder(&bw.fixed_distance_codes);
    bw.huff_distance = .huffmanDistanceEncoder(&bw.distance_codes);
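The hunk above splits BlockWriter construction in two: `init` wires the output and leaves the large tables `undefined`, and `initBuffers` points the fixed encoders at the code buffers afterwards. Two steps are plausible here because the encoders hold pointers into the BlockWriter's own arrays, and addresses taken inside `init` would refer to a temporary that the return value copies away. A minimal usage sketch (`some_writer: *Writer` is an assumed caller-provided value):

var bw: BlockWriter = .init(some_writer); // tables still undefined here
bw.initBuffers(); // must run after `bw` has its final address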
@@ -122,22 +122,7 @@ pub const Options = struct {

pub fn init(output: *Writer, buffer: []u8, options: Options) Compress {
    return .{
        .block_writer = .{
            .output = output,
            .codegen_freq = undefined,
            .literal_freq = undefined,
            .distance_freq = undefined,
            .codegen = undefined,
            .literal_encoding = undefined,
            .distance_encoding = undefined,
            .codegen_encoding = undefined,
            .fixed_literal_encoding = undefined,
            .fixed_distance_encoding = undefined,
            .huff_distance = undefined,
            .fixed_literal_codes = undefined,
            .fixed_distance_codes = undefined,
            .distance_codes = undefined,
        },
        .block_writer = .init(output),
        .level = .get(options.level),
        .hasher = .init(options.container),
        .state = .header,
@@ -188,20 +173,21 @@ fn drain(me: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize
    }

    const buffered = me.buffered();
    const min_lookahead = flate.match.min_length + flate.match.max_length;
    const min_lookahead = Token.min_length + Token.max_length;
    const history_plus_lookahead_len = flate.history_len + min_lookahead;
    if (buffered.len < history_plus_lookahead_len) return 0;
    const lookahead = buffered[flate.history_len..];

    _ = lookahead;
    // TODO tokenize
    _ = lookahead;
    //c.hasher.update(lookahead[0..n]);
    @panic("TODO");
}

pub fn end(c: *Compress) !void {
    try endUnflushed(c);
    try c.output.flush();
    const out = c.block_writer.output;
    try out.flush();
}

pub fn endUnflushed(c: *Compress) !void {
@@ -227,7 +213,7 @@ pub fn endUnflushed(c: *Compress) !void {
            // Checksum value of the uncompressed data (excluding any
            // dictionary data) computed according to the Adler-32
            // algorithm.
            std.mem.writeInt(u32, try out.writableArray(4), zlib.final, .big);
            std.mem.writeInt(u32, try out.writableArray(4), zlib.adler, .big);
        },
        .raw => {},
    }
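For reference, the four footer bytes written above are the big-endian Adler-32 of the uncompressed stream, per RFC 1950. A hedged sketch of the equivalent standalone computation, assuming `std.hash.Adler32` and with `uncompressed_data` as a stand-in for whatever was streamed through the compressor:

const adler = std.hash.Adler32.hash(uncompressed_data); // uncompressed_data: []const u8, assumed
var footer: [4]u8 = undefined;
std.mem.writeInt(u32, &footer, adler, .big); // same byte order as the hunk above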
@@ -243,15 +229,16 @@ pub const Simple = struct {

    pub const Strategy = enum { huffman, store };

    pub fn init(out: *Writer, buffer: []u8, container: Container) !Simple {
        const self: Simple = .{
    pub fn init(output: *Writer, buffer: []u8, container: Container, strategy: Strategy) !Simple {
        const header = container.header();
        try output.writeAll(header);
        return .{
            .buffer = buffer,
            .wp = 0,
            .block_writer = .init(out),
            .block_writer = .init(output),
            .hasher = .init(container),
            .strategy = strategy,
        };
        try container.writeHeader(self.out);
        return self;
    }

    pub fn flush(self: *Simple) !void {
@@ -263,7 +250,7 @@ pub const Simple = struct {
    pub fn finish(self: *Simple) !void {
        try self.flushBuffer(true);
        try self.block_writer.flush();
        try self.hasher.container().writeFooter(&self.hasher, self.out);
        try self.hasher.container().writeFooter(&self.hasher, self.block_writer.output);
    }

    fn flushBuffer(self: *Simple, final: bool) !void {
@@ -300,7 +287,13 @@ test "generate a Huffman code from an array of frequencies" {
    };

    var codes: [19]HuffmanEncoder.Code = undefined;
    var enc: HuffmanEncoder = .{ .codes = &codes };
    var enc: HuffmanEncoder = .{
        .codes = &codes,
        .freq_cache = undefined,
        .bit_count = undefined,
        .lns = undefined,
        .lfs = undefined,
    };
    enc.generate(freqs[0..], 7);

    try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));
@@ -337,247 +330,3 @@ test "generate a Huffman code from an array of frequencies" {
    try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code);
    try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code);
}
test "tokenization" {
|
||||
const L = Token.initLiteral;
|
||||
const M = Token.initMatch;
|
||||
|
||||
const cases = [_]struct {
|
||||
data: []const u8,
|
||||
tokens: []const Token,
|
||||
}{
|
||||
.{
|
||||
.data = "Blah blah blah blah blah!",
|
||||
.tokens = &[_]Token{ L('B'), L('l'), L('a'), L('h'), L(' '), L('b'), M(5, 18), L('!') },
|
||||
},
|
||||
.{
|
||||
.data = "ABCDEABCD ABCDEABCD",
|
||||
.tokens = &[_]Token{
|
||||
L('A'), L('B'), L('C'), L('D'), L('E'), L('A'), L('B'), L('C'), L('D'), L(' '),
|
||||
L('A'), M(10, 8),
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
for (cases) |c| {
|
||||
inline for (Container.list) |container| { // for each wrapping
|
||||
|
||||
var cw = std.Io.countingWriter(std.Io.null_writer);
|
||||
const cww = cw.writer();
|
||||
var df = try Compress(container, @TypeOf(cww), TestTokenWriter).init(cww, .{});
|
||||
|
||||
_ = try df.write(c.data);
|
||||
try df.flush();
|
||||
|
||||
// df.token_writer.show();
|
||||
try expect(df.block_writer.pos == c.tokens.len); // number of tokens written
|
||||
try testing.expectEqualSlices(Token, df.block_writer.get(), c.tokens); // tokens match
|
||||
|
||||
try testing.expectEqual(container.headerSize(), cw.bytes_written);
|
||||
try df.finish();
|
||||
try testing.expectEqual(container.size(), cw.bytes_written);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Tests that tokens written are equal to expected token list.
|
||||
const TestTokenWriter = struct {
|
||||
const Self = @This();
|
||||
|
||||
pos: usize = 0,
|
||||
actual: [128]Token = undefined,
|
||||
|
||||
pub fn init(_: anytype) Self {
|
||||
return .{};
|
||||
}
|
||||
pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void {
|
||||
for (tokens) |t| {
|
||||
self.actual[self.pos] = t;
|
||||
self.pos += 1;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {}
|
||||
|
||||
pub fn get(self: *Self) []Token {
|
||||
return self.actual[0..self.pos];
|
||||
}
|
||||
|
||||
pub fn show(self: *Self) void {
|
||||
std.debug.print("\n", .{});
|
||||
for (self.get()) |t| {
|
||||
t.show();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn flush(_: *Self) !void {}
|
||||
};
|
||||
|
||||
test "file tokenization" {
|
||||
const levels = [_]Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
|
||||
const cases = [_]struct {
|
||||
data: []const u8, // uncompressed content
|
||||
// expected number of tokens producet in deflate tokenization
|
||||
tokens_count: [levels.len]usize = .{0} ** levels.len,
|
||||
}{
|
||||
.{
|
||||
.data = @embedFile("testdata/rfc1951.txt"),
|
||||
.tokens_count = .{ 7675, 7672, 7599, 7594, 7598, 7599 },
|
||||
},
|
||||
|
||||
.{
|
||||
.data = @embedFile("testdata/block_writer/huffman-null-max.input"),
|
||||
.tokens_count = .{ 257, 257, 257, 257, 257, 257 },
|
||||
},
|
||||
.{
|
||||
.data = @embedFile("testdata/block_writer/huffman-pi.input"),
|
||||
.tokens_count = .{ 2570, 2564, 2564, 2564, 2564, 2564 },
|
||||
},
|
||||
.{
|
||||
.data = @embedFile("testdata/block_writer/huffman-text.input"),
|
||||
.tokens_count = .{ 235, 234, 234, 234, 234, 234 },
|
||||
},
|
||||
.{
|
||||
.data = @embedFile("testdata/fuzz/roundtrip1.input"),
|
||||
.tokens_count = .{ 333, 331, 331, 331, 331, 331 },
|
||||
},
|
||||
.{
|
||||
.data = @embedFile("testdata/fuzz/roundtrip2.input"),
|
||||
.tokens_count = .{ 334, 334, 334, 334, 334, 334 },
|
||||
},
|
||||
};
|
||||
|
||||
for (cases) |case| { // for each case
|
||||
const data = case.data;
|
||||
|
||||
for (levels, 0..) |level, i| { // for each compression level
|
||||
var original: std.Io.Reader = .fixed(data);
|
||||
|
||||
// buffer for decompressed data
|
||||
var al = std.ArrayList(u8).init(testing.allocator);
|
||||
defer al.deinit();
|
||||
const writer = al.writer();
|
||||
|
||||
// create compressor
|
||||
const WriterType = @TypeOf(writer);
|
||||
const TokenWriter = TokenDecoder(@TypeOf(writer));
|
||||
var cmp = try Compress(.raw, WriterType, TokenWriter).init(writer, .{ .level = level });
|
||||
|
||||
// Stream uncompressed `original` data to the compressor. It will
|
||||
// produce tokens list and pass that list to the TokenDecoder. This
|
||||
// TokenDecoder uses CircularBuffer from inflate to convert list of
|
||||
// tokens back to the uncompressed stream.
|
||||
try cmp.compress(original.reader());
|
||||
try cmp.flush();
|
||||
const expected_count = case.tokens_count[i];
|
||||
const actual = cmp.block_writer.tokens_count;
|
||||
if (expected_count == 0) {
|
||||
std.debug.print("actual token count {d}\n", .{actual});
|
||||
} else {
|
||||
try testing.expectEqual(expected_count, actual);
|
||||
}
|
||||
|
||||
try testing.expectEqual(data.len, al.items.len);
|
||||
try testing.expectEqualSlices(u8, data, al.items);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const TokenDecoder = struct {
|
||||
output: *Writer,
|
||||
tokens_count: usize,
|
||||
|
||||
pub fn init(output: *Writer) TokenDecoder {
|
||||
return .{
|
||||
.output = output,
|
||||
.tokens_count = 0,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn write(self: *TokenDecoder, tokens: []const Token, _: bool, _: ?[]const u8) !void {
|
||||
self.tokens_count += tokens.len;
|
||||
for (tokens) |t| {
|
||||
switch (t.kind) {
|
||||
.literal => self.hist.write(t.literal()),
|
||||
.match => try self.hist.writeMatch(t.length(), t.distance()),
|
||||
}
|
||||
if (self.hist.free() < 285) try self.flushWin();
|
||||
}
|
||||
try self.flushWin();
|
||||
}
|
||||
|
||||
fn flushWin(self: *TokenDecoder) !void {
|
||||
while (true) {
|
||||
const buf = self.hist.read();
|
||||
if (buf.len == 0) break;
|
||||
try self.output.writeAll(buf);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
test "store simple compressor" {
|
||||
if (true) return error.SkipZigTest;
|
||||
//const data = "Hello world!";
|
||||
//const expected = [_]u8{
|
||||
// 0x1, // block type 0, final bit set
|
||||
// 0xc, 0x0, // len = 12
|
||||
// 0xf3, 0xff, // ~len
|
||||
// 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', //
|
||||
// //0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21,
|
||||
//};
|
||||
|
||||
//var fbs: std.Io.Reader = .fixed(data);
|
||||
//var al = std.ArrayList(u8).init(testing.allocator);
|
||||
//defer al.deinit();
|
||||
|
||||
//var cmp = try store.compressor(.raw, al.writer());
|
||||
//try cmp.compress(&fbs);
|
||||
//try cmp.finish();
|
||||
//try testing.expectEqualSlices(u8, &expected, al.items);
|
||||
|
||||
//fbs = .fixed(data);
|
||||
//try al.resize(0);
|
||||
|
||||
//// huffman only compresoor will also emit store block for this small sample
|
||||
//var hc = try huffman.compressor(.raw, al.writer());
|
||||
//try hc.compress(&fbs);
|
||||
//try hc.finish();
|
||||
//try testing.expectEqualSlices(u8, &expected, al.items);
|
||||
}
|
||||
|
||||
test "sliding window match" {
|
||||
const data = "Blah blah blah blah blah!";
|
||||
var win: Writer = .{};
|
||||
try expect(win.write(data) == data.len);
|
||||
try expect(win.wp == data.len);
|
||||
try expect(win.rp == 0);
|
||||
|
||||
// length between l symbols
|
||||
try expect(win.match(1, 6, 0) == 18);
|
||||
try expect(win.match(1, 11, 0) == 13);
|
||||
try expect(win.match(1, 16, 0) == 8);
|
||||
try expect(win.match(1, 21, 0) == 0);
|
||||
|
||||
// position 15 = "blah blah!"
|
||||
// position 20 = "blah!"
|
||||
try expect(win.match(15, 20, 0) == 4);
|
||||
try expect(win.match(15, 20, 3) == 4);
|
||||
try expect(win.match(15, 20, 4) == 0);
|
||||
}
|
||||
|
||||
test "sliding window slide" {
|
||||
var win: Writer = .{};
|
||||
win.wp = Writer.buffer_len - 11;
|
||||
win.rp = Writer.buffer_len - 111;
|
||||
win.buffer[win.rp] = 0xab;
|
||||
try expect(win.lookahead().len == 100);
|
||||
try expect(win.tokensBuffer().?.len == win.rp);
|
||||
|
||||
const n = win.slide();
|
||||
try expect(n == 32757);
|
||||
try expect(win.buffer[win.rp] == 0xab);
|
||||
try expect(win.rp == Writer.hist_len - 111);
|
||||
try expect(win.wp == Writer.hist_len - 11);
|
||||
try expect(win.lookahead().len == 100);
|
||||
try expect(win.tokensBuffer() == null);
|
||||
}
|
||||
|
||||
@@ -4,8 +4,8 @@ const Container = flate.Container;
const Token = @import("Token.zig");
const testing = std.testing;
const Decompress = @This();
const Writer = std.io.Writer;
const Reader = std.io.Reader;
const Writer = std.Io.Writer;
const Reader = std.Io.Reader;

input: *Reader,
reader: Reader,
@@ -129,7 +129,7 @@ fn decodeSymbol(self: *Decompress, decoder: anytype) !Symbol {
    return sym;
}

pub fn stream(r: *Reader, w: *Writer, limit: std.io.Limit) Reader.StreamError!usize {
pub fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize {
    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
    return readInner(d, w, limit) catch |err| switch (err) {
        error.EndOfStream => return error.EndOfStream,
@@ -143,7 +143,8 @@ pub fn stream(r: *Reader, w: *Writer, limit: std.io.Limit) Reader.StreamError!usize
    };
}

fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.StreamError)!usize {
fn readInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader.StreamError)!usize {
    var remaining = @intFromEnum(limit);
    const in = d.input;
    sw: switch (d.state) {
        .protocol_header => switch (d.hasher.container()) {
@@ -182,15 +183,9 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.StreamError)!usize {
                continue :sw .block_header;
            },
            .zlib => {
                const Header = extern struct {
                    cmf: packed struct(u8) {
                        cm: u4,
                        cinfo: u4,
                    },
                    flg: u8,
                };
                const header = try in.takeStruct(Header);
                if (header.cmf.cm != 8 or header.cmf.cinfo > 7) return error.BadZlibHeader;
                const header = try in.takeArray(2);
                const cmf: packed struct(u8) { cm: u4, cinfo: u4 } = @bitCast(header[0]);
                if (cmf.cm != 8 or cmf.cinfo > 7) return error.BadZlibHeader;
                continue :sw .block_header;
            },
            .raw => continue :sw .block_header,
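A worked example of the new two-byte header decode above: in a packed struct(u8) the first field occupies the low bits, so the common zlib header byte 0x78 (seen in the zlib test vectors in this commit) splits into cm = 8, cinfo = 7 and is accepted:

const cmf: packed struct(u8) { cm: u4, cinfo: u4 } = @bitCast(@as(u8, 0x78));
// 0x78 = 0b0111_1000 => cm = 0x8 (deflate), cinfo = 0x7 (32 KiB window)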
@@ -219,7 +214,7 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.StreamError)!usize {
            // lengths for code lengths
            var cl_lens = [_]u4{0} ** 19;
            for (0..hclen) |i| {
                cl_lens[flate.huffman.codegen_order[i]] = try d.takeBits(u3);
                cl_lens[flate.HuffmanEncoder.codegen_order[i]] = try d.takeBits(u3);
            }
            var cl_dec: CodegenDecoder = .{};
            try cl_dec.generate(&cl_lens);
@@ -259,52 +254,56 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.StreamError)!usize {
                return n;
            },
            .fixed_block => {
                const start = w.count;
                while (@intFromEnum(limit) > w.count - start) {
                while (remaining > 0) {
                    const code = try d.readFixedCode();
                    switch (code) {
                        0...255 => try w.writeBytePreserve(flate.history_len, @intCast(code)),
                        0...255 => {
                            try w.writeBytePreserve(flate.history_len, @intCast(code));
                            remaining -= 1;
                        },
                        256 => {
                            d.state = if (d.final_block) .protocol_footer else .block_header;
                            return w.count - start;
                            return @intFromEnum(limit) - remaining;
                        },
                        257...285 => {
                            // Handles fixed block non literal (length) code.
                            // Length code is followed by 5 bits of distance code.
                            const length = try d.decodeLength(@intCast(code - 257));
                            const distance = try d.decodeDistance(try d.takeBitsReverseBuffered(u5));
                            try writeMatch(w, length, distance);
                            remaining = try writeMatch(w, length, distance, remaining);
                        },
                        else => return error.InvalidCode,
                    }
                }
                d.state = .fixed_block;
                return w.count - start;
                return @intFromEnum(limit) - remaining;
            },
            .dynamic_block => {
                // In larger archives most blocks are usually dynamic, so decompression
                // performance depends on this logic.
                const start = w.count;
                while (@intFromEnum(limit) > w.count - start) {
                // In larger archives most blocks are usually dynamic, so
                // decompression performance depends on this logic.
                while (remaining > 0) {
                    const sym = try d.decodeSymbol(&d.lit_dec);

                    switch (sym.kind) {
                        .literal => try w.writeBytePreserve(flate.history_len, sym.symbol),
                        .literal => {
                            try w.writeBytePreserve(flate.history_len, sym.symbol);
                            remaining -= 1;
                        },
                        .match => {
                            // Decode match backreference <length, distance>
                            const length = try d.decodeLength(sym.symbol);
                            const dsm = try d.decodeSymbol(&d.dst_dec);
                            const distance = try d.decodeDistance(dsm.symbol);
                            try writeMatch(w, length, distance);
                            remaining = try writeMatch(w, length, distance, remaining);
                        },
                        .end_of_block => {
                            d.state = if (d.final_block) .protocol_footer else .block_header;
                            return w.count - start;
                            return @intFromEnum(limit) - remaining;
                        },
                    }
                }
                d.state = .dynamic_block;
                return w.count - start;
                return @intFromEnum(limit) - remaining;
            },
            .protocol_footer => {
                d.alignBitsToByte();
@@ -314,7 +313,7 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.StreamError)!usize {
                    if (try in.takeInt(u32, .little) != gzip.count) return error.WrongGzipSize;
                },
                .zlib => |*zlib| {
                    const chksum: u32 = @byteSwap(zlib.final());
                    const chksum: u32 = @byteSwap(zlib.adler);
                    if (try in.takeInt(u32, .big) != chksum) return error.WrongZlibChecksum;
                },
                .raw => {},
@@ -328,10 +327,11 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.StreamError)!usize {

/// Write match (back-reference to the same data slice) starting at `distance`
/// back from current write position, and `length` of bytes.
fn writeMatch(bw: *Writer, length: u16, distance: u16) !void {
    _ = bw;
fn writeMatch(w: *Writer, length: u16, distance: u16, remaining: usize) !usize {
    _ = w;
    _ = length;
    _ = distance;
    _ = remaining;
    @panic("TODO");
}
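`writeMatch` is still a TODO in this commit. A hedged sketch of the usual semantics, not the author's implementation: copy `length` bytes starting `distance` bytes back in the already-written output, one byte at a time so that overlapping matches self-replicate. Treating `w.buffered()` as a view of the preserved history window is an assumption here:

fn writeMatchSketch(w: *Writer, length: u16, distance: u16, remaining: usize) !usize {
    var i: u16 = 0;
    while (i < length) : (i += 1) {
        const hist = w.buffered(); // assumed to expose the preserved window
        try w.writeBytePreserve(flate.history_len, hist[hist.len - distance]);
    }
    // Real code must also clamp `length` against `remaining` and
    // validate `distance` against the available history.
    return remaining - length;
}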
|
||||
@@ -622,7 +622,13 @@ test "init/find" {
test "encode/decode literals" {
    var codes: [flate.HuffmanEncoder.max_num_frequencies]flate.HuffmanEncoder.Code = undefined;
    for (1..286) |j| { // for all different number of codes
        var enc: flate.HuffmanEncoder = .{ .codes = &codes };
        var enc: flate.HuffmanEncoder = .{
            .codes = &codes,
            .freq_cache = undefined,
            .bit_count = undefined,
            .lns = undefined,
            .lfs = undefined,
        };
        // create frequencies
        var freq = [_]u16{0} ** 286;
        freq[256] = 1; // ensure we have end of block code
@@ -857,7 +863,7 @@ test "fuzzing tests" {
    const r = &decompress.reader;
    if (c.err) |expected_err| {
        try testing.expectError(error.ReadFailed, r.streamRemaining(&aw.writer));
        try testing.expectError(expected_err, decompress.read_err.?);
        try testing.expectEqual(expected_err, decompress.read_err orelse return error.TestFailed);
    } else {
        _ = try r.streamRemaining(&aw.writer);
        try testing.expectEqualStrings(c.out, aw.getWritten());
@@ -891,3 +897,148 @@ test "reading into empty buffer" {
    var buf: [0]u8 = undefined;
    try testing.expectEqual(0, try r.readVec(&.{&buf}));
}
test "don't read past deflate stream's end" {
|
||||
try testDecompress(.zlib, &[_]u8{
|
||||
0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
|
||||
0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
|
||||
0x83, 0x95, 0x0b, 0xf5,
|
||||
}, &[_]u8{
|
||||
0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
|
||||
0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
|
||||
0x00, 0x00, 0xff, 0xff, 0xff,
|
||||
});
|
||||
}
|
||||
|
||||
test "zlib header" {
|
||||
// Truncated header
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.zlib, &[_]u8{0x78}, ""),
|
||||
);
|
||||
// Wrong CM
|
||||
try testing.expectError(
|
||||
error.BadZlibHeader,
|
||||
testDecompress(.zlib, &[_]u8{ 0x79, 0x94 }, ""),
|
||||
);
|
||||
// Wrong CINFO
|
||||
try testing.expectError(
|
||||
error.BadZlibHeader,
|
||||
testDecompress(.zlib, &[_]u8{ 0x88, 0x98 }, ""),
|
||||
);
|
||||
// Wrong checksum
|
||||
try testing.expectError(
|
||||
error.WrongZlibChecksum,
|
||||
testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
|
||||
);
|
||||
// Truncated checksum
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
|
||||
);
|
||||
}
|
||||
|
||||
test "gzip header" {
|
||||
// Truncated header
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.gzip, &[_]u8{ 0x1f, 0x8B }, undefined),
|
||||
);
|
||||
// Wrong CM
|
||||
try testing.expectError(
|
||||
error.BadGzipHeader,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03,
|
||||
}, undefined),
|
||||
);
|
||||
|
||||
// Wrong checksum
|
||||
try testing.expectError(
|
||||
error.WrongGzipChecksum,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
}, undefined),
|
||||
);
|
||||
// Truncated checksum
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
|
||||
}, undefined),
|
||||
);
|
||||
// Wrong initial size
|
||||
try testing.expectError(
|
||||
error.WrongGzipSize,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x01,
|
||||
}, undefined),
|
||||
);
|
||||
// Truncated initial size field
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00,
|
||||
}, undefined),
|
||||
);
|
||||
|
||||
try testDecompress(.gzip, &[_]u8{
|
||||
// GZIP header
|
||||
0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00,
|
||||
// header.FHCRC (should cover entire header)
|
||||
0x99, 0xd6,
|
||||
// GZIP data
|
||||
0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
}, "");
|
||||
}
|
||||
|
||||
fn testDecompress(container: Container, compressed: []const u8, expected_plain: []const u8) !void {
    var in: std.Io.Reader = .fixed(compressed);
    var aw: std.Io.Writer.Allocating = .init(testing.allocator);
    defer aw.deinit();

    var decompress: Decompress = .init(&in, container, &.{});
    _ = try decompress.reader.streamRemaining(&aw.writer);
    try testing.expectEqualSlices(u8, expected_plain, aw.getWritten());
}

test "zlib should not overshoot" {
    // Compressed zlib data with extra 4 bytes at the end.
    const data = [_]u8{
        0x78, 0x9c, 0x73, 0xce, 0x2f, 0xa8, 0x2c, 0xca, 0x4c, 0xcf, 0x28, 0x51, 0x08, 0xcf, 0xcc, 0xc9,
        0x49, 0xcd, 0x55, 0x28, 0x4b, 0xcc, 0x53, 0x08, 0x4e, 0xce, 0x48, 0xcc, 0xcc, 0xd6, 0x51, 0x08,
        0xce, 0xcc, 0x4b, 0x4f, 0x2c, 0xc8, 0x2f, 0x4a, 0x55, 0x30, 0xb4, 0xb4, 0x34, 0xd5, 0xb5, 0x34,
        0x03, 0x00, 0x8b, 0x61, 0x0f, 0xa4, 0x52, 0x5a, 0x94, 0x12,
    };

    var reader: std.Io.Reader = .fixed(&data);

    var decompress: Decompress = .init(&reader, .zlib, &.{});
    var out: [128]u8 = undefined;

    {
        const n = try decompress.reader.readSliceShort(out[0..]);

        // Expected decompressed data
        try std.testing.expectEqual(46, n);
        try std.testing.expectEqualStrings("Copyright Willem van Schaik, Singapore 1995-96", out[0..n]);

        // The decompressor doesn't overshoot the underlying reader;
        // it leaves it at the end of the compressed data chunk.
        try std.testing.expectEqual(data.len - 4, reader.seek);
        // TODO what was this testing, exactly?
        //try std.testing.expectEqual(0, decompress.unreadBytes());
    }

    // 4 bytes after compressed chunk are available in reader.
    const n = try reader.readSliceShort(out[0..]);
    try std.testing.expectEqual(n, 4);
    try std.testing.expectEqualSlices(u8, data[data.len - 4 .. data.len], out[0..n]);
}
@@ -135,7 +135,7 @@ fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize)
    // of ancestors of the rightmost node at level i.
    // leaf_counts[i][j] is the number of literals at the left
    // of the level j ancestor.
    var leaf_counts: [max_bits_limit][max_bits_limit]u32 = @splat(0);
    var leaf_counts: [max_bits_limit][max_bits_limit]u32 = @splat(@splat(0));

    {
        var level = @as(u32, 1);
@@ -389,7 +389,8 @@ pub fn huffmanDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder
}

test "generate a Huffman code for the fixed literal table specific to Deflate" {
    const enc = fixedLiteralEncoder();
    var codes: [max_num_frequencies]Code = undefined;
    const enc: HuffmanEncoder = .fixedLiteralEncoder(&codes);
    for (enc.codes) |c| {
        switch (c.len) {
            7 => {
@@ -6,14 +6,19 @@ const std = @import("std");
const testing = std.testing;
const expect = testing.expect;
const flate = @import("../flate.zig");
const Token = @import("Token.zig");

const Lookup = @This();

const prime4 = 0x9E3779B1; // 4-byte prime number 2654435761
const chain_len = 2 * flate.history_len;

pub const bits = 15;
pub const len = 1 << bits;
pub const shift = 32 - bits;

// Maps hash => first position
head: [flate.lookup.len]u16 = [_]u16{0} ** flate.lookup.len,
head: [len]u16 = [_]u16{0} ** len,
// Maps position => previous positions for the same hash value
chain: [chain_len]u16 = [_]u16{0} ** (chain_len),

@@ -52,8 +57,8 @@ pub fn slide(self: *Lookup, n: u16) void {

// Add `len` 4-byte hashes from `data` into the lookup.
// Position of the first byte is `pos`.
pub fn bulkAdd(self: *Lookup, data: []const u8, len: u16, pos: u16) void {
    if (len == 0 or data.len < flate.match.min_length) {
pub fn bulkAdd(self: *Lookup, data: []const u8, length: u16, pos: u16) void {
    if (length == 0 or data.len < Token.min_length) {
        return;
    }
    var hb =
@@ -64,7 +69,7 @@ pub fn bulkAdd(self: *Lookup, data: []const u8, len: u16, pos: u16) void {
    _ = self.set(hashu(hb), pos);

    var i = pos;
    for (4..@min(len + 3, data.len)) |j| {
    for (4..@min(length + 3, data.len)) |j| {
        hb = (hb << 8) | @as(u32, data[j]);
        i += 1;
        _ = self.set(hashu(hb), i);
@@ -80,7 +85,7 @@ fn hash(b: *const [4]u8) u32 {
}

fn hashu(v: u32) u32 {
    return @intCast((v *% prime4) >> flate.lookup.shift);
    return @intCast((v *% prime4) >> shift);
}
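A small worked example of the multiplicative hash above: the four input bytes are packed big-endian (the same `(hb << 8) | byte` order that `bulkAdd` uses), multiplied by `prime4` with wraparound, and the top `bits` bits select one of `len` buckets. A sketch, under those assumptions:

test "hashu maps into the table" {
    const v: u32 = std.mem.readInt(u32, "blah", .big); // assumed packing order
    const idx = (v *% prime4) >> shift;
    try expect(idx < len);
}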

test add {
@@ -6,7 +6,6 @@ const std = @import("std");
const assert = std.debug.assert;
const print = std.debug.print;
const expect = std.testing.expect;
const match = std.compress.flate.match;

const Token = @This();

@@ -21,16 +20,23 @@ dist: u15 = 0,
len_lit: u8 = 0,
kind: Kind = .literal,

pub const base_length = 3; // smallest match length per the RFC section 3.2.5
pub const min_length = 4; // min length used in this algorithm
pub const max_length = 258;

pub const min_distance = 1;
pub const max_distance = std.compress.flate.history_len;

pub fn literal(t: Token) u8 {
    return t.len_lit;
}

pub fn distance(t: Token) u16 {
    return @as(u16, t.dist) + match.min_distance;
    return @as(u16, t.dist) + min_distance;
}

pub fn length(t: Token) u16 {
    return @as(u16, t.len_lit) + match.base_length;
    return @as(u16, t.len_lit) + base_length;
}

pub fn initLiteral(lit: u8) Token {
@@ -40,12 +46,12 @@ pub fn initLiteral(lit: u8) Token {
// distance range 1 - 32768, stored in dist as 0 - 32767 (u15)
// length range 3 - 258, stored in len_lit as 0 - 255 (u8)
pub fn initMatch(dist: u16, len: u16) Token {
    assert(len >= match.min_length and len <= match.max_length);
    assert(dist >= match.min_distance and dist <= match.max_distance);
    assert(len >= min_length and len <= max_length);
    assert(dist >= min_distance and dist <= max_distance);
    return .{
        .kind = .match,
        .dist = @intCast(dist - match.min_distance),
        .len_lit = @intCast(len - match.base_length),
        .dist = @intCast(dist - min_distance),
        .len_lit = @intCast(len - base_length),
    };
}
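A worked example of the bias encoding described in the comments above: the smallest match this encoder accepts is len = 4 at dist = 1, stored as len_lit = 1 (4 - base_length) and dist = 0 (1 - min_distance), and it decodes back losslessly. A minimal sketch using only the declarations shown in this hunk:

test "initMatch round trips the bias encoding" {
    const t = initMatch(1, 4);
    try expect(t.len_lit == 1 and t.dist == 0);
    try expect(t.length() == 4 and t.distance() == 1);
}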