diff --git a/lib/std/compress/flate.zig b/lib/std/compress/flate.zig index 73f98271a4..032ea8a779 100644 --- a/lib/std/compress/flate.zig +++ b/lib/std/compress/flate.zig @@ -1,7 +1,23 @@ -const builtin = @import("builtin"); const std = @import("../std.zig"); -const testing = std.testing; -const Writer = std.Io.Writer; + +/// When decompressing, the output buffer is used as the history window, so +/// a smaller buffer may result in failure to decompress streams that were +/// compressed with a larger window. +pub const max_window_len = history_len * 2; + +pub const history_len = 32768; + +/// Deflate is a lossless data compression file format that uses a combination +/// of LZ77 and Huffman coding. +pub const Compress = @import("flate/Compress.zig"); + +/// Inflate is the decoding process that takes a Deflate bitstream for +/// decompression and correctly produces the original full-size data or file. +pub const Decompress = @import("flate/Decompress.zig"); + +/// Compression without Lempel-Ziv match searching. Faster compression and +/// lower memory requirements, but bigger compressed sizes. +pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig"); /// Container of the deflate bit stream body. Container adds header before /// deflate bit stream and footer after. It can bi gzip, zlib or raw (no header, @@ -13,7 +29,6 @@ const Writer = std.Io.Writer; /// Gzip format is defined in rfc 1952. Header has 10+ bytes and footer 4 bytes /// crc32 checksum and 4 bytes of uncompressed data length. /// -/// /// rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4 /// rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5 pub const Container = enum { @@ -84,7 +99,7 @@ pub const Container = enum { pub fn init(containter: Container) Hasher { return switch (containter) { .gzip => .{ .gzip = .{} }, - .zlib => .{ .zlib = .init() }, + .zlib => .{ .zlib = .{} }, .raw => .raw, }; } @@ -107,7 +122,7 @@ pub const Container = enum { } } - pub fn writeFooter(hasher: *Hasher, writer: *Writer) Writer.Error!void { + pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void { var bits: [4]u8 = undefined; switch (hasher.*) { .gzip => |*gzip| { @@ -135,484 +150,6 @@ pub const Container = enum { }; }; -/// When decompressing, the output buffer is used as the history window, so -/// less than this may result in failure to decompress streams that were -/// compressed with a larger window. -pub const max_window_len = 1 << 16; - -/// Deflate is a lossless data compression file format that uses a combination -/// of LZ77 and Huffman coding. -pub const Compress = @import("flate/Compress.zig"); - -/// Inflate is the decoding process that takes a Deflate bitstream for -/// decompression and correctly produces the original full-size data or file. -pub const Decompress = @import("flate/Decompress.zig"); - -/// Compression without Lempel-Ziv match searching. Faster compression, less -/// memory requirements but bigger compressed sizes.
-pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig"); - -test "compress/decompress" { - const print = std.debug.print; - var cmp_buf: [64 * 1024]u8 = undefined; // compressed data buffer - var dcm_buf: [64 * 1024]u8 = undefined; // decompressed data buffer - - const levels = [_]Compress.Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 }; - const cases = [_]struct { - data: []const u8, // uncompressed content - // compressed data sizes per level 4-9 - gzip_sizes: [levels.len]usize = [_]usize{0} ** levels.len, - huffman_only_size: usize = 0, - store_size: usize = 0, - }{ - .{ - .data = @embedFile("flate/testdata/rfc1951.txt"), - .gzip_sizes = [_]usize{ 11513, 11217, 11139, 11126, 11122, 11119 }, - .huffman_only_size = 20287, - .store_size = 36967, - }, - .{ - .data = @embedFile("flate/testdata/fuzz/roundtrip1.input"), - .gzip_sizes = [_]usize{ 373, 370, 370, 370, 370, 370 }, - .huffman_only_size = 393, - .store_size = 393, - }, - .{ - .data = @embedFile("flate/testdata/fuzz/roundtrip2.input"), - .gzip_sizes = [_]usize{ 373, 373, 373, 373, 373, 373 }, - .huffman_only_size = 394, - .store_size = 394, - }, - .{ - .data = @embedFile("flate/testdata/fuzz/deflate-stream.expect"), - .gzip_sizes = [_]usize{ 351, 347, 347, 347, 347, 347 }, - .huffman_only_size = 498, - .store_size = 747, - }, - }; - - for (cases, 0..) |case, case_no| { - const data = case.data; - - for (levels, 0..) |level, i| { - for (Container.list) |container| { - var compressed_size: usize = if (case.gzip_sizes[i] > 0) - case.gzip_sizes[i] - Container.gzip.size() + container.size() - else - 0; - - // compress original stream to compressed stream - { - var compressed: Writer = .fixed(&cmp_buf); - var compress: Compress = .init(&compressed, &.{}, .{ .container = .raw, .level = level }); - try compress.writer.writeAll(data); - try compress.end(); - - if (compressed_size == 0) { - if (container == .gzip) - print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos }); - compressed_size = compressed.end; - } - try testing.expectEqual(compressed_size, compressed.end); - } - // decompress compressed stream to decompressed stream - { - var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]); - var decompressed: Writer = .fixed(&dcm_buf); - var decompress: Decompress = .init(&compressed, container, &.{}); - _ = try decompress.reader.streamRemaining(&decompressed); - try testing.expectEqualSlices(u8, data, decompressed.buffered()); - } - - // compressor writer interface - { - var compressed: Writer = .fixed(&cmp_buf); - var cmp = try Compress.init(&compressed, &.{}, .{ - .level = level, - .container = container, - }); - var cmp_wrt = cmp.writer(); - try cmp_wrt.writeAll(data); - try cmp.finish(); - - try testing.expectEqual(compressed_size, compressed.pos); - } - // decompressor reader interface - { - var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]); - var decompress: Decompress = .init(&compressed, container, &.{}); - const n = try decompress.reader.readSliceShort(&dcm_buf); - try testing.expectEqual(data.len, n); - try testing.expectEqualSlices(u8, data, dcm_buf[0..n]); - } - } - } - // huffman only compression - { - for (Container.list) |container| { - var compressed_size: usize = if (case.huffman_only_size > 0) - case.huffman_only_size - Container.gzip.size() + container.size() - else - 0; - - // compress original stream to compressed stream - { - var original: std.Io.Reader = .fixed(data); - var compressed: Writer = .fixed(&cmp_buf); - var cmp = 
try Compress.Huffman.init(container, &compressed); - try cmp.compress(original.reader()); - try cmp.finish(); - if (compressed_size == 0) { - if (container == .gzip) - print("case {d} huffman only compressed size: {d}\n", .{ case_no, compressed.pos }); - compressed_size = compressed.pos; - } - try testing.expectEqual(compressed_size, compressed.pos); - } - // decompress compressed stream to decompressed stream - { - var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]); - var decompress: Decompress = .init(&compressed, container, &.{}); - var decompressed: Writer = .fixed(&dcm_buf); - _ = try decompress.reader.streamRemaining(&decompressed); - try testing.expectEqualSlices(u8, data, decompressed.buffered()); - } - } - } - - // store only - { - for (Container.list) |container| { - var compressed_size: usize = if (case.store_size > 0) - case.store_size - Container.gzip.size() + container.size() - else - 0; - - // compress original stream to compressed stream - { - var original: std.Io.Reader = .fixed(data); - var compressed: Writer = .fixed(&cmp_buf); - var cmp = try Compress.SimpleCompressor(.store, container).init(&compressed); - try cmp.compress(original.reader()); - try cmp.finish(); - if (compressed_size == 0) { - if (container == .gzip) - print("case {d} store only compressed size: {d}\n", .{ case_no, compressed.pos }); - compressed_size = compressed.pos; - } - - try testing.expectEqual(compressed_size, compressed.pos); - } - // decompress compressed stream to decompressed stream - { - var compressed: std.Io.Reader = .fixed(cmp_buf[0..compressed_size]); - var decompress: Decompress = .init(&compressed, container, &.{}); - var decompressed: Writer = .fixed(&dcm_buf); - _ = try decompress.reader.streamRemaining(&decompressed); - try testing.expectEqualSlices(u8, data, decompressed.buffered()); - } - } - } - } -} - -fn testDecompress(container: Container, compressed: []const u8, expected_plain: []const u8) !void { - var in: std.Io.Reader = .fixed(compressed); - var aw: std.Io.Writer.Allocating = .init(testing.allocator); - defer aw.deinit(); - - var decompress: Decompress = .init(&in, container, &.{}); - _ = try decompress.reader.streamRemaining(&aw.writer); - try testing.expectEqualSlices(u8, expected_plain, aw.getWritten()); -} - -test "don't read past deflate stream's end" { - try testDecompress(.zlib, &[_]u8{ - 0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff, - 0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01, - 0x83, 0x95, 0x0b, 0xf5, - }, &[_]u8{ - 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, - 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, - 0x00, 0x00, 0xff, 0xff, 0xff, - }); -} - -test "zlib header" { - // Truncated header - try testing.expectError( - error.EndOfStream, - testDecompress(.zlib, &[_]u8{0x78}, ""), - ); - // Wrong CM - try testing.expectError( - error.BadZlibHeader, - testDecompress(.zlib, &[_]u8{ 0x79, 0x94 }, ""), - ); - // Wrong CINFO - try testing.expectError( - error.BadZlibHeader, - testDecompress(.zlib, &[_]u8{ 0x88, 0x98 }, ""), - ); - // Wrong checksum - try testing.expectError( - error.WrongZlibChecksum, - testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""), - ); - // Truncated checksum - try testing.expectError( - error.EndOfStream, - testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""), - ); -} - -test "gzip header" { - // Truncated header - try testing.expectError( - error.EndOfStream, - testDecompress(.gzip, &[_]u8{ 0x1f, 0x8B }, undefined), - ); 
- // Wrong CM - try testing.expectError( - error.BadGzipHeader, - testDecompress(.gzip, &[_]u8{ - 0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x03, - }, undefined), - ); - - // Wrong checksum - try testing.expectError( - error.WrongGzipChecksum, - testDecompress(.gzip, &[_]u8{ - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, - }, undefined), - ); - // Truncated checksum - try testing.expectError( - error.EndOfStream, - testDecompress(.gzip, &[_]u8{ - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, - }, undefined), - ); - // Wrong initial size - try testing.expectError( - error.WrongGzipSize, - testDecompress(.gzip, &[_]u8{ - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, - }, undefined), - ); - // Truncated initial size field - try testing.expectError( - error.EndOfStream, - testDecompress(.gzip, &[_]u8{ - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, - }, undefined), - ); - - try testDecompress(.gzip, &[_]u8{ - // GZIP header - 0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00, - // header.FHCRC (should cover entire header) - 0x99, 0xd6, - // GZIP data - 0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }, ""); -} - -test "public interface" { - const plain_data_buf = [_]u8{ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a }; - - // deflate final stored block, header + plain (stored) data - const deflate_block = [_]u8{ - 0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen - } ++ plain_data_buf; - - const plain_data: []const u8 = &plain_data_buf; - const gzip_data: []const u8 = &deflate_block; - - //// gzip header/footer + deflate block - //const gzip_data = - // [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 } ++ // gzip header (10 bytes) - // deflate_block ++ - // [_]u8{ 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00 }; // gzip footer checksum (4 byte), size (4 bytes) - - //// zlib header/footer + deflate block - //const zlib_data = [_]u8{ 0x78, 0b10_0_11100 } ++ // zlib header (2 bytes)} - // deflate_block ++ - // [_]u8{ 0x1c, 0xf2, 0x04, 0x47 }; // zlib footer: checksum - - // TODO - //const gzip = @import("gzip.zig"); - //const zlib = @import("zlib.zig"); - - var buffer1: [64]u8 = undefined; - var buffer2: [64]u8 = undefined; - - // decompress - { - var plain: Writer = .fixed(&buffer2); - var in: std.Io.Reader = .fixed(gzip_data); - var d: Decompress = .init(&in, .raw, &.{}); - _ = try d.reader.streamRemaining(&plain); - try testing.expectEqualSlices(u8, plain_data, plain.buffered()); - } - - // compress/decompress - { - var plain: Writer = .fixed(&buffer2); - var compressed: Writer = .fixed(&buffer1); - - var cmp: Compress = .init(&compressed, &.{}, .{}); - try cmp.writer.writeAll(plain_data); - try cmp.end(); - - var r: std.Io.Reader = .fixed(&buffer1); - var d: Decompress = .init(&r, .raw, &.{}); - _ = try d.reader.streamRemaining(&plain); - try testing.expectEqualSlices(u8, plain_data, plain.buffered()); - } - - // compressor/decompressor - { - var plain: Writer = .fixed(&buffer2); - var compressed: Writer = .fixed(&buffer1); - - var cmp: Compress = .init(&compressed, &.{}, .{}); - try cmp.writer.writeAll(plain_data); - try cmp.end(); - - var r: std.Io.Reader = 
.fixed(&buffer1); - var dcp = Decompress(&r); - try dcp.decompress(&plain); - try testing.expectEqualSlices(u8, plain_data, plain.buffered()); - } - - // huffman - { - // huffman compress/decompress - { - var plain: Writer = .fixed(&buffer2); - var compressed: Writer = .fixed(&buffer1); - - var in: std.Io.Reader = .fixed(plain_data); - try HuffmanEncoder.compress(&in, &compressed); - - var r: std.Io.Reader = .fixed(&buffer1); - var d: Decompress = .init(&r, .raw, &.{}); - _ = try d.reader.streamRemaining(&plain); - try testing.expectEqualSlices(u8, plain_data, plain.buffered()); - } - - // huffman compressor/decompressor - { - var plain: Writer = .fixed(&buffer2); - var compressed: Writer = .fixed(&buffer1); - - var in: std.Io.Reader = .fixed(plain_data); - var cmp = try HuffmanEncoder.Compressor(&compressed); - try cmp.compress(&in); - try cmp.finish(); - - var r: std.Io.Reader = .fixed(&buffer1); - var d: Decompress = .init(&r, .raw, &.{}); - _ = try d.reader.streamRemaining(&plain); - try testing.expectEqualSlices(u8, plain_data, plain.buffered()); - } - } - - // TODO - //{ - // // store compress/decompress - // { - // var plain: Writer = .fixed(&buffer2); - // var compressed: Writer = .fixed(&buffer1); - - // var in: std.Io.Reader = .fixed(plain_data); - // try store.compress(&in, &compressed); - - // var r: std.Io.Reader = .fixed(&buffer1); - // var d: Decompress = .init(&r, .raw, &.{}); - // _ = try d.reader.streamRemaining(&plain); - // try testing.expectEqualSlices(u8, plain_data, plain.buffered()); - // } - - // // store compressor/decompressor - // { - // var plain: Writer = .fixed(&buffer2); - // var compressed: Writer = .fixed(&buffer1); - - // var in: std.Io.Reader = .fixed(plain_data); - // var cmp = try store.compressor(&compressed); - // try cmp.compress(&in); - // try cmp.finish(); - - // var r: std.Io.Reader = .fixed(&buffer1); - // var d: Decompress = .init(&r, .raw, &.{}); - // _ = try d.reader.streamRemaining(&plain); - // try testing.expectEqualSlices(u8, plain_data, plain.buffered()); - // } - //} -} - -pub const match = struct { - pub const base_length = 3; // smallest match length per the RFC section 3.2.5 - pub const min_length = 4; // min length used in this algorithm - pub const max_length = 258; - - pub const min_distance = 1; - pub const max_distance = 32768; -}; - -pub const history_len = match.max_distance; - -pub const lookup = struct { - pub const bits = 15; - pub const len = 1 << bits; - pub const shift = 32 - bits; -}; - -test "zlib should not overshoot" { - // Compressed zlib data with extra 4 bytes at the end. - const data = [_]u8{ - 0x78, 0x9c, 0x73, 0xce, 0x2f, 0xa8, 0x2c, 0xca, 0x4c, 0xcf, 0x28, 0x51, 0x08, 0xcf, 0xcc, 0xc9, - 0x49, 0xcd, 0x55, 0x28, 0x4b, 0xcc, 0x53, 0x08, 0x4e, 0xce, 0x48, 0xcc, 0xcc, 0xd6, 0x51, 0x08, - 0xce, 0xcc, 0x4b, 0x4f, 0x2c, 0xc8, 0x2f, 0x4a, 0x55, 0x30, 0xb4, 0xb4, 0x34, 0xd5, 0xb5, 0x34, - 0x03, 0x00, 0x8b, 0x61, 0x0f, 0xa4, 0x52, 0x5a, 0x94, 0x12, - }; - - var reader: std.Io.Reader = .fixed(&data); - - var decompress: Decompress = .init(&reader, .zlib, &.{}); - var out: [128]u8 = undefined; - - { - const n = try decompress.reader.readSliceShort(out[0..]); - - // Expected decompressed data - try std.testing.expectEqual(46, n); - try std.testing.expectEqualStrings("Copyright Willem van Schaik, Singapore 1995-96", out[0..n]); - - // Decompressor don't overshoot underlying reader. - // It is leaving it at the end of compressed data chunk. 
- try std.testing.expectEqual(data.len - 4, reader.seek); - // TODO what was this testing, exactly? - //try std.testing.expectEqual(0, decompress.unreadBytes()); - } - - // 4 bytes after compressed chunk are available in reader. - const n = try reader.readSliceShort(out[0..]); - try std.testing.expectEqual(n, 4); - try std.testing.expectEqualSlices(u8, data[data.len - 4 .. data.len], out[0..n]); -} - test { _ = HuffmanEncoder; _ = Compress; diff --git a/lib/std/compress/flate/BlockWriter.zig b/lib/std/compress/flate/BlockWriter.zig index b3af65051a..d0e9dc1203 100644 --- a/lib/std/compress/flate/BlockWriter.zig +++ b/lib/std/compress/flate/BlockWriter.zig @@ -31,7 +31,26 @@ fixed_literal_codes: [HuffmanEncoder.max_num_frequencies]HuffmanEncoder.Code, fixed_distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code, distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code, -pub fn init(bw: *BlockWriter) void { +pub fn init(output: *Writer) BlockWriter { + return .{ + .output = output, + .codegen_freq = undefined, + .literal_freq = undefined, + .distance_freq = undefined, + .codegen = undefined, + .literal_encoding = undefined, + .distance_encoding = undefined, + .codegen_encoding = undefined, + .fixed_literal_encoding = undefined, + .fixed_distance_encoding = undefined, + .huff_distance = undefined, + .fixed_literal_codes = undefined, + .fixed_distance_codes = undefined, + .distance_codes = undefined, + }; +} + +pub fn initBuffers(bw: *BlockWriter) void { bw.fixed_literal_encoding = .fixedLiteralEncoder(&bw.fixed_literal_codes); bw.fixed_distance_encoding = .fixedDistanceEncoder(&bw.fixed_distance_codes); bw.huff_distance = .huffmanDistanceEncoder(&bw.distance_codes); diff --git a/lib/std/compress/flate/Compress.zig b/lib/std/compress/flate/Compress.zig index f38f7b2703..2249ece4c0 100644 --- a/lib/std/compress/flate/Compress.zig +++ b/lib/std/compress/flate/Compress.zig @@ -122,22 +122,7 @@ pub const Options = struct { pub fn init(output: *Writer, buffer: []u8, options: Options) Compress { return .{ - .block_writer = .{ - .output = output, - .codegen_freq = undefined, - .literal_freq = undefined, - .distance_freq = undefined, - .codegen = undefined, - .literal_encoding = undefined, - .distance_encoding = undefined, - .codegen_encoding = undefined, - .fixed_literal_encoding = undefined, - .fixed_distance_encoding = undefined, - .huff_distance = undefined, - .fixed_literal_codes = undefined, - .fixed_distance_codes = undefined, - .distance_codes = undefined, - }, + .block_writer = .init(output), .level = .get(options.level), .hasher = .init(options.container), .state = .header, @@ -188,20 +173,21 @@ fn drain(me: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize } const buffered = me.buffered(); - const min_lookahead = flate.match.min_length + flate.match.max_length; + const min_lookahead = Token.min_length + Token.max_length; const history_plus_lookahead_len = flate.history_len + min_lookahead; if (buffered.len < history_plus_lookahead_len) return 0; const lookahead = buffered[flate.history_len..]; - _ = lookahead; // TODO tokenize + _ = lookahead; //c.hasher.update(lookahead[0..n]); @panic("TODO"); } pub fn end(c: *Compress) !void { try endUnflushed(c); - try c.output.flush(); + const out = c.block_writer.output; + try out.flush(); } pub fn endUnflushed(c: *Compress) !void { @@ -227,7 +213,7 @@ pub fn endUnflushed(c: *Compress) !void { // Checksum value of the uncompressed data (excluding any // dictionary data) computed according to Adler-32 // 
algorithm. - std.mem.writeInt(u32, try out.writableArray(4), zlib.final, .big); + std.mem.writeInt(u32, try out.writableArray(4), zlib.adler, .big); }, .raw => {}, } @@ -243,15 +229,16 @@ pub const Simple = struct { pub const Strategy = enum { huffman, store }; - pub fn init(out: *Writer, buffer: []u8, container: Container) !Simple { - const self: Simple = .{ + pub fn init(output: *Writer, buffer: []u8, container: Container, strategy: Strategy) !Simple { + const header = container.header(); + try output.writeAll(header); + return .{ .buffer = buffer, .wp = 0, - .block_writer = .init(out), + .block_writer = .init(output), .hasher = .init(container), + .strategy = strategy, }; - try container.writeHeader(self.out); - return self; } pub fn flush(self: *Simple) !void { @@ -263,7 +250,7 @@ pub const Simple = struct { pub fn finish(self: *Simple) !void { try self.flushBuffer(true); try self.block_writer.flush(); - try self.hasher.container().writeFooter(&self.hasher, self.out); + try self.hasher.container().writeFooter(&self.hasher, self.block_writer.output); } fn flushBuffer(self: *Simple, final: bool) !void { @@ -300,7 +287,13 @@ test "generate a Huffman code from an array of frequencies" { }; var codes: [19]HuffmanEncoder.Code = undefined; - var enc: HuffmanEncoder = .{ .codes = &codes }; + var enc: HuffmanEncoder = .{ + .codes = &codes, + .freq_cache = undefined, + .bit_count = undefined, + .lns = undefined, + .lfs = undefined, + }; enc.generate(freqs[0..], 7); try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..])); @@ -337,247 +330,3 @@ test "generate a Huffman code from an array of frequencies" { try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code); try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code); } - -test "tokenization" { - const L = Token.initLiteral; - const M = Token.initMatch; - - const cases = [_]struct { - data: []const u8, - tokens: []const Token, - }{ - .{ - .data = "Blah blah blah blah blah!", - .tokens = &[_]Token{ L('B'), L('l'), L('a'), L('h'), L(' '), L('b'), M(5, 18), L('!') }, - }, - .{ - .data = "ABCDEABCD ABCDEABCD", - .tokens = &[_]Token{ - L('A'), L('B'), L('C'), L('D'), L('E'), L('A'), L('B'), L('C'), L('D'), L(' '), - L('A'), M(10, 8), - }, - }, - }; - - for (cases) |c| { - inline for (Container.list) |container| { // for each wrapping - - var cw = std.Io.countingWriter(std.Io.null_writer); - const cww = cw.writer(); - var df = try Compress(container, @TypeOf(cww), TestTokenWriter).init(cww, .{}); - - _ = try df.write(c.data); - try df.flush(); - - // df.token_writer.show(); - try expect(df.block_writer.pos == c.tokens.len); // number of tokens written - try testing.expectEqualSlices(Token, df.block_writer.get(), c.tokens); // tokens match - - try testing.expectEqual(container.headerSize(), cw.bytes_written); - try df.finish(); - try testing.expectEqual(container.size(), cw.bytes_written); - } - } -} - -// Tests that tokens written are equal to expected token list. 
-const TestTokenWriter = struct { - const Self = @This(); - - pos: usize = 0, - actual: [128]Token = undefined, - - pub fn init(_: anytype) Self { - return .{}; - } - pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void { - for (tokens) |t| { - self.actual[self.pos] = t; - self.pos += 1; - } - } - - pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {} - - pub fn get(self: *Self) []Token { - return self.actual[0..self.pos]; - } - - pub fn show(self: *Self) void { - std.debug.print("\n", .{}); - for (self.get()) |t| { - t.show(); - } - } - - pub fn flush(_: *Self) !void {} -}; - -test "file tokenization" { - const levels = [_]Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 }; - const cases = [_]struct { - data: []const u8, // uncompressed content - // expected number of tokens producet in deflate tokenization - tokens_count: [levels.len]usize = .{0} ** levels.len, - }{ - .{ - .data = @embedFile("testdata/rfc1951.txt"), - .tokens_count = .{ 7675, 7672, 7599, 7594, 7598, 7599 }, - }, - - .{ - .data = @embedFile("testdata/block_writer/huffman-null-max.input"), - .tokens_count = .{ 257, 257, 257, 257, 257, 257 }, - }, - .{ - .data = @embedFile("testdata/block_writer/huffman-pi.input"), - .tokens_count = .{ 2570, 2564, 2564, 2564, 2564, 2564 }, - }, - .{ - .data = @embedFile("testdata/block_writer/huffman-text.input"), - .tokens_count = .{ 235, 234, 234, 234, 234, 234 }, - }, - .{ - .data = @embedFile("testdata/fuzz/roundtrip1.input"), - .tokens_count = .{ 333, 331, 331, 331, 331, 331 }, - }, - .{ - .data = @embedFile("testdata/fuzz/roundtrip2.input"), - .tokens_count = .{ 334, 334, 334, 334, 334, 334 }, - }, - }; - - for (cases) |case| { // for each case - const data = case.data; - - for (levels, 0..) |level, i| { // for each compression level - var original: std.Io.Reader = .fixed(data); - - // buffer for decompressed data - var al = std.ArrayList(u8).init(testing.allocator); - defer al.deinit(); - const writer = al.writer(); - - // create compressor - const WriterType = @TypeOf(writer); - const TokenWriter = TokenDecoder(@TypeOf(writer)); - var cmp = try Compress(.raw, WriterType, TokenWriter).init(writer, .{ .level = level }); - - // Stream uncompressed `original` data to the compressor. It will - // produce tokens list and pass that list to the TokenDecoder. This - // TokenDecoder uses CircularBuffer from inflate to convert list of - // tokens back to the uncompressed stream. 
- try cmp.compress(original.reader()); - try cmp.flush(); - const expected_count = case.tokens_count[i]; - const actual = cmp.block_writer.tokens_count; - if (expected_count == 0) { - std.debug.print("actual token count {d}\n", .{actual}); - } else { - try testing.expectEqual(expected_count, actual); - } - - try testing.expectEqual(data.len, al.items.len); - try testing.expectEqualSlices(u8, data, al.items); - } - } -} - -const TokenDecoder = struct { - output: *Writer, - tokens_count: usize, - - pub fn init(output: *Writer) TokenDecoder { - return .{ - .output = output, - .tokens_count = 0, - }; - } - - pub fn write(self: *TokenDecoder, tokens: []const Token, _: bool, _: ?[]const u8) !void { - self.tokens_count += tokens.len; - for (tokens) |t| { - switch (t.kind) { - .literal => self.hist.write(t.literal()), - .match => try self.hist.writeMatch(t.length(), t.distance()), - } - if (self.hist.free() < 285) try self.flushWin(); - } - try self.flushWin(); - } - - fn flushWin(self: *TokenDecoder) !void { - while (true) { - const buf = self.hist.read(); - if (buf.len == 0) break; - try self.output.writeAll(buf); - } - } -}; - -test "store simple compressor" { - if (true) return error.SkipZigTest; - //const data = "Hello world!"; - //const expected = [_]u8{ - // 0x1, // block type 0, final bit set - // 0xc, 0x0, // len = 12 - // 0xf3, 0xff, // ~len - // 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', // - // //0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21, - //}; - - //var fbs: std.Io.Reader = .fixed(data); - //var al = std.ArrayList(u8).init(testing.allocator); - //defer al.deinit(); - - //var cmp = try store.compressor(.raw, al.writer()); - //try cmp.compress(&fbs); - //try cmp.finish(); - //try testing.expectEqualSlices(u8, &expected, al.items); - - //fbs = .fixed(data); - //try al.resize(0); - - //// huffman only compresoor will also emit store block for this small sample - //var hc = try huffman.compressor(.raw, al.writer()); - //try hc.compress(&fbs); - //try hc.finish(); - //try testing.expectEqualSlices(u8, &expected, al.items); -} - -test "sliding window match" { - const data = "Blah blah blah blah blah!"; - var win: Writer = .{}; - try expect(win.write(data) == data.len); - try expect(win.wp == data.len); - try expect(win.rp == 0); - - // length between l symbols - try expect(win.match(1, 6, 0) == 18); - try expect(win.match(1, 11, 0) == 13); - try expect(win.match(1, 16, 0) == 8); - try expect(win.match(1, 21, 0) == 0); - - // position 15 = "blah blah!" - // position 20 = "blah!" 
- try expect(win.match(15, 20, 0) == 4); - try expect(win.match(15, 20, 3) == 4); - try expect(win.match(15, 20, 4) == 0); -} - -test "sliding window slide" { - var win: Writer = .{}; - win.wp = Writer.buffer_len - 11; - win.rp = Writer.buffer_len - 111; - win.buffer[win.rp] = 0xab; - try expect(win.lookahead().len == 100); - try expect(win.tokensBuffer().?.len == win.rp); - - const n = win.slide(); - try expect(n == 32757); - try expect(win.buffer[win.rp] == 0xab); - try expect(win.rp == Writer.hist_len - 111); - try expect(win.wp == Writer.hist_len - 11); - try expect(win.lookahead().len == 100); - try expect(win.tokensBuffer() == null); -} diff --git a/lib/std/compress/flate/Decompress.zig b/lib/std/compress/flate/Decompress.zig index ed9c0f3798..5f603baf9a 100644 --- a/lib/std/compress/flate/Decompress.zig +++ b/lib/std/compress/flate/Decompress.zig @@ -4,8 +4,8 @@ const Container = flate.Container; const Token = @import("Token.zig"); const testing = std.testing; const Decompress = @This(); -const Writer = std.io.Writer; -const Reader = std.io.Reader; +const Writer = std.Io.Writer; +const Reader = std.Io.Reader; input: *Reader, reader: Reader, @@ -129,7 +129,7 @@ fn decodeSymbol(self: *Decompress, decoder: anytype) !Symbol { return sym; } -pub fn stream(r: *Reader, w: *Writer, limit: std.io.Limit) Reader.StreamError!usize { +pub fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize { const d: *Decompress = @alignCast(@fieldParentPtr("reader", r)); return readInner(d, w, limit) catch |err| switch (err) { error.EndOfStream => return error.EndOfStream, @@ -143,7 +143,8 @@ pub fn stream(r: *Reader, w: *Writer, limit: std.io.Limit) Reader.StreamError!us }; } -fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.StreamError)!usize { +fn readInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader.StreamError)!usize { + var remaining = @intFromEnum(limit); const in = d.input; sw: switch (d.state) { .protocol_header => switch (d.hasher.container()) { @@ -182,15 +183,9 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.S continue :sw .block_header; }, .zlib => { - const Header = extern struct { - cmf: packed struct(u8) { - cm: u4, - cinfo: u4, - }, - flg: u8, - }; - const header = try in.takeStruct(Header); - if (header.cmf.cm != 8 or header.cmf.cinfo > 7) return error.BadZlibHeader; + const header = try in.takeArray(2); + const cmf: packed struct(u8) { cm: u4, cinfo: u4 } = @bitCast(header[0]); + if (cmf.cm != 8 or cmf.cinfo > 7) return error.BadZlibHeader; continue :sw .block_header; }, .raw => continue :sw .block_header, @@ -219,7 +214,7 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.S // lengths for code lengths var cl_lens = [_]u4{0} ** 19; for (0..hclen) |i| { - cl_lens[flate.huffman.codegen_order[i]] = try d.takeBits(u3); + cl_lens[flate.HuffmanEncoder.codegen_order[i]] = try d.takeBits(u3); } var cl_dec: CodegenDecoder = .{}; try cl_dec.generate(&cl_lens); @@ -259,52 +254,56 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.S return n; }, .fixed_block => { - const start = w.count; - while (@intFromEnum(limit) > w.count - start) { + while (remaining > 0) { const code = try d.readFixedCode(); switch (code) { - 0...255 => try w.writeBytePreserve(flate.history_len, @intCast(code)), + 0...255 => { + try w.writeBytePreserve(flate.history_len, @intCast(code)); + remaining -= 1; + }, 256 => { d.state = if (d.final_block) 
.protocol_footer else .block_header; - return w.count - start; + return @intFromEnum(limit) - remaining; }, 257...285 => { // Handles fixed block non literal (length) code. // Length code is followed by 5 bits of distance code. const length = try d.decodeLength(@intCast(code - 257)); const distance = try d.decodeDistance(try d.takeBitsReverseBuffered(u5)); - try writeMatch(w, length, distance); + remaining = try writeMatch(w, length, distance, remaining); }, else => return error.InvalidCode, } } d.state = .fixed_block; - return w.count - start; + return @intFromEnum(limit) - remaining; }, .dynamic_block => { - // In larger archives most blocks are usually dynamic, so decompression - // performance depends on this logic. - const start = w.count; - while (@intFromEnum(limit) > w.count - start) { + // In larger archives most blocks are usually dynamic, so + // decompression performance depends on this logic. + while (remaining > 0) { const sym = try d.decodeSymbol(&d.lit_dec); switch (sym.kind) { - .literal => try w.writeBytePreserve(flate.history_len, sym.symbol), + .literal => { + try w.writeBytePreserve(flate.history_len, sym.symbol); + remaining -= 1; + }, .match => { // Decode match backreference const length = try d.decodeLength(sym.symbol); const dsm = try d.decodeSymbol(&d.dst_dec); const distance = try d.decodeDistance(dsm.symbol); - try writeMatch(w, length, distance); + remaining = try writeMatch(w, length, distance, remaining); }, .end_of_block => { d.state = if (d.final_block) .protocol_footer else .block_header; - return w.count - start; + return @intFromEnum(limit) - remaining; }, } } d.state = .dynamic_block; - return w.count - start; + return @intFromEnum(limit) - remaining; }, .protocol_footer => { d.alignBitsToByte(); @@ -314,7 +313,7 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.S if (try in.takeInt(u32, .little) != gzip.count) return error.WrongGzipSize; }, .zlib => |*zlib| { - const chksum: u32 = @byteSwap(zlib.final()); + const chksum: u32 = @byteSwap(zlib.adler); if (try in.takeInt(u32, .big) != chksum) return error.WrongZlibChecksum; }, .raw => {}, @@ -328,10 +327,11 @@ fn readInner(d: *Decompress, w: *Writer, limit: std.io.Limit) (Error || Reader.S /// Write match (back-reference to the same data slice) starting at `distance` /// back from current write position, and `length` of bytes. 
-fn writeMatch(bw: *Writer, length: u16, distance: u16) !void { - _ = bw; +fn writeMatch(w: *Writer, length: u16, distance: u16, remaining: usize) !usize { + _ = w; _ = length; _ = distance; + _ = remaining; @panic("TODO"); } @@ -622,7 +622,13 @@ test "init/find" { test "encode/decode literals" { var codes: [flate.HuffmanEncoder.max_num_frequencies]flate.HuffmanEncoder.Code = undefined; for (1..286) |j| { // for all different number of codes - var enc: flate.HuffmanEncoder = .{ .codes = &codes }; + var enc: flate.HuffmanEncoder = .{ + .codes = &codes, + .freq_cache = undefined, + .bit_count = undefined, + .lns = undefined, + .lfs = undefined, + }; // create frequencies var freq = [_]u16{0} ** 286; freq[256] = 1; // ensure we have end of block code @@ -857,7 +863,7 @@ test "fuzzing tests" { const r = &decompress.reader; if (c.err) |expected_err| { try testing.expectError(error.ReadFailed, r.streamRemaining(&aw.writer)); - try testing.expectError(expected_err, decompress.read_err.?); + try testing.expectEqual(expected_err, decompress.read_err orelse return error.TestFailed); } else { _ = try r.streamRemaining(&aw.writer); try testing.expectEqualStrings(c.out, aw.getWritten()); @@ -891,3 +897,148 @@ test "reading into empty buffer" { var buf: [0]u8 = undefined; try testing.expectEqual(0, try r.readVec(&.{&buf})); } + +test "don't read past deflate stream's end" { + try testDecompress(.zlib, &[_]u8{ + 0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff, + 0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01, + 0x83, 0x95, 0x0b, 0xf5, + }, &[_]u8{ + 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, + 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, + }); +} + +test "zlib header" { + // Truncated header + try testing.expectError( + error.EndOfStream, + testDecompress(.zlib, &[_]u8{0x78}, ""), + ); + // Wrong CM + try testing.expectError( + error.BadZlibHeader, + testDecompress(.zlib, &[_]u8{ 0x79, 0x94 }, ""), + ); + // Wrong CINFO + try testing.expectError( + error.BadZlibHeader, + testDecompress(.zlib, &[_]u8{ 0x88, 0x98 }, ""), + ); + // Wrong checksum + try testing.expectError( + error.WrongZlibChecksum, + testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""), + ); + // Truncated checksum + try testing.expectError( + error.EndOfStream, + testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""), + ); +} + +test "gzip header" { + // Truncated header + try testing.expectError( + error.EndOfStream, + testDecompress(.gzip, &[_]u8{ 0x1f, 0x8B }, undefined), + ); + // Wrong CM + try testing.expectError( + error.BadGzipHeader, + testDecompress(.gzip, &[_]u8{ + 0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, + }, undefined), + ); + + // Wrong checksum + try testing.expectError( + error.WrongGzipChecksum, + testDecompress(.gzip, &[_]u8{ + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, + }, undefined), + ); + // Truncated checksum + try testing.expectError( + error.EndOfStream, + testDecompress(.gzip, &[_]u8{ + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, + }, undefined), + ); + // Wrong initial size + try testing.expectError( + error.WrongGzipSize, + testDecompress(.gzip, &[_]u8{ + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + }, undefined), + ); + // Truncated initial size field + 
try testing.expectError( + error.EndOfStream, + testDecompress(.gzip, &[_]u8{ + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, + }, undefined), + ); + + try testDecompress(.gzip, &[_]u8{ + // GZIP header + 0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00, + // header.FHCRC (should cover entire header) + 0x99, 0xd6, + // GZIP data + 0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, ""); +} + +fn testDecompress(container: Container, compressed: []const u8, expected_plain: []const u8) !void { + var in: std.Io.Reader = .fixed(compressed); + var aw: std.Io.Writer.Allocating = .init(testing.allocator); + defer aw.deinit(); + + var decompress: Decompress = .init(&in, container, &.{}); + _ = try decompress.reader.streamRemaining(&aw.writer); + try testing.expectEqualSlices(u8, expected_plain, aw.getWritten()); +} + +test "zlib should not overshoot" { + // Compressed zlib data with extra 4 bytes at the end. + const data = [_]u8{ + 0x78, 0x9c, 0x73, 0xce, 0x2f, 0xa8, 0x2c, 0xca, 0x4c, 0xcf, 0x28, 0x51, 0x08, 0xcf, 0xcc, 0xc9, + 0x49, 0xcd, 0x55, 0x28, 0x4b, 0xcc, 0x53, 0x08, 0x4e, 0xce, 0x48, 0xcc, 0xcc, 0xd6, 0x51, 0x08, + 0xce, 0xcc, 0x4b, 0x4f, 0x2c, 0xc8, 0x2f, 0x4a, 0x55, 0x30, 0xb4, 0xb4, 0x34, 0xd5, 0xb5, 0x34, + 0x03, 0x00, 0x8b, 0x61, 0x0f, 0xa4, 0x52, 0x5a, 0x94, 0x12, + }; + + var reader: std.Io.Reader = .fixed(&data); + + var decompress: Decompress = .init(&reader, .zlib, &.{}); + var out: [128]u8 = undefined; + + { + const n = try decompress.reader.readSliceShort(out[0..]); + + // Expected decompressed data + try std.testing.expectEqual(46, n); + try std.testing.expectEqualStrings("Copyright Willem van Schaik, Singapore 1995-96", out[0..n]); + + // The decompressor doesn't overshoot the underlying reader; + // it leaves the reader at the end of the compressed data chunk. + try std.testing.expectEqual(data.len - 4, reader.seek); + // TODO what was this testing, exactly? + //try std.testing.expectEqual(0, decompress.unreadBytes()); + } + + // The 4 bytes after the compressed chunk remain available in the reader. + const n = try reader.readSliceShort(out[0..]); + try std.testing.expectEqual(n, 4); + try std.testing.expectEqualSlices(u8, data[data.len - 4 .. data.len], out[0..n]); +} diff --git a/lib/std/compress/flate/HuffmanEncoder.zig b/lib/std/compress/flate/HuffmanEncoder.zig index bdcaf75801..2057038057 100644 --- a/lib/std/compress/flate/HuffmanEncoder.zig +++ b/lib/std/compress/flate/HuffmanEncoder.zig @@ -135,7 +135,7 @@ fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize) // of ancestors of the rightmost node at level i. // leaf_counts[i][j] is the number of literals at the left // of the level j ancestor.
- var leaf_counts: [max_bits_limit][max_bits_limit]u32 = @splat(0); + var leaf_counts: [max_bits_limit][max_bits_limit]u32 = @splat(@splat(0)); { var level = @as(u32, 1); @@ -389,7 +389,8 @@ pub fn huffmanDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder } test "generate a Huffman code for the fixed literal table specific to Deflate" { - const enc = fixedLiteralEncoder(); + var codes: [max_num_frequencies]Code = undefined; + const enc: HuffmanEncoder = .fixedLiteralEncoder(&codes); for (enc.codes) |c| { switch (c.len) { 7 => { diff --git a/lib/std/compress/flate/Lookup.zig b/lib/std/compress/flate/Lookup.zig index 722e175c8a..d1d93de50a 100644 --- a/lib/std/compress/flate/Lookup.zig +++ b/lib/std/compress/flate/Lookup.zig @@ -6,14 +6,19 @@ const std = @import("std"); const testing = std.testing; const expect = testing.expect; const flate = @import("../flate.zig"); +const Token = @import("Token.zig"); const Lookup = @This(); const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761 const chain_len = 2 * flate.history_len; +pub const bits = 15; +pub const len = 1 << bits; +pub const shift = 32 - bits; + // Maps hash => first position -head: [flate.lookup.len]u16 = [_]u16{0} ** flate.lookup.len, +head: [len]u16 = [_]u16{0} ** len, // Maps position => previous positions for the same hash value chain: [chain_len]u16 = [_]u16{0} ** (chain_len), @@ -52,8 +57,8 @@ pub fn slide(self: *Lookup, n: u16) void { // Add `len` 4 bytes hashes from `data` into lookup. // Position of the first byte is `pos`. -pub fn bulkAdd(self: *Lookup, data: []const u8, len: u16, pos: u16) void { - if (len == 0 or data.len < flate.match.min_length) { +pub fn bulkAdd(self: *Lookup, data: []const u8, length: u16, pos: u16) void { + if (length == 0 or data.len < Token.min_length) { return; } var hb = @@ -64,7 +69,7 @@ pub fn bulkAdd(self: *Lookup, data: []const u8, len: u16, pos: u16) void { _ = self.set(hashu(hb), pos); var i = pos; - for (4..@min(len + 3, data.len)) |j| { + for (4..@min(length + 3, data.len)) |j| { hb = (hb << 8) | @as(u32, data[j]); i += 1; _ = self.set(hashu(hb), i); @@ -80,7 +85,7 @@ fn hash(b: *const [4]u8) u32 { } fn hashu(v: u32) u32 { - return @intCast((v *% prime4) >> flate.lookup.shift); + return @intCast((v *% prime4) >> shift); } test add { diff --git a/lib/std/compress/flate/Token.zig b/lib/std/compress/flate/Token.zig index 293a786cef..1383047693 100644 --- a/lib/std/compress/flate/Token.zig +++ b/lib/std/compress/flate/Token.zig @@ -6,7 +6,6 @@ const std = @import("std"); const assert = std.debug.assert; const print = std.debug.print; const expect = std.testing.expect; -const match = std.compress.flate.match; const Token = @This(); @@ -21,16 +20,23 @@ dist: u15 = 0, len_lit: u8 = 0, kind: Kind = .literal, +pub const base_length = 3; // smallest match length per the RFC section 3.2.5 +pub const min_length = 4; // min length used in this algorithm +pub const max_length = 258; + +pub const min_distance = 1; +pub const max_distance = std.compress.flate.history_len; + pub fn literal(t: Token) u8 { return t.len_lit; } pub fn distance(t: Token) u16 { - return @as(u16, t.dist) + match.min_distance; + return @as(u16, t.dist) + min_distance; } pub fn length(t: Token) u16 { - return @as(u16, t.len_lit) + match.base_length; + return @as(u16, t.len_lit) + base_length; } pub fn initLiteral(lit: u8) Token { @@ -40,12 +46,12 @@ pub fn initLiteral(lit: u8) Token { // distance range 1 - 32768, stored in dist as 0 - 32767 (u15) // length range 3 - 258, stored in len_lit as 0 - 255 (u8) pub 
fn initMatch(dist: u16, len: u16) Token { - assert(len >= match.min_length and len <= match.max_length); - assert(dist >= match.min_distance and dist <= match.max_distance); + assert(len >= min_length and len <= max_length); + assert(dist >= min_distance and dist <= max_distance); return .{ .kind = .match, - .dist = @intCast(dist - match.min_distance), - .len_lit = @intCast(len - match.base_length), + .dist = @intCast(dist - min_distance), + .len_lit = @intCast(len - base_length), }; }
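
The Token.zig hunk above moves the match constants out of flate.match and into Token itself: a match distance in 1...32768 is stored as a u15 offset from min_distance, and a match length in 3...258 as a u8 offset from base_length, which is exactly why the asserts in initMatch hold at both extremes. Below is a minimal standalone sketch of that biased encoding, for illustration only and not part of the patch; Match and its test are hypothetical stand-ins for the real Token API, assuming Zig's decl-literal syntax as used elsewhere in this diff.

const std = @import("std");

// Hypothetical stand-in for flate's Token match encoding: distances
// 1..32768 are stored biased by min_distance so they fit in a u15,
// lengths 3..258 biased by base_length so they fit in a u8.
const min_distance = 1;
const base_length = 3;

const Match = struct {
    dist: u15,
    len_lit: u8,

    fn init(dist: u16, len: u16) Match {
        std.debug.assert(dist >= 1 and dist <= 32768);
        std.debug.assert(len >= 3 and len <= 258);
        return .{
            // Subtracting the bias makes the extreme values fit exactly:
            // 32768 - 1 = 32767 (u15 max), 258 - 3 = 255 (u8 max).
            .dist = @intCast(dist - min_distance),
            .len_lit = @intCast(len - base_length),
        };
    }

    // Decoding adds the bias back, recovering the original values.
    fn distance(m: Match) u16 {
        return @as(u16, m.dist) + min_distance;
    }

    fn length(m: Match) u16 {
        return @as(u16, m.len_lit) + base_length;
    }
};

test "match bias round-trips at the extremes" {
    const m: Match = .init(32768, 258);
    try std.testing.expectEqual(@as(u15, 32767), m.dist);
    try std.testing.expectEqual(@as(u8, 255), m.len_lit);
    try std.testing.expectEqual(@as(u16, 32768), m.distance());
    try std.testing.expectEqual(@as(u16, 258), m.length());
}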