Merge pull request #15010 from xxxbxxx/zlib-compress

Add zlib stream writer
This commit is contained in:
Andrew Kelley 2023-06-17 14:14:16 -07:00 committed by GitHub
commit 8a6c3d26c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 141 additions and 31 deletions

View File

@ -1,5 +1,5 @@
//
// Decompressor for ZLIB data streams (RFC1950)
// Compressor/Decompressor for ZLIB data streams (RFC1950)
const std = @import("std");
const io = std.io;
@ -8,7 +8,19 @@ const testing = std.testing;
const mem = std.mem;
const deflate = std.compress.deflate;
pub fn ZlibStream(comptime ReaderType: type) type {
/// Two-byte zlib stream header as specified in RFC1950.
///
/// The struct is bit-cast to and from a `u16` that is read and written in
/// big-endian order. Fields of a packed struct are laid out starting at the
/// least significant bit, so `checksum` is the low five bits of the second
/// header byte and `compression_info` is the high nibble of the first byte.
const ZLibHeader = packed struct {
/// FCHECK: check bits; the whole header, taken as a big-endian `u16`,
/// must be a multiple of 31.
checksum: u5,
/// FDICT: preset-dictionary flag. The decompressor rejects streams where
/// it is set and the compressor always writes 0.
preset_dict: u1,
/// FLEVEL: compression level recorded by the compressor; the decompressor
/// does not inspect it.
compression_level: u2,
/// CM: compression method; only `DEFLATE` is accepted.
compression_method: u4,
/// CINFO: base-2 logarithm of the LZ77 window size, minus 8.
compression_info: u4,
// Value of `compression_method` that indicates DEFLATE.
const DEFLATE = 8;
// Largest accepted `compression_info`: a 32K window (log2(32768) - 8).
const WINDOW_32K = 7;
};
pub fn DecompressStream(comptime ReaderType: type) type {
return struct {
const Self = @This();
@ -24,26 +36,24 @@ pub fn ZlibStream(comptime ReaderType: type) type {
fn init(allocator: mem.Allocator, source: ReaderType) !Self {
// Zlib header format is specified in RFC1950
const header = try source.readBytesNoEof(2);
const header_u16 = try source.readIntBig(u16);
const CM = @truncate(u4, header[0]);
const CINFO = @truncate(u4, header[0] >> 4);
const FCHECK = @truncate(u5, header[1]);
_ = FCHECK;
const FDICT = @truncate(u1, header[1] >> 5);
if ((@as(u16, header[0]) << 8 | header[1]) % 31 != 0)
// verify the header checksum
if (header_u16 % 31 != 0)
return error.BadHeader;
const header = @bitCast(ZLibHeader, header_u16);
// The CM field must be 8 to indicate the use of DEFLATE
if (CM != 8) return error.InvalidCompression;
if (header.compression_method != ZLibHeader.DEFLATE)
return error.InvalidCompression;
// CINFO is the base-2 logarithm of the LZ77 window size, minus 8.
// Values above 7 are unspecified and therefore rejected.
if (CINFO > 7) return error.InvalidWindowSize;
if (header.compression_info > ZLibHeader.WINDOW_32K)
return error.InvalidWindowSize;
const dictionary = null;
// TODO: Support this case
if (FDICT != 0)
if (header.preset_dict != 0)
return error.Unsupported;
return Self{
@ -84,14 +94,96 @@ pub fn ZlibStream(comptime ReaderType: type) type {
};
}
pub fn zlibStream(allocator: mem.Allocator, reader: anytype) !ZlibStream(@TypeOf(reader)) {
return ZlibStream(@TypeOf(reader)).init(allocator, reader);
/// Creates a zlib decompressor that reads from `reader`.
/// The stream type is derived from the type of `reader`; any error returned
/// while initializing it (e.g. a rejected header) is propagated.
pub fn decompressStream(allocator: mem.Allocator, reader: anytype) !DecompressStream(@TypeOf(reader)) {
    const Stream = DecompressStream(@TypeOf(reader));
    return Stream.init(allocator, reader);
}
fn testReader(data: []const u8, expected: []const u8) !void {
/// Compression effort selectable through `CompressStreamOptions`.
/// The integer value of each tag is what the compressor stores in the
/// two-bit `compression_level` field of the zlib header.
pub const CompressionLevel = enum(u2) {
/// Mapped to deflate's `.no_compression`.
no_compression = 0,
/// Mapped to deflate's `.best_speed`.
fastest = 1,
/// Mapped to deflate's `.default_compression`.
default = 2,
/// Mapped to deflate's `.best_compression`.
maximum = 3,
};
/// Options accepted when creating a `CompressStream`.
pub const CompressStreamOptions = struct {
/// Compression level to use; `.default` when not specified.
level: CompressionLevel = .default,
};
/// Returns a zlib (RFC1950) compressing stream type that wraps `WriterType`.
///
/// Bytes passed to `write` (or to the `Writer` returned by `writer()`) are
/// deflate-compressed into the wrapped writer while an Adler-32 checksum of
/// the accepted input is maintained. `finish` closes the deflate stream and
/// appends that checksum; `deinit` only releases the compressor and writes
/// nothing.
pub fn CompressStream(comptime WriterType: type) type {
    return struct {
        const Self = @This();

        /// Errors that `write` can return. Public because it is already
        /// reachable through the public `Writer` type.
        pub const Error = WriterType.Error ||
            deflate.Compressor(WriterType).Error;
        pub const Writer = io.Writer(*Self, Error, write);

        allocator: mem.Allocator,
        deflator: deflate.Compressor(WriterType),
        in_writer: WriterType,
        hasher: std.hash.Adler32,

        /// Sets up the deflate compressor and writes the two-byte zlib header
        /// to `dest`.
        ///
        /// The compressor is created before the header is written so that a
        /// failed allocation leaves `dest` untouched; if writing the header
        /// fails, the compressor is released again.
        fn init(allocator: mem.Allocator, dest: WriterType, options: CompressStreamOptions) !Self {
            const compression_level: deflate.Compression = switch (options.level) {
                .no_compression => .no_compression,
                .fastest => .best_speed,
                .default => .default_compression,
                .maximum => .best_compression,
            };

            var deflator = try deflate.compressor(allocator, dest, .{ .level = compression_level });
            errdefer deflator.deinit();

            var header = ZLibHeader{
                .compression_info = ZLibHeader.WINDOW_32K,
                .compression_method = ZLibHeader.DEFLATE,
                .compression_level = @enumToInt(options.level),
                .preset_dict = 0,
                .checksum = 0,
            };
            // Choose the check bits so that the header, taken as a u16, is a
            // multiple of 31.
            header.checksum = @truncate(u5, 31 - @bitCast(u16, header) % 31);
            try dest.writeIntBig(u16, @bitCast(u16, header));

            return Self{
                .allocator = allocator,
                .deflator = deflator,
                .in_writer = dest,
                .hasher = std.hash.Adler32.init(),
            };
        }

        /// Compresses `bytes` and returns how many of them were accepted.
        /// Only the accepted prefix is folded into the running checksum.
        pub fn write(self: *Self, bytes: []const u8) Error!usize {
            if (bytes.len == 0) {
                return 0;
            }

            const w = try self.deflator.write(bytes);

            self.hasher.update(bytes[0..w]);
            return w;
        }

        /// Returns an `io.Writer` whose writes go through `write`.
        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }

        /// Releases the deflate compressor. Does not write anything.
        pub fn deinit(self: *Self) void {
            self.deflator.deinit();
        }

        /// Closes the deflate stream, then appends the Adler-32 checksum of
        /// all accepted input as a big-endian u32.
        pub fn finish(self: *Self) !void {
            const hash = self.hasher.final();
            try self.deflator.close();
            try self.in_writer.writeIntBig(u32, hash);
        }
    };
}
/// Creates a zlib compressor that writes to `writer` using `options`.
/// The stream type is derived from the type of `writer`; any error returned
/// while initializing it is propagated.
pub fn compressStream(allocator: mem.Allocator, writer: anytype, options: CompressStreamOptions) !CompressStream(@TypeOf(writer)) {
    const Stream = CompressStream(@TypeOf(writer));
    return Stream.init(allocator, writer, options);
}
fn testDecompress(data: []const u8, expected: []const u8) !void {
var in_stream = io.fixedBufferStream(data);
var zlib_stream = try zlibStream(testing.allocator, in_stream.reader());
var zlib_stream = try decompressStream(testing.allocator, in_stream.reader());
defer zlib_stream.deinit();
// Read and decompress the whole file
@ -110,24 +202,24 @@ test "compressed data" {
const rfc1951_txt = @embedFile("testdata/rfc1951.txt");
// Compressed with compression level = 0
try testReader(
try testDecompress(
@embedFile("testdata/rfc1951.txt.z.0"),
rfc1951_txt,
);
// Compressed with compression level = 9
try testReader(
try testDecompress(
@embedFile("testdata/rfc1951.txt.z.9"),
rfc1951_txt,
);
// Compressed with compression level = 9 and fixed Huffman codes
try testReader(
try testDecompress(
@embedFile("testdata/rfc1951.txt.fixed.z.9"),
rfc1951_txt,
);
}
test "don't read past deflate stream's end" {
try testReader(&[_]u8{
try testDecompress(&[_]u8{
0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
0x83, 0x95, 0x0b, 0xf5,
@ -142,31 +234,49 @@ test "sanity checks" {
// Truncated header
try testing.expectError(
error.EndOfStream,
testReader(&[_]u8{0x78}, ""),
testDecompress(&[_]u8{0x78}, ""),
);
// Failed FCHECK check
try testing.expectError(
error.BadHeader,
testReader(&[_]u8{ 0x78, 0x9D }, ""),
testDecompress(&[_]u8{ 0x78, 0x9D }, ""),
);
// Wrong CM
try testing.expectError(
error.InvalidCompression,
testReader(&[_]u8{ 0x79, 0x94 }, ""),
testDecompress(&[_]u8{ 0x79, 0x94 }, ""),
);
// Wrong CINFO
try testing.expectError(
error.InvalidWindowSize,
testReader(&[_]u8{ 0x88, 0x98 }, ""),
testDecompress(&[_]u8{ 0x88, 0x98 }, ""),
);
// Wrong checksum
try testing.expectError(
error.WrongChecksum,
testReader(&[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
testDecompress(&[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
);
// Truncated checksum
try testing.expectError(
error.EndOfStream,
testReader(&[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
testDecompress(&[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
);
}
test "compress data" {
    const gpa = testing.allocator;
    const plain_text = @embedFile("testdata/rfc1951.txt");

    // Round-trip the reference text once per compression level: compress it
    // into an in-memory buffer, then check that it decompresses back to the
    // original bytes.
    for (std.meta.tags(CompressionLevel)) |level| {
        var sink = std.ArrayList(u8).init(gpa);
        defer sink.deinit();

        var stream = try compressStream(gpa, sink.writer(), .{ .level = level });
        defer stream.deinit();

        try stream.writer().writeAll(plain_text);
        try stream.finish();

        try testDecompress(sink.items, plain_text);
    }
}

View File

@ -309,7 +309,7 @@ pub const RequestTransfer = union(enum) {
/// The decompressor for response messages.
pub const Compression = union(enum) {
pub const DeflateDecompressor = std.compress.zlib.ZlibStream(Request.TransferReader);
pub const DeflateDecompressor = std.compress.zlib.DecompressStream(Request.TransferReader);
pub const GzipDecompressor = std.compress.gzip.Decompress(Request.TransferReader);
pub const ZstdDecompressor = std.compress.zstd.DecompressStream(Request.TransferReader, .{});
@ -722,7 +722,7 @@ pub const Request = struct {
if (req.response.transfer_compression) |tc| switch (tc) {
.compress => return error.CompressionNotSupported,
.deflate => req.response.compression = .{
.deflate = std.compress.zlib.zlibStream(req.client.allocator, req.transferReader()) catch return error.CompressionInitializationFailed,
.deflate = std.compress.zlib.decompressStream(req.client.allocator, req.transferReader()) catch return error.CompressionInitializationFailed,
},
.gzip => req.response.compression = .{
.gzip = std.compress.gzip.decompress(req.client.allocator, req.transferReader()) catch return error.CompressionInitializationFailed,

View File

@ -155,7 +155,7 @@ pub const ResponseTransfer = union(enum) {
/// The decompressor for request messages.
pub const Compression = union(enum) {
pub const DeflateDecompressor = std.compress.zlib.ZlibStream(Response.TransferReader);
pub const DeflateDecompressor = std.compress.zlib.DecompressStream(Response.TransferReader);
pub const GzipDecompressor = std.compress.gzip.Decompress(Response.TransferReader);
pub const ZstdDecompressor = std.compress.zstd.DecompressStream(Response.TransferReader, .{});
@ -520,7 +520,7 @@ pub const Response = struct {
if (res.request.transfer_compression) |tc| switch (tc) {
.compress => return error.CompressionNotSupported,
.deflate => res.request.compression = .{
.deflate = std.compress.zlib.zlibStream(res.allocator, res.transferReader()) catch return error.CompressionInitializationFailed,
.deflate = std.compress.zlib.decompressStream(res.allocator, res.transferReader()) catch return error.CompressionInitializationFailed,
},
.gzip => res.request.compression = .{
.gzip = std.compress.gzip.decompress(res.allocator, res.transferReader()) catch return error.CompressionInitializationFailed,