mirror of
https://github.com/ziglang/zig.git
synced 2025-12-06 14:23:09 +00:00
383 lines
12 KiB
Zig
383 lines
12 KiB
Zig
//
|
|
// Compressor/Decompressor for GZIP data streams (RFC1952)
|
|
|
|
const std = @import("../std.zig");
|
|
const io = std.io;
|
|
const fs = std.fs;
|
|
const testing = std.testing;
|
|
const mem = std.mem;
|
|
const deflate = std.compress.deflate;
|
|
|
|
const magic = &[2]u8{ 0x1f, 0x8b };
|
|
|
|
// Flags for the FLG field in the header
|
|
const FTEXT = 1 << 0;
|
|
const FHCRC = 1 << 1;
|
|
const FEXTRA = 1 << 2;
|
|
const FNAME = 1 << 3;
|
|
const FCOMMENT = 1 << 4;
|
|
|
|
const max_string_len = 1024;
|
|
|
|
pub const Header = struct {
|
|
extra: ?[]const u8 = null,
|
|
filename: ?[]const u8 = null,
|
|
comment: ?[]const u8 = null,
|
|
modification_time: u32 = 0,
|
|
operating_system: u8 = 255,
|
|
};
|
|
|
|
pub fn Decompress(comptime ReaderType: type) type {
|
|
return struct {
|
|
const Self = @This();
|
|
|
|
pub const Error = ReaderType.Error ||
|
|
deflate.Decompressor(ReaderType).Error ||
|
|
error{ CorruptedData, WrongChecksum };
|
|
pub const Reader = io.Reader(*Self, Error, read);
|
|
|
|
allocator: mem.Allocator,
|
|
inflater: deflate.Decompressor(ReaderType),
|
|
in_reader: ReaderType,
|
|
hasher: std.hash.Crc32,
|
|
read_amt: u32,
|
|
|
|
info: Header,
|
|
|
|
fn init(allocator: mem.Allocator, in_reader: ReaderType) !Self {
|
|
var hasher = std.compress.hashedReader(in_reader, std.hash.Crc32.init());
|
|
const hashed_reader = hasher.reader();
|
|
|
|
// gzip header format is specified in RFC1952
|
|
const header = try hashed_reader.readBytesNoEof(10);
|
|
|
|
// Check the ID1/ID2 fields
|
|
if (!std.mem.eql(u8, header[0..2], magic))
|
|
return error.BadHeader;
|
|
|
|
const CM = header[2];
|
|
// The CM field must be 8 to indicate the use of DEFLATE
|
|
if (CM != 8) return error.InvalidCompression;
|
|
// Flags
|
|
const FLG = header[3];
|
|
// Modification time, as a Unix timestamp.
|
|
// If zero there's no timestamp available.
|
|
const MTIME = mem.readInt(u32, header[4..8], .little);
|
|
// Extra flags
|
|
const XFL = header[8];
|
|
// Operating system where the compression took place
|
|
const OS = header[9];
|
|
_ = XFL;
|
|
|
|
const extra = if (FLG & FEXTRA != 0) blk: {
|
|
const len = try hashed_reader.readInt(u16, .little);
|
|
const tmp_buf = try allocator.alloc(u8, len);
|
|
errdefer allocator.free(tmp_buf);
|
|
|
|
try hashed_reader.readNoEof(tmp_buf);
|
|
break :blk tmp_buf;
|
|
} else null;
|
|
errdefer if (extra) |p| allocator.free(p);
|
|
|
|
const filename = if (FLG & FNAME != 0)
|
|
try hashed_reader.readUntilDelimiterAlloc(allocator, 0, max_string_len)
|
|
else
|
|
null;
|
|
errdefer if (filename) |p| allocator.free(p);
|
|
|
|
const comment = if (FLG & FCOMMENT != 0)
|
|
try hashed_reader.readUntilDelimiterAlloc(allocator, 0, max_string_len)
|
|
else
|
|
null;
|
|
errdefer if (comment) |p| allocator.free(p);
|
|
|
|
if (FLG & FHCRC != 0) {
|
|
const hash = try in_reader.readInt(u16, .little);
|
|
if (hash != @as(u16, @truncate(hasher.hasher.final())))
|
|
return error.WrongChecksum;
|
|
}
|
|
|
|
return .{
|
|
.allocator = allocator,
|
|
.inflater = try deflate.decompressor(allocator, in_reader, null),
|
|
.in_reader = in_reader,
|
|
.hasher = std.hash.Crc32.init(),
|
|
.info = .{
|
|
.filename = filename,
|
|
.comment = comment,
|
|
.extra = extra,
|
|
.modification_time = MTIME,
|
|
.operating_system = OS,
|
|
},
|
|
.read_amt = 0,
|
|
};
|
|
}
|
|
|
|
pub fn deinit(self: *Self) void {
|
|
self.inflater.deinit();
|
|
if (self.info.extra) |extra|
|
|
self.allocator.free(extra);
|
|
if (self.info.filename) |filename|
|
|
self.allocator.free(filename);
|
|
if (self.info.comment) |comment|
|
|
self.allocator.free(comment);
|
|
}
|
|
|
|
/// Implements the io.Reader interface
|
|
pub fn read(self: *Self, buffer: []u8) Error!usize {
|
|
if (buffer.len == 0)
|
|
return 0;
|
|
|
|
// Read from the compressed stream and update the computed checksum
|
|
const r = try self.inflater.read(buffer);
|
|
if (r != 0) {
|
|
self.hasher.update(buffer[0..r]);
|
|
self.read_amt +%= @truncate(r);
|
|
return r;
|
|
}
|
|
|
|
try self.inflater.close();
|
|
|
|
// We've reached the end of stream, check if the checksum matches
|
|
const hash = try self.in_reader.readInt(u32, .little);
|
|
if (hash != self.hasher.final())
|
|
return error.WrongChecksum;
|
|
|
|
// The ISIZE field is the size of the uncompressed input modulo 2^32
|
|
const input_size = try self.in_reader.readInt(u32, .little);
|
|
if (self.read_amt != input_size)
|
|
return error.CorruptedData;
|
|
|
|
return 0;
|
|
}
|
|
|
|
pub fn reader(self: *Self) Reader {
|
|
return .{ .context = self };
|
|
}
|
|
};
|
|
}
|
|
|
|
pub fn decompress(allocator: mem.Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
|
|
return Decompress(@TypeOf(reader)).init(allocator, reader);
|
|
}
|
|
|
|
pub const CompressOptions = struct {
|
|
header: Header = .{},
|
|
hash_header: bool = true,
|
|
level: deflate.Compression = .default_compression,
|
|
};
|
|
|
|
pub fn Compress(comptime WriterType: type) type {
|
|
return struct {
|
|
const Self = @This();
|
|
|
|
pub const Error = WriterType.Error ||
|
|
deflate.Compressor(WriterType).Error;
|
|
pub const Writer = io.Writer(*Self, Error, write);
|
|
|
|
allocator: mem.Allocator,
|
|
deflater: deflate.Compressor(WriterType),
|
|
out_writer: WriterType,
|
|
hasher: std.hash.Crc32,
|
|
write_amt: u32,
|
|
|
|
fn init(allocator: mem.Allocator, out_writer: WriterType, options: CompressOptions) !Self {
|
|
var hasher = std.compress.hashedWriter(out_writer, std.hash.Crc32.init());
|
|
const hashed_writer = hasher.writer();
|
|
|
|
// ID1/ID2
|
|
try hashed_writer.writeAll(magic);
|
|
// CM
|
|
try hashed_writer.writeByte(8);
|
|
// Flags
|
|
try hashed_writer.writeByte(
|
|
@as(u8, if (options.hash_header) FHCRC else 0) |
|
|
@as(u8, if (options.header.extra) |_| FEXTRA else 0) |
|
|
@as(u8, if (options.header.filename) |_| FNAME else 0) |
|
|
@as(u8, if (options.header.comment) |_| FCOMMENT else 0),
|
|
);
|
|
// Modification time
|
|
try hashed_writer.writeInt(u32, options.header.modification_time, .little);
|
|
// Extra flags
|
|
try hashed_writer.writeByte(0);
|
|
// Operating system
|
|
try hashed_writer.writeByte(options.header.operating_system);
|
|
|
|
if (options.header.extra) |extra| {
|
|
try hashed_writer.writeInt(u16, @intCast(extra.len), .little);
|
|
try hashed_writer.writeAll(extra);
|
|
}
|
|
|
|
if (options.header.filename) |filename| {
|
|
try hashed_writer.writeAll(filename);
|
|
try hashed_writer.writeByte(0);
|
|
}
|
|
|
|
if (options.header.comment) |comment| {
|
|
try hashed_writer.writeAll(comment);
|
|
try hashed_writer.writeByte(0);
|
|
}
|
|
|
|
if (options.hash_header) {
|
|
try out_writer.writeInt(
|
|
u16,
|
|
@truncate(hasher.hasher.final()),
|
|
.little,
|
|
);
|
|
}
|
|
|
|
return .{
|
|
.allocator = allocator,
|
|
.deflater = try deflate.compressor(allocator, out_writer, .{ .level = options.level }),
|
|
.out_writer = out_writer,
|
|
.hasher = std.hash.Crc32.init(),
|
|
.write_amt = 0,
|
|
};
|
|
}
|
|
|
|
pub fn deinit(self: *Self) void {
|
|
self.deflater.deinit();
|
|
}
|
|
|
|
/// Implements the io.Writer interface
|
|
pub fn write(self: *Self, buffer: []const u8) Error!usize {
|
|
if (buffer.len == 0)
|
|
return 0;
|
|
|
|
// Write to the compressed stream and update the computed checksum
|
|
const r = try self.deflater.write(buffer);
|
|
self.hasher.update(buffer[0..r]);
|
|
self.write_amt +%= @truncate(r);
|
|
return r;
|
|
}
|
|
|
|
pub fn writer(self: *Self) Writer {
|
|
return .{ .context = self };
|
|
}
|
|
|
|
pub fn flush(self: *Self) Error!void {
|
|
try self.deflater.flush();
|
|
}
|
|
|
|
pub fn close(self: *Self) Error!void {
|
|
try self.deflater.close();
|
|
try self.out_writer.writeInt(u32, self.hasher.final(), .little);
|
|
try self.out_writer.writeInt(u32, self.write_amt, .little);
|
|
}
|
|
};
|
|
}
|
|
|
|
pub fn compress(allocator: mem.Allocator, writer: anytype, options: CompressOptions) !Compress(@TypeOf(writer)) {
|
|
return Compress(@TypeOf(writer)).init(allocator, writer, options);
|
|
}
|
|
|
|
fn testReader(expected: []const u8, data: []const u8) !void {
|
|
var in_stream = io.fixedBufferStream(data);
|
|
|
|
var gzip_stream = try decompress(testing.allocator, in_stream.reader());
|
|
defer gzip_stream.deinit();
|
|
|
|
// Read and decompress the whole file
|
|
const buf = try gzip_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize));
|
|
defer testing.allocator.free(buf);
|
|
|
|
// Check against the reference
|
|
try testing.expectEqualSlices(u8, expected, buf);
|
|
}
|
|
|
|
fn testWriter(expected: []const u8, data: []const u8, options: CompressOptions) !void {
|
|
var actual = std.ArrayList(u8).init(testing.allocator);
|
|
defer actual.deinit();
|
|
|
|
var gzip_stream = try compress(testing.allocator, actual.writer(), options);
|
|
defer gzip_stream.deinit();
|
|
|
|
// Write and compress the whole file
|
|
try gzip_stream.writer().writeAll(data);
|
|
try gzip_stream.close();
|
|
|
|
// Check against the reference
|
|
try testing.expectEqualSlices(u8, expected, actual.items);
|
|
}
|
|
|
|
// All the test cases are obtained by compressing the RFC1952 text
|
|
//
|
|
// https://tools.ietf.org/rfc/rfc1952.txt length=25037 bytes
|
|
// SHA256=164ef0897b4cbec63abf1b57f069f3599bd0fb7c72c2a4dee21bd7e03ec9af67
|
|
test "compressed data" {
|
|
const plain = @embedFile("testdata/rfc1952.txt");
|
|
const compressed = @embedFile("testdata/rfc1952.txt.gz");
|
|
try testReader(plain, compressed);
|
|
try testWriter(compressed, plain, .{
|
|
.header = .{
|
|
.filename = "rfc1952.txt",
|
|
.modification_time = 1706533053,
|
|
.operating_system = 3,
|
|
},
|
|
});
|
|
}
|
|
|
|
test "sanity checks" {
|
|
// Truncated header
|
|
try testing.expectError(
|
|
error.EndOfStream,
|
|
testReader(undefined, &[_]u8{ 0x1f, 0x8B }),
|
|
);
|
|
// Wrong CM
|
|
try testing.expectError(
|
|
error.InvalidCompression,
|
|
testReader(undefined, &[_]u8{
|
|
0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
0x00, 0x03,
|
|
}),
|
|
);
|
|
// Wrong checksum
|
|
try testing.expectError(
|
|
error.WrongChecksum,
|
|
testReader(undefined, &[_]u8{
|
|
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
|
|
0x00, 0x00, 0x00, 0x00,
|
|
}),
|
|
);
|
|
// Truncated checksum
|
|
try testing.expectError(
|
|
error.EndOfStream,
|
|
testReader(undefined, &[_]u8{
|
|
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
|
|
}),
|
|
);
|
|
// Wrong initial size
|
|
try testing.expectError(
|
|
error.CorruptedData,
|
|
testReader(undefined, &[_]u8{
|
|
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
0x00, 0x00, 0x00, 0x01,
|
|
}),
|
|
);
|
|
// Truncated initial size field
|
|
try testing.expectError(
|
|
error.EndOfStream,
|
|
testReader(undefined, &[_]u8{
|
|
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
0x00, 0x00, 0x00,
|
|
}),
|
|
);
|
|
}
|
|
|
|
test "header checksum" {
|
|
try testReader("", &[_]u8{
|
|
// GZIP header
|
|
0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00,
|
|
|
|
// header.FHCRC (should cover entire header)
|
|
0x99, 0xd6,
|
|
|
|
// GZIP data
|
|
0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
});
|
|
}
|