Mirror of https://github.com/ziglang/zig.git, synced 2025-12-06 14:23:09 +00:00
std.compress.lzma2: tests passing
This commit is contained in:
parent 3cb9baaf65
commit a8ae6c2f42
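
Usage after this change, sketched from the updated test at the end of this diff. The `decompressAlloc` helper is illustrative only; it assumes this file is exposed as `std.compress.lzma2` (as the doc comment on the new `buffer: anytype` parameter suggests), and it copies the result with `gpa.dupe` rather than relying on any other `Writer.Allocating` method:

const std = @import("std");
// Assumption: the lzma2 Decode shown in this diff is reachable at this path.
const lzma2 = std.compress.lzma2;

/// Hypothetical helper mirroring the updated "decompress hello world stream" test:
/// the caller now owns the input reader and the allocating output writer.
fn decompressAlloc(gpa: std.mem.Allocator, compressed: []const u8) ![]u8 {
    // Decode.init no longer takes a reader or an unpacked size.
    var decode = try lzma2.Decode.init(gpa);
    defer decode.deinit(gpa);

    var stream: std.Io.Reader = .fixed(compressed);
    var result: std.Io.Writer.Allocating = .init(gpa);
    defer result.deinit();

    // decompress() drives the single-step lzma.Decode.process loop internally.
    try decode.decompress(&stream, &result);
    return gpa.dupe(u8, result.written());
}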
@@ -105,7 +105,6 @@ pub const RangeDecoder = struct {

pub const Decode = struct {
properties: Properties,
unpacked_size: ?u64,
literal_probs: Vec2d,
pos_slot_decoder: [4]BitTree(6),
align_decoder: BitTree(4),
@@ -121,15 +120,10 @@ pub const Decode = struct {
len_decoder: LenDecoder,
rep_len_decoder: LenDecoder,

pub fn init(
gpa: Allocator,
properties: Properties,
unpacked_size: ?u64,
) !Decode {
pub fn init(gpa: Allocator, properties: Properties) !Decode {
return .{
.properties = properties,
.unpacked_size = unpacked_size,
.literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (properties.lc + properties.lp), 0x300 }),
.literal_probs = try Vec2d.init(gpa, 0x400, @as(usize, 1) << (properties.lc + properties.lp), 0x300),
.pos_slot_decoder = @splat(.{}),
.align_decoder = .{},
.pos_decoders = @splat(0x400),
@@ -157,7 +151,7 @@ pub const Decode = struct {
self.literal_probs.fill(0x400);
} else {
self.literal_probs.deinit(gpa);
self.literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 });
self.literal_probs = try Vec2d.init(gpa, 0x400, @as(usize, 1) << (new_props.lc + new_props.lp), 0x300);
}

self.properties = new_props;
@@ -176,11 +170,12 @@ pub const Decode = struct {
self.rep_len_decoder.reset();
}

fn processNext(
pub fn process(
self: *Decode,
reader: *Reader,
allocating: *Writer.Allocating,
buffer: *CircularBuffer,
/// `CircularBuffer` or `std.compress.lzma2.AccumBuffer`.
buffer: anytype,
decoder: *RangeDecoder,
) !ProcessingStatus {
const gpa = allocating.allocator;
@@ -256,39 +251,11 @@ pub const Decode = struct {
return .more;
}

pub fn process(
self: *Decode,
reader: *Reader,
allocating: *Writer.Allocating,
buffer: *CircularBuffer,
decoder: *RangeDecoder,
) !void {
process_next: {
if (self.unpacked_size) |unpacked_size| {
if (buffer.len >= unpacked_size) {
break :process_next;
}
} else if (decoder.isFinished()) {
break :process_next;
}
switch (try self.processNext(reader, allocating, buffer, decoder)) {
.more => return,
.finished => {},
}
}

if (self.unpacked_size) |unpacked_size| {
if (buffer.len != unpacked_size) return error.DecompressedSizeMismatch;
}

try buffer.finish(&allocating.writer);
self.state = math.maxInt(usize);
}

fn decodeLiteral(
self: *Decode,
reader: *Reader,
buffer: *CircularBuffer,
/// `CircularBuffer` or `std.compress.lzma2.AccumBuffer`.
buffer: anytype,
decoder: *RangeDecoder,
) !u8 {
const def_prev_byte = 0;
@@ -377,10 +344,7 @@ pub const Decode = struct {
}

pub fn get(self: CircularBuffer, index: usize) u8 {
return if (0 <= index and index < self.buf.items.len)
self.buf.items[index]
else
0;
return if (0 <= index and index < self.buf.items.len) self.buf.items[index] else 0;
}

pub fn set(self: *CircularBuffer, gpa: Allocator, index: usize, value: u8) !void {
@@ -524,29 +488,29 @@ pub const Decode = struct {
data: []u16,
cols: usize,

pub fn init(gpa: Allocator, value: u16, size: struct { usize, usize }) !Vec2d {
const len = try math.mul(usize, size[0], size[1]);
pub fn init(gpa: Allocator, value: u16, w: usize, h: usize) !Vec2d {
const len = try math.mul(usize, w, h);
const data = try gpa.alloc(u16, len);
@memset(data, value);
return .{
.data = data,
.cols = size[1],
.cols = h,
};
}

pub fn deinit(self: *Vec2d, gpa: Allocator) void {
gpa.free(self.data);
self.* = undefined;
pub fn deinit(v: *Vec2d, gpa: Allocator) void {
gpa.free(v.data);
v.* = undefined;
}

pub fn fill(self: *Vec2d, value: u16) void {
@memset(self.data, value);
pub fn fill(v: *Vec2d, value: u16) void {
@memset(v.data, value);
}

fn get(self: Vec2d, row: usize) ![]u16 {
const start_row = try math.mul(usize, row, self.cols);
const end_row = try math.add(usize, start_row, self.cols);
return self.data[start_row..end_row];
fn get(v: Vec2d, row: usize) ![]u16 {
const start_row = try math.mul(usize, row, v.cols);
const end_row = try math.add(usize, start_row, v.cols);
return v.data[start_row..end_row];
}
};

@@ -627,6 +591,7 @@ pub const Decompress = struct {
range_decoder: RangeDecoder,
decode: Decode,
err: ?Error,
unpacked_size: ?u64,

pub const Error = error{
OutOfMemory,
@@ -654,7 +619,7 @@ pub const Decompress = struct {
.input = input,
.buffer = Decode.CircularBuffer.init(params.dict_size, mem_limit),
.range_decoder = try RangeDecoder.init(input),
.decode = try Decode.init(gpa, params.properties, params.unpacked_size),
.decode = try Decode.init(gpa, params.properties),
.reader = .{
.buffer = buffer,
.vtable = &.{
@@ -666,6 +631,7 @@ pub const Decompress = struct {
.end = 0,
},
.err = null,
.unpacked_size = params.unpacked_size,
};
}

@@ -728,20 +694,46 @@ pub const Decompress = struct {
r.end = allocating.writer.end;
}
if (d.decode.state == math.maxInt(usize)) return error.EndOfStream;
d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| switch (err) {

process_next: {
if (d.unpacked_size) |unpacked_size| {
if (d.buffer.len >= unpacked_size) break :process_next;
} else if (d.range_decoder.isFinished()) {
break :process_next;
}
switch (d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| switch (err) {
error.WriteFailed => {
d.err = error.OutOfMemory;
return error.ReadFailed;
},
error.EndOfStream => {
d.err = error.EndOfStream;
return error.ReadFailed;
},
else => |e| {
d.err = e;
return error.ReadFailed;
},
}) {
.more => return 0,
.finished => break :process_next,
}
}

if (d.unpacked_size) |unpacked_size| {
if (d.buffer.len != unpacked_size) {
d.err = error.DecompressedSizeMismatch;
return error.ReadFailed;
}
}

d.buffer.finish(&allocating.writer) catch |err| switch (err) {
error.WriteFailed => {
d.err = error.OutOfMemory;
return error.ReadFailed;
},
error.EndOfStream => {
d.err = error.EndOfStream;
return error.ReadFailed;
},
else => |e| {
d.err = e;
return error.ReadFailed;
},
};
d.decode.state = math.maxInt(usize);
return 0;
}
};

@@ -6,17 +6,15 @@ const Writer = std.Io.Writer;
const Reader = std.Io.Reader;

/// An accumulating buffer for LZ sequences
pub const LzAccumBuffer = struct {
pub const AccumBuffer = struct {
/// Buffer
buf: ArrayList(u8),

/// Buffer memory limit
memlimit: usize,

/// Total number of bytes sent through the buffer
len: usize,

pub fn init(memlimit: usize) LzAccumBuffer {
pub fn init(memlimit: usize) AccumBuffer {
return .{
.buf = .{},
.memlimit = memlimit,
@@ -24,20 +22,20 @@ pub const LzAccumBuffer = struct {
};
}

pub fn appendByte(self: *LzAccumBuffer, allocator: Allocator, byte: u8) !void {
pub fn appendByte(self: *AccumBuffer, allocator: Allocator, byte: u8) !void {
try self.buf.append(allocator, byte);
self.len += 1;
}

/// Reset the internal dictionary
pub fn reset(self: *LzAccumBuffer, writer: *Writer) !void {
pub fn reset(self: *AccumBuffer, writer: *Writer) !void {
try writer.writeAll(self.buf.items);
self.buf.clearRetainingCapacity();
self.len = 0;
}

/// Retrieve the last byte or return a default
pub fn lastOr(self: LzAccumBuffer, lit: u8) u8 {
pub fn lastOr(self: AccumBuffer, lit: u8) u8 {
const buf_len = self.buf.items.len;
return if (buf_len == 0)
lit
@@ -46,7 +44,7 @@ pub const LzAccumBuffer = struct {
}

/// Retrieve the n-th last byte
pub fn lastN(self: LzAccumBuffer, dist: usize) !u8 {
pub fn lastN(self: AccumBuffer, dist: usize) !u8 {
const buf_len = self.buf.items.len;
if (dist > buf_len) {
return error.CorruptInput;
@@ -57,7 +55,7 @@ pub const LzAccumBuffer = struct {

/// Append a literal
pub fn appendLiteral(
self: *LzAccumBuffer,
self: *AccumBuffer,
allocator: Allocator,
lit: u8,
writer: *Writer,
@@ -72,7 +70,7 @@ pub const LzAccumBuffer = struct {

/// Fetch an LZ sequence (length, distance) from inside the buffer
pub fn appendLz(
self: *LzAccumBuffer,
self: *AccumBuffer,
allocator: Allocator,
len: usize,
dist: usize,
@@ -95,12 +93,12 @@ pub const LzAccumBuffer = struct {
self.len += len;
}

pub fn finish(self: *LzAccumBuffer, writer: *Writer) !void {
pub fn finish(self: *AccumBuffer, writer: *Writer) !void {
try writer.writeAll(self.buf.items);
self.buf.clearRetainingCapacity();
}

pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void {
pub fn deinit(self: *AccumBuffer, allocator: Allocator) void {
self.buf.deinit(allocator);
self.* = undefined;
}
@@ -109,59 +107,43 @@ pub const LzAccumBuffer = struct {
pub const Decode = struct {
lzma_decode: lzma.Decode,

pub fn init(allocator: Allocator) !Decode {
return Decode{
.lzma_decode = try lzma.Decode.init(
allocator,
.{
.lc = 0,
.lp = 0,
.pb = 0,
},
null,
),
};
pub fn init(gpa: Allocator) !Decode {
return .{ .lzma_decode = try lzma.Decode.init(gpa, .{ .lc = 0, .lp = 0, .pb = 0 }) };
}

pub fn deinit(self: *Decode, allocator: Allocator) void {
self.lzma_decode.deinit(allocator);
pub fn deinit(self: *Decode, gpa: Allocator) void {
self.lzma_decode.deinit(gpa);
self.* = undefined;
}

pub fn decompress(
self: *Decode,
allocator: Allocator,
reader: *Reader,
writer: *Writer,
) !void {
var accum = LzAccumBuffer.init(std.math.maxInt(usize));
defer accum.deinit(allocator);
pub fn decompress(d: *Decode, reader: *Reader, allocating: *Writer.Allocating) !void {
const gpa = allocating.allocator;

var accum = AccumBuffer.init(std.math.maxInt(usize));
defer accum.deinit(gpa);

while (true) {
const status = try reader.readByte();
const status = try reader.takeByte();

switch (status) {
0 => break,
1 => try parseUncompressed(allocator, reader, writer, &accum, true),
2 => try parseUncompressed(allocator, reader, writer, &accum, false),
else => try self.parseLzma(allocator, reader, writer, &accum, status),
1 => try parseUncompressed(reader, allocating, &accum, true),
2 => try parseUncompressed(reader, allocating, &accum, false),
else => try d.parseLzma(reader, allocating, &accum, status),
}
}

try accum.finish(writer);
try accum.finish(&allocating.writer);
}

fn parseLzma(
self: *Decode,
allocator: Allocator,
d: *Decode,
reader: *Reader,
writer: *Writer,
accum: *LzAccumBuffer,
allocating: *Writer.Allocating,
accum: *AccumBuffer,
status: u8,
) !void {
if (status & 0x80 == 0) {
return error.CorruptInput;
}
if (status & 0x80 == 0) return error.CorruptInput;

const Reset = struct {
dict: bool,
@@ -169,23 +151,23 @@ pub const Decode = struct {
props: bool,
};

const reset = switch ((status >> 5) & 0x3) {
0 => Reset{
const reset: Reset = switch ((status >> 5) & 0x3) {
0 => .{
.dict = false,
.state = false,
.props = false,
},
1 => Reset{
1 => .{
.dict = false,
.state = true,
.props = false,
},
2 => Reset{
2 => .{
.dict = false,
.state = true,
.props = true,
},
3 => Reset{
3 => .{
.dict = true,
.state = true,
.props = true,
@@ -196,24 +178,24 @@ pub const Decode = struct {
const unpacked_size = blk: {
var tmp: u64 = status & 0x1F;
tmp <<= 16;
tmp |= try reader.readInt(u16, .big);
tmp |= try reader.takeInt(u16, .big);
break :blk tmp + 1;
};

const packed_size = blk: {
const tmp: u17 = try reader.readInt(u16, .big);
const tmp: u17 = try reader.takeInt(u16, .big);
break :blk tmp + 1;
};

if (reset.dict) {
try accum.reset(writer);
}
if (reset.dict) try accum.reset(&allocating.writer);

const ld = &d.lzma_decode;

if (reset.state) {
var new_props = self.lzma_decode.properties;
var new_props = ld.properties;

if (reset.props) {
var props = try reader.readByte();
var props = try reader.takeByte();
if (props >= 225) {
return error.CorruptInput;
}
@@ -231,38 +213,44 @@ pub const Decode = struct {
new_props = .{ .lc = lc, .lp = lp, .pb = pb };
}

try self.lzma_decode.resetState(allocator, new_props);
try ld.resetState(allocating.allocator, new_props);
}

self.lzma_decode.unpacked_size = unpacked_size + accum.len;
var range_decoder = try lzma.RangeDecoder.init(reader);

var counter = std.io.countingReader(reader);
const counter_reader = counter.reader();

var rangecoder = try lzma.RangeDecoder.init(counter_reader);
while (try self.lzma_decode.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {}

if (counter.bytes_read != packed_size) {
return error.CorruptInput;
while (true) {
if (accum.len >= unpacked_size) break;
if (range_decoder.isFinished()) break;
switch (try ld.process(reader, allocating, accum, &range_decoder)) {
.more => continue,
.finished => break,
}
}
if (accum.len != unpacked_size) return error.DecompressedSizeMismatch;

// TODO restore this error
//if (counter.bytes_read != packed_size) {
// return error.CorruptInput;
//}
_ = packed_size;
}

fn parseUncompressed(
allocator: Allocator,
reader: *Reader,
writer: *Writer,
accum: *LzAccumBuffer,
allocating: *Writer.Allocating,
accum: *AccumBuffer,
reset_dict: bool,
) !void {
const unpacked_size = @as(u17, try reader.readInt(u16, .big)) + 1;
const unpacked_size = @as(u17, try reader.takeInt(u16, .big)) + 1;

if (reset_dict) {
try accum.reset(writer);
}
if (reset_dict) try accum.reset(&allocating.writer);

var i: @TypeOf(unpacked_size) = 0;
while (i < unpacked_size) : (i += 1) {
try accum.appendByte(allocator, try reader.readByte());
const gpa = allocating.allocator;

var i = unpacked_size;
while (i != 0) {
try accum.appendByte(gpa, try reader.takeByte());
i -= 1;
}
}
};
@@ -273,13 +261,13 @@ test "decompress hello world stream" {

const gpa = std.testing.allocator;

var stream: std.Io.Reader = .fixed(compressed);

var decode = try Decode.init(gpa, &stream);
var decode = try Decode.init(gpa);
defer decode.deinit(gpa);

const result = try decode.reader.allocRemaining(gpa, .unlimited);
defer gpa.free(result);
var stream: std.Io.Reader = .fixed(compressed);
var result: std.Io.Writer.Allocating = .init(gpa);
defer result.deinit();

try std.testing.expectEqualStrings(expected, result);
try decode.decompress(&stream, &result);
try std.testing.expectEqualStrings(expected, result.written());
}
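
For review context, a minimal sketch of the caller-driven loop this commit introduces in place of the old internal `process` wrapper: each call to `process` now performs one step and returns `.more` or `.finished`, and the caller checks the expected size and finishes the buffer, as the new `Decompress.read` and `parseLzma` bodies above do. The `driveProcess` helper is hypothetical, and the `lzma` import assumes the sibling `std.compress.lzma` module the lzma2 file refers to as `lzma`:

const std = @import("std");
// Assumption: these are the Decode and RangeDecoder types shown in this diff.
const lzma = std.compress.lzma;

fn driveProcess(
    ld: *lzma.Decode,
    reader: *std.Io.Reader,
    allocating: *std.Io.Writer.Allocating,
    buffer: anytype, // *CircularBuffer or *AccumBuffer, per the doc comment in the diff
    range_decoder: *lzma.RangeDecoder,
    unpacked_size: u64,
) !void {
    // Step the decoder until the expected number of bytes has been produced
    // or the range coder reports end of stream.
    while (buffer.len < unpacked_size and !range_decoder.isFinished()) {
        switch (try ld.process(reader, allocating, buffer, range_decoder)) {
            .more => continue,
            .finished => break,
        }
    }
    if (buffer.len != unpacked_size) return error.DecompressedSizeMismatch;
    // Flush whatever the buffer still holds to the output writer.
    try buffer.finish(&allocating.writer);
}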