Mirror of https://github.com/ziglang/zig.git

Commit 722e066173 (parent d87eb7d4e4)
std.compress.xz.Decompress: some tests passing
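The diff below threads a byte counter, n_read: *u64, through the LZMA range decoder and the LZMA2 block decoder so callers can check how many compressed bytes were actually consumed, and it fills in the previously stubbed-out XZ block padding and index/footer handling. A minimal usage sketch of the new entry point follows; it is not part of the commit, and it assumes RangeDecoder remains reachable as std.compress.lzma.RangeDecoder in this development version.

const std = @import("std");
const lzma = std.compress.lzma; // assumed module path; see the diff below

// Hypothetical helper: read the 5-byte range-coder header and report how
// many compressed bytes have been consumed so far.
fn rangeDecoderHeaderBytes(reader: *std.Io.Reader) !u64 {
    var n_read: u64 = 0;
    const range_decoder = try lzma.RangeDecoder.initCounting(reader, &n_read);
    _ = range_decoder; // later decode calls would also receive &n_read
    return n_read; // 5: one reserved byte plus four code bytes
}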
@@ -12,11 +12,19 @@ pub const RangeDecoder = struct {
     code: u32,

     pub fn init(reader: *Reader) !RangeDecoder {
+        var counter: u64 = 0;
+        return initCounting(reader, &counter);
+    }
+
+    pub fn initCounting(reader: *Reader, n_read: *u64) !RangeDecoder {
         const reserved = try reader.takeByte();
+        n_read.* += 1;
         if (reserved != 0) return error.InvalidRangeCode;
+        const code = try reader.takeInt(u32, .big);
+        n_read.* += 4;
         return .{
             .range = 0xFFFF_FFFF,
-            .code = try reader.takeInt(u32, .big),
+            .code = code,
         };
     }

@@ -24,47 +32,47 @@ pub const RangeDecoder = struct {
         return self.code == 0;
     }

-    fn normalize(self: *RangeDecoder, reader: *Reader) !void {
+    fn normalize(self: *RangeDecoder, reader: *Reader, n_read: *u64) !void {
         if (self.range < 0x0100_0000) {
             self.range <<= 8;
             self.code = (self.code << 8) ^ @as(u32, try reader.takeByte());
+            n_read.* += 1;
         }
     }

-    fn getBit(self: *RangeDecoder, reader: *Reader) !bool {
+    fn getBit(self: *RangeDecoder, reader: *Reader, n_read: *u64) !bool {
         self.range >>= 1;

         const bit = self.code >= self.range;
-        if (bit)
-            self.code -= self.range;
+        if (bit) self.code -= self.range;

-        try self.normalize(reader);
+        try self.normalize(reader, n_read);
         return bit;
     }

-    pub fn get(self: *RangeDecoder, reader: *Reader, count: usize) !u32 {
+    pub fn get(self: *RangeDecoder, reader: *Reader, count: usize, n_read: *u64) !u32 {
         var result: u32 = 0;
-        var i: usize = 0;
-        while (i < count) : (i += 1)
-            result = (result << 1) ^ @intFromBool(try self.getBit(reader));
+        for (0..count) |_| {
+            result = (result << 1) ^ @intFromBool(try self.getBit(reader, n_read));
+        }
         return result;
     }

-    pub fn decodeBit(self: *RangeDecoder, reader: *Reader, prob: *u16) !bool {
+    pub fn decodeBit(self: *RangeDecoder, reader: *Reader, prob: *u16, n_read: *u64) !bool {
         const bound = (self.range >> 11) * prob.*;

         if (self.code < bound) {
             prob.* += (0x800 - prob.*) >> 5;
             self.range = bound;

-            try self.normalize(reader);
+            try self.normalize(reader, n_read);
             return false;
         } else {
             prob.* -= prob.* >> 5;
             self.code -= bound;
             self.range -= bound;

-            try self.normalize(reader);
+            try self.normalize(reader, n_read);
             return true;
         }
     }
@@ -74,11 +82,12 @@ pub const RangeDecoder = struct {
         reader: *Reader,
         num_bits: u5,
         probs: []u16,
+        n_read: *u64,
     ) !u32 {
         var tmp: u32 = 1;
         var i: @TypeOf(num_bits) = 0;
         while (i < num_bits) : (i += 1) {
-            const bit = try self.decodeBit(reader, &probs[tmp]);
+            const bit = try self.decodeBit(reader, &probs[tmp], n_read);
             tmp = (tmp << 1) ^ @intFromBool(bit);
         }
         return tmp - (@as(u32, 1) << num_bits);
@@ -90,12 +99,13 @@ pub const RangeDecoder = struct {
         num_bits: u5,
         probs: []u16,
         offset: usize,
+        n_read: *u64,
     ) !u32 {
         var result: u32 = 0;
         var tmp: usize = 1;
         var i: @TypeOf(num_bits) = 0;
         while (i < num_bits) : (i += 1) {
-            const bit = @intFromBool(try self.decodeBit(reader, &probs[offset + tmp]));
+            const bit = @intFromBool(try self.decodeBit(reader, &probs[offset + tmp], n_read));
             tmp = (tmp << 1) ^ bit;
             result ^= @as(u32, bit) << i;
         }
@@ -177,13 +187,14 @@ pub const Decode = struct {
         /// `CircularBuffer` or `std.compress.lzma2.AccumBuffer`.
         buffer: anytype,
         decoder: *RangeDecoder,
+        n_read: *u64,
     ) !ProcessingStatus {
         const gpa = allocating.allocator;
         const writer = &allocating.writer;
         const pos_state = buffer.len & ((@as(usize, 1) << self.properties.pb) - 1);

-        if (!try decoder.decodeBit(reader, &self.is_match[(self.state << 4) + pos_state])) {
-            const byte: u8 = try self.decodeLiteral(reader, buffer, decoder);
+        if (!try decoder.decodeBit(reader, &self.is_match[(self.state << 4) + pos_state], n_read)) {
+            const byte: u8 = try self.decodeLiteral(reader, buffer, decoder, n_read);

             try buffer.appendLiteral(gpa, byte, writer);

@@ -197,18 +208,18 @@ pub const Decode = struct {
         }

         var len: usize = undefined;
-        if (try decoder.decodeBit(reader, &self.is_rep[self.state])) {
-            if (!try decoder.decodeBit(reader, &self.is_rep_g0[self.state])) {
-                if (!try decoder.decodeBit(reader, &self.is_rep_0long[(self.state << 4) + pos_state])) {
+        if (try decoder.decodeBit(reader, &self.is_rep[self.state], n_read)) {
+            if (!try decoder.decodeBit(reader, &self.is_rep_g0[self.state], n_read)) {
+                if (!try decoder.decodeBit(reader, &self.is_rep_0long[(self.state << 4) + pos_state], n_read)) {
                     self.state = if (self.state < 7) 9 else 11;
                     const dist = self.rep[0] + 1;
                     try buffer.appendLz(gpa, 1, dist, writer);
                     return .more;
                 }
             } else {
-                const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state]))
+                const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state], n_read))
                     1
-                else if (!try decoder.decodeBit(reader, &self.is_rep_g2[self.state]))
+                else if (!try decoder.decodeBit(reader, &self.is_rep_g2[self.state], n_read))
                     2
                 else
                     3;
@@ -220,7 +231,7 @@ pub const Decode = struct {
                 self.rep[0] = dist;
             }

-            len = try self.rep_len_decoder.decode(reader, decoder, pos_state);
+            len = try self.rep_len_decoder.decode(reader, decoder, pos_state, n_read);

             self.state = if (self.state < 7) 8 else 11;
         } else {
@@ -228,11 +239,11 @@ pub const Decode = struct {
             self.rep[2] = self.rep[1];
             self.rep[1] = self.rep[0];

-            len = try self.len_decoder.decode(reader, decoder, pos_state);
+            len = try self.len_decoder.decode(reader, decoder, pos_state, n_read);

             self.state = if (self.state < 7) 7 else 10;

-            const rep_0 = try self.decodeDistance(reader, decoder, len);
+            const rep_0 = try self.decodeDistance(reader, decoder, len, n_read);

             self.rep[0] = rep_0;
             if (self.rep[0] == 0xFFFF_FFFF) {
@@ -257,6 +268,7 @@ pub const Decode = struct {
         /// `CircularBuffer` or `std.compress.lzma2.AccumBuffer`.
         buffer: anytype,
         decoder: *RangeDecoder,
+        n_read: *u64,
     ) !u8 {
         const def_prev_byte = 0;
         const prev_byte = @as(usize, buffer.lastOr(def_prev_byte));
@@ -275,6 +287,7 @@ pub const Decode = struct {
                 const bit = @intFromBool(try decoder.decodeBit(
                     reader,
                     &probs[((@as(usize, 1) + match_bit) << 8) + result],
+                    n_read,
                 ));
                 result = (result << 1) ^ bit;
                 if (match_bit != bit) {
@@ -284,10 +297,10 @@ pub const Decode = struct {
         }

         while (result < 0x100) {
-            result = (result << 1) ^ @intFromBool(try decoder.decodeBit(reader, &probs[result]));
+            result = (result << 1) ^ @intFromBool(try decoder.decodeBit(reader, &probs[result], n_read));
         }

-        return @as(u8, @truncate(result - 0x100));
+        return @truncate(result - 0x100);
     }

     fn decodeDistance(
@@ -295,12 +308,12 @@ pub const Decode = struct {
         reader: *Reader,
         decoder: *RangeDecoder,
         length: usize,
+        n_read: *u64,
     ) !usize {
         const len_state = if (length > 3) 3 else length;

-        const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(reader, decoder));
-        if (pos_slot < 4)
-            return pos_slot;
+        const pos_slot: usize = try self.pos_slot_decoder[len_state].parse(reader, decoder, n_read);
+        if (pos_slot < 4) return pos_slot;

         const num_direct_bits = @as(u5, @intCast((pos_slot >> 1) - 1));
         var result = (2 ^ (pos_slot & 1)) << num_direct_bits;
@@ -311,10 +324,11 @@ pub const Decode = struct {
                 num_direct_bits,
                 &self.pos_decoders,
                 result - pos_slot,
+                n_read,
             );
         } else {
-            result += @as(usize, try decoder.get(reader, num_direct_bits - 4)) << 4;
-            result += try self.align_decoder.parseReverse(reader, decoder);
+            result += @as(usize, try decoder.get(reader, num_direct_bits - 4, n_read)) << 4;
+            result += try self.align_decoder.parseReverse(reader, decoder, n_read);
         }

         return result;
@@ -435,16 +449,17 @@ pub const Decode = struct {
         return struct {
             probs: [1 << num_bits]u16 = @splat(0x400),

-            pub fn parse(self: *@This(), reader: *Reader, decoder: *RangeDecoder) !u32 {
-                return decoder.parseBitTree(reader, num_bits, &self.probs);
+            pub fn parse(self: *@This(), reader: *Reader, decoder: *RangeDecoder, n_read: *u64) !u32 {
+                return decoder.parseBitTree(reader, num_bits, &self.probs, n_read);
             }

             pub fn parseReverse(
                 self: *@This(),
                 reader: *Reader,
                 decoder: *RangeDecoder,
+                n_read: *u64,
             ) !u32 {
-                return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0);
+                return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, n_read);
             }

             pub fn reset(self: *@This()) void {
@@ -465,13 +480,14 @@ pub const Decode = struct {
             reader: *Reader,
             decoder: *RangeDecoder,
             pos_state: usize,
+            n_read: *u64,
         ) !usize {
-            if (!try decoder.decodeBit(reader, &self.choice)) {
-                return @as(usize, try self.low_coder[pos_state].parse(reader, decoder));
-            } else if (!try decoder.decodeBit(reader, &self.choice2)) {
-                return @as(usize, try self.mid_coder[pos_state].parse(reader, decoder)) + 8;
+            if (!try decoder.decodeBit(reader, &self.choice, n_read)) {
+                return @as(usize, try self.low_coder[pos_state].parse(reader, decoder, n_read));
+            } else if (!try decoder.decodeBit(reader, &self.choice2, n_read)) {
+                return @as(usize, try self.mid_coder[pos_state].parse(reader, decoder, n_read)) + 8;
             } else {
-                return @as(usize, try self.high_coder.parse(reader, decoder)) + 16;
+                return @as(usize, try self.high_coder.parse(reader, decoder, n_read)) + 16;
             }
         }

@@ -701,7 +717,8 @@ pub const Decompress = struct {
         } else if (d.range_decoder.isFinished()) {
             break :process_next;
         }
-        switch (d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| switch (err) {
+        var n_read: u64 = 0;
+        switch (d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder, &n_read) catch |err| switch (err) {
            error.WriteFailed => {
                d.err = error.OutOfMemory;
                return error.ReadFailed;
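An illustrative aside, not part of the commit: decodeBit above keeps an 11-bit adaptive probability in each u16 slot (reset value 0x400) and moves it 1/32 of the way toward the bit it just decoded. A self-contained check of that arithmetic:

const std = @import("std");

test "adaptive probability update used by decodeBit (illustrative)" {
    var prob: u16 = 0x400; // the reset value used by the bit trees above
    prob += (0x800 - prob) >> 5; // after decoding a 0 bit
    try std.testing.expectEqual(@as(u16, 0x420), prob);
    prob -= prob >> 5; // after decoding a 1 bit
    try std.testing.expectEqual(@as(u16, 0x3FF), prob);
}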
@@ -116,24 +116,29 @@ pub const Decode = struct {
         self.* = undefined;
     }

-    pub fn decompress(d: *Decode, reader: *Reader, allocating: *Writer.Allocating) !void {
+    /// Returns how many compressed bytes were consumed.
+    pub fn decompress(d: *Decode, reader: *Reader, allocating: *Writer.Allocating) !u64 {
         const gpa = allocating.allocator;

         var accum = AccumBuffer.init(std.math.maxInt(usize));
         defer accum.deinit(gpa);

+        var n_read: u64 = 0;
+
         while (true) {
             const status = try reader.takeByte();
+            n_read += 1;

             switch (status) {
                 0 => break,
-                1 => try parseUncompressed(reader, allocating, &accum, true),
-                2 => try parseUncompressed(reader, allocating, &accum, false),
-                else => try d.parseLzma(reader, allocating, &accum, status),
+                1 => n_read += try parseUncompressed(reader, allocating, &accum, true),
+                2 => n_read += try parseUncompressed(reader, allocating, &accum, false),
+                else => n_read += try d.parseLzma(reader, allocating, &accum, status),
             }
         }

         try accum.finish(&allocating.writer);
+        return n_read;
     }

     fn parseLzma(
@@ -142,7 +147,7 @@ pub const Decode = struct {
         allocating: *Writer.Allocating,
         accum: *AccumBuffer,
         status: u8,
-    ) !void {
+    ) !u64 {
         if (status & 0x80 == 0) return error.CorruptInput;

         const Reset = struct {
@@ -175,15 +180,19 @@ pub const Decode = struct {
             else => unreachable,
         };

+        var n_read: u64 = 0;
+
         const unpacked_size = blk: {
             var tmp: u64 = status & 0x1F;
             tmp <<= 16;
             tmp |= try reader.takeInt(u16, .big);
+            n_read += 2;
             break :blk tmp + 1;
         };

         const packed_size = blk: {
             const tmp: u17 = try reader.takeInt(u16, .big);
+            n_read += 2;
             break :blk tmp + 1;
         };

@@ -196,6 +205,7 @@ pub const Decode = struct {

         if (reset.props) {
             var props = try reader.takeByte();
+            n_read += 1;
             if (props >= 225) {
                 return error.CorruptInput;
             }
@@ -216,23 +226,21 @@ pub const Decode = struct {
             try ld.resetState(allocating.allocator, new_props);
         }

-        var range_decoder = try lzma.RangeDecoder.init(reader);
+        const start_count = n_read;
+        var range_decoder = try lzma.RangeDecoder.initCounting(reader, &n_read);

         while (true) {
             if (accum.len >= unpacked_size) break;
             if (range_decoder.isFinished()) break;
-            switch (try ld.process(reader, allocating, accum, &range_decoder)) {
+            switch (try ld.process(reader, allocating, accum, &range_decoder, &n_read)) {
                 .more => continue,
                 .finished => break,
             }
         }
         if (accum.len != unpacked_size) return error.DecompressedSizeMismatch;
+        if (n_read - start_count != packed_size) return error.CompressedSizeMismatch;

-        // TODO restore this error
-        //if (counter.bytes_read != packed_size) {
-        //    return error.CorruptInput;
-        //}
-        _ = packed_size;
+        return n_read;
     }

     fn parseUncompressed(
@@ -240,18 +248,17 @@ pub const Decode = struct {
         allocating: *Writer.Allocating,
         accum: *AccumBuffer,
         reset_dict: bool,
-    ) !void {
+    ) !usize {
         const unpacked_size = @as(u17, try reader.takeInt(u16, .big)) + 1;

         if (reset_dict) try accum.reset(&allocating.writer);

         const gpa = allocating.allocator;

-        var i = unpacked_size;
-        while (i != 0) {
+        for (0..unpacked_size) |_| {
             try accum.appendByte(gpa, try reader.takeByte());
-            i -= 1;
         }
+        return 2 + unpacked_size;
     }
 };

@@ -268,6 +275,7 @@ test "decompress hello world stream" {
     var result: std.Io.Writer.Allocating = .init(gpa);
     defer result.deinit();

-    try decode.decompress(&stream, &result);
+    const n_read = try decode.decompress(&stream, &result);
+    try std.testing.expectEqual(compressed.len, n_read);
     try std.testing.expectEqualStrings(expected, result.written());
 }
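An illustrative aside, not part of the commit: the byte counts returned by decompress, parseLzma, and parseUncompressed above follow from the LZMA2 chunk framing those functions read, namely a status byte, big-endian u16 size fields that store the size minus one, and an optional properties byte. A self-contained check of the size arithmetic used in parseLzma:

const std = @import("std");

test "LZMA2 chunk size fields (illustrative)" {
    // Compressed chunks set bit 7 of the status byte; the unpacked size is
    // the low five status bits, shifted left 16, ORed with a big-endian u16,
    // plus one.
    const status: u8 = 0x80;
    const size_hi: u64 = status & 0x1F;
    const size_lo: u64 = 0x00FF; // example value of the following u16
    try std.testing.expectEqual(@as(u64, 0x100), ((size_hi << 16) | size_lo) + 1);

    // The packed size is a u16 field plus one, which is why parseLzma holds
    // it in a u17: a chunk body can be at most 0x10000 bytes.
    const packed_field: u17 = 0xFFFF;
    try std.testing.expectEqual(@as(u17, 0x10000), packed_field + 1);
}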
@@ -8,6 +8,7 @@ const Sha256 = std.crypto.hash.sha2.Sha256;
 const lzma2 = std.compress.lzma2;
 const Writer = std.Io.Writer;
 const Reader = std.Io.Reader;
+const assert = std.debug.assert;

 /// Underlying compressed data stream to pull bytes from.
 input: *Reader,
@@ -28,6 +29,7 @@ pub const Error = error{
     Overflow,
     InvalidRangeCode,
     DecompressedSizeMismatch,
+    CompressedSizeMismatch,
 };

 pub const Check = enum(u4) {
@@ -62,10 +64,10 @@ pub fn init(
     if (!std.mem.eql(u8, magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
         return error.NotXzStream;

-    const actual_hash = Crc32.hash(try input.peek(@sizeOf(StreamFlags)));
+    const computed_checksum = Crc32.hash(try input.peek(@sizeOf(StreamFlags)));
     const stream_flags = input.takeStruct(StreamFlags, .little) catch unreachable;
     const stored_hash = try input.takeInt(u32, .little);
-    if (actual_hash != stored_hash) return error.WrongChecksum;
+    if (computed_checksum != stored_hash) return error.WrongChecksum;

     return .{
         .input = input,
@@ -129,6 +131,7 @@ fn readIndirect(r: *Reader) Reader.Error!usize {
         r.end = allocating.writer.end;
     }

+    if (d.err != null) return error.ReadFailed;
     if (d.block_count == std.math.maxInt(usize)) return error.EndOfStream;

     readBlock(input, &allocating) catch |err| switch (err) {
@@ -137,7 +140,10 @@ fn readIndirect(r: *Reader) Reader.Error!usize {
             return error.ReadFailed;
         },
         error.SuccessfulEndOfStream => {
-            finish(d);
+            finish(d) catch |finish_err| {
+                d.err = finish_err;
+                return error.ReadFailed;
+            };
             d.block_count = std.math.maxInt(usize);
             return error.EndOfStream;
         },
@@ -184,7 +190,7 @@ fn readBlock(input: *Reader, allocating: *Writer.Allocating) !void {
     var packed_size: ?u64 = null;
     var unpacked_size: ?u64 = null;

-    {
+    const header_size = h: {
         // Read the block header via peeking so that we can hash the whole thing too.
         const first_byte: usize = try input.peekByte();
         if (first_byte == 0) return error.SuccessfulEndOfStream;
@@ -223,95 +229,92 @@ fn readBlock(input: *Reader, allocating: *Writer.Allocating) !void {

         const actual_header_size = input.seek - header_seek_start;
         if (actual_header_size > declared_header_size) return error.CorruptInput;
-        var remaining_bytes = declared_header_size - actual_header_size;
-        while (remaining_bytes != 0) {
+        const remaining_bytes = declared_header_size - actual_header_size;
+        for (0..remaining_bytes) |_| {
             if (try input.takeByte() != 0) return error.CorruptInput;
-            remaining_bytes -= 1;
         }

         const header_slice = input.buffer[header_seek_start..][0..declared_header_size];
-        const actual_hash = Crc32.hash(header_slice);
-        const declared_hash = try input.takeInt(u32, .little);
-        if (actual_hash != declared_hash) return error.WrongChecksum;
-    }
+        const computed_checksum = Crc32.hash(header_slice);
+        const declared_checksum = try input.takeInt(u32, .little);
+        if (computed_checksum != declared_checksum) return error.WrongChecksum;
+        break :h declared_header_size;
+    };

     // Compressed Data

     var lzma2_decode = try lzma2.Decode.init(allocating.allocator);
+    defer lzma2_decode.deinit(allocating.allocator);
     const before_size = allocating.writer.end;
-    try lzma2_decode.decompress(input, allocating);
+    const packed_bytes_read = try lzma2_decode.decompress(input, allocating);
     const unpacked_bytes = allocating.writer.end - before_size;

-    // TODO restore this check
-    //if (packed_size) |s| {
-    //    if (s != packed_counter.bytes_read)
-    //        return error.CorruptInput;
-    //}
+    if (packed_size) |s| {
+        if (s != packed_bytes_read) return error.CorruptInput;
+    }

     if (unpacked_size) |s| {
         if (s != unpacked_bytes) return error.CorruptInput;
     }

     // Block Padding
-    if (true) @panic("TODO account for block padding");
-    //while (block_counter.bytes_read % 4 != 0) {
-    //    if (try block_reader.takeByte() != 0)
-    //        return error.CorruptInput;
-    //}
+    const block_counter = header_size + packed_bytes_read;
+    const padding = (4 - (block_counter % 4)) % 4;
+    for (0..padding) |_| {
+        if (try input.takeByte() != 0) return error.CorruptInput;
+    }

 }

-fn finish(d: *Decompress) void {
-    _ = d;
-    @panic("TODO");
-    //const input = d.input;
-    //const index_size = blk: {
-    //    const record_count = try input.takeLeb128(u64);
-    //    if (record_count != d.block_decode.block_count)
-    //        return error.CorruptInput;
-
-    //    var i: usize = 0;
-    //    while (i < record_count) : (i += 1) {
-    //        // TODO: validate records
-    //        _ = try std.leb.readUleb128(u64, counting_reader);
-    //        _ = try std.leb.readUleb128(u64, counting_reader);
-    //    }
-
-    //    while (counter.bytes_read % 4 != 0) {
-    //        if (try counting_reader.takeByte() != 0)
-    //            return error.CorruptInput;
-    //    }
-
-    //    const hash_a = hasher.hasher.final();
-    //    const hash_b = try counting_reader.takeInt(u32, .little);
-    //    if (hash_a != hash_b)
-    //        return error.WrongChecksum;
-
-    //    break :blk counter.bytes_read;
-    //};
-
-    //const hash_a = try d.in_reader.takeInt(u32, .little);
-    //const hash_b = blk: {
-    //    var hasher = hashedReader(d.in_reader, Crc32.init());
-    //    const hashed_reader = hasher.reader();
-
-    //    const backward_size = (@as(u64, try hashed_reader.takeInt(u32, .little)) + 1) * 4;
-    //    if (backward_size != index_size)
-    //        return error.CorruptInput;
-
-    //    var check: Check = undefined;
-    //    try readStreamFlags(hashed_reader, &check);
-
-    //    break :blk hasher.hasher.final();
-    //};
-
-    //if (hash_a != hash_b)
-    //    return error.WrongChecksum;
-
-    //const magic = try d.in_reader.takeBytesNoEof(2);
-    //if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
-    //    return error.CorruptInput;
-
-    //return 0;
+fn finish(d: *Decompress) !void {
+    const input = d.input;
+    const index_size = blk: {
+        // Assume that we already peeked a zero in readBlock().
+        assert(input.buffered()[0] == 0);
+        var input_counter: u64 = 1;
+        var checksum: Crc32 = .init();
+        checksum.update(&.{0});
+        input.toss(1);
+
+        const record_count = try countLeb128(input, u64, &input_counter, &checksum);
+        if (record_count != d.block_count)
+            return error.CorruptInput;
+
+        for (0..record_count) |_| {
+            // TODO: validate records
+            _ = try countLeb128(input, u64, &input_counter, &checksum);
+            _ = try countLeb128(input, u64, &input_counter, &checksum);
+        }
+
+        const padding_len = (4 - (input_counter % 4)) % 4;
+        const padding = try input.take(padding_len);
+        for (padding) |byte| {
+            if (byte != 0) return error.CorruptInput;
+        }
+        checksum.update(padding);
+
+        const declared_checksum = try input.takeInt(u32, .little);
+        const computed_checksum = checksum.final();
+        if (computed_checksum != declared_checksum) return error.WrongChecksum;
+
+        break :blk input_counter + padding.len + 4;
+    };
+
+    const declared_checksum = try input.takeInt(u32, .little);
+    const computed_checksum = Crc32.hash(try input.peek(4 + @sizeOf(StreamFlags)));
+    if (declared_checksum != computed_checksum) return error.WrongChecksum;
+    const backward_size = (@as(u64, try input.takeInt(u32, .little)) + 1) * 4;
+    if (backward_size != index_size) return error.CorruptInput;
+    input.toss(@sizeOf(StreamFlags));
+    if (!std.mem.eql(u8, try input.takeArray(2), &.{ 'Y', 'Z' }))
+        return error.CorruptInput;
+}
+
+fn countLeb128(reader: *Reader, comptime T: type, counter: *u64, hasher: *Crc32) !T {
+    try reader.fill(8);
+    const start = reader.seek;
+    const result = try reader.takeLeb128(T);
+    const read_slice = reader.buffer[start..reader.seek];
+    hasher.update(read_slice);
+    counter.* += read_slice.len;
+    return result;
 }
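An illustrative aside, not part of the commit: finish above walks the XZ stream index and footer, and two pieces of arithmetic from that code are worth spelling out. The index is padded with zero bytes up to a multiple of four, and the footer's backward_size field stores the real index size divided by four, minus one. A self-contained check:

const std = @import("std");

test "index padding and backward_size arithmetic (illustrative)" {
    // Padding needed to round an index of 7 bytes up to a 4-byte boundary:
    const index_bytes: u64 = 7;
    const padding_len = (4 - (index_bytes % 4)) % 4;
    try std.testing.expectEqual(@as(u64, 1), padding_len);

    // A stored backward_size of 2 denotes a real index size of (2 + 1) * 4.
    const stored: u32 = 2;
    const backward_size = (@as(u64, stored) + 1) * 4;
    try std.testing.expectEqual(@as(u64, 12), backward_size);
}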
@@ -22,33 +22,49 @@ fn testReader(data: []const u8, comptime expected: []const u8) !void {
     try testing.expectEqualSlices(u8, expected, result);
 }

-test "compressed data" {
+test "fixture good-0-empty.xz" {
     try testReader(@embedFile("testdata/good-0-empty.xz"), "");
+}

-    inline for ([_][]const u8{
-        "good-1-check-none.xz",
-        "good-1-check-crc32.xz",
-        "good-1-check-crc64.xz",
-        "good-1-check-sha256.xz",
-        "good-2-lzma2.xz",
-        "good-1-block_header-1.xz",
-        "good-1-block_header-2.xz",
-        "good-1-block_header-3.xz",
-    }) |filename| {
-        try testReader(@embedFile("testdata/" ++ filename),
-            \\Hello
-            \\World!
-            \\
-        );
-    }
+const hello_world_text =
+    \\Hello
+    \\World!
+    \\
+;

-    inline for ([_][]const u8{
-        "good-1-lzma2-1.xz",
-        "good-1-lzma2-2.xz",
-        "good-1-lzma2-3.xz",
-        "good-1-lzma2-4.xz",
-    }) |filename| {
-        try testReader(@embedFile("testdata/" ++ filename),
+test "fixture good-1-check-none.xz" {
+    try testReader(@embedFile("testdata/good-1-check-none.xz"), hello_world_text);
+}
+
+test "fixture good-1-check-crc32.xz" {
+    try testReader(@embedFile("testdata/good-1-check-crc32.xz"), hello_world_text);
+}
+
+test "fixture good-1-check-crc64.xz" {
+    try testReader(@embedFile("testdata/good-1-check-crc64.xz"), hello_world_text);
+}
+
+test "fixture good-1-check-sha256.xz" {
+    try testReader(@embedFile("testdata/good-1-check-sha256.xz"), hello_world_text);
+}
+
+test "fixture good-2-lzma2.xz" {
+    try testReader(@embedFile("testdata/good-2-lzma2.xz"), hello_world_text);
+}
+
+test "fixture good-1-block_header-1.xz" {
+    try testReader(@embedFile("testdata/good-1-block_header-1.xz"), hello_world_text);
+}
+
+test "fixture good-1-block_header-2.xz" {
+    try testReader(@embedFile("testdata/good-1-block_header-2.xz"), hello_world_text);
+}
+
+test "fixture good-1-block_header-3.xz" {
+    try testReader(@embedFile("testdata/good-1-block_header-3.xz"), hello_world_text);
+}
+
+const lorem_ipsum_text =
     \\Lorem ipsum dolor sit amet, consectetur adipisicing
     \\elit, sed do eiusmod tempor incididunt ut
     \\labore et dolore magna aliqua. Ut enim
@@ -60,9 +76,25 @@ test "compressed data" {
     \\non proident, sunt in culpa qui officia
     \\deserunt mollit anim id est laborum.
     \\
-        );
-    }
+;

-    try testReader(@embedFile("testdata/good-1-lzma2-5.xz"), "");
+test "fixture good-1-lzma2-1.xz" {
+    try testReader(@embedFile("testdata/good-1-lzma2-1.xz"), lorem_ipsum_text);
+}
+
+test "fixture good-1-lzma2-2.xz" {
+    try testReader(@embedFile("testdata/good-1-lzma2-2.xz"), lorem_ipsum_text);
+}
+
+test "fixture good-1-lzma2-3.xz" {
+    try testReader(@embedFile("testdata/good-1-lzma2-3.xz"), lorem_ipsum_text);
+}
+
+test "fixture good-1-lzma2-4.xz" {
+    try testReader(@embedFile("testdata/good-1-lzma2-4.xz"), lorem_ipsum_text);
+}
+
+test "fixture good-1-lzma2-5.xz" {
+    try testReader(@embedFile("testdata/good-1-lzma2-5.xz"), "");
 }