Merge pull request #14518 from FnControlOption/lzma

This commit is contained in:
Andrew Kelley 2023-02-09 10:13:25 -05:00 committed by GitHub
commit d24ebf1d12
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 1339 additions and 787 deletions

View File

@ -128,6 +128,8 @@ pub fn build(b: *std.Build) !void {
"compress-gettysburg.txt",
"compress-pi.txt",
"rfc1951.txt",
// exclude files from lib/std/compress/lzma/testdata
".lzma",
// exclude files from lib/std/compress/xz/testdata
".xz",
// exclude files from lib/std/tz/

View File

@ -2,8 +2,10 @@ const std = @import("std.zig");
pub const deflate = @import("compress/deflate.zig");
pub const gzip = @import("compress/gzip.zig");
pub const zlib = @import("compress/zlib.zig");
pub const lzma = @import("compress/lzma.zig");
pub const lzma2 = @import("compress/lzma2.zig");
pub const xz = @import("compress/xz.zig");
pub const zlib = @import("compress/zlib.zig");
pub fn HashedReader(
comptime ReaderType: anytype,
@ -38,6 +40,8 @@ pub fn hashedReader(
test {
_ = deflate;
_ = gzip;
_ = zlib;
_ = lzma;
_ = lzma2;
_ = xz;
_ = zlib;
}

90
lib/std/compress/lzma.zig Normal file
View File

@ -0,0 +1,90 @@
const std = @import("../std.zig");
const math = std.math;
const mem = std.mem;
const Allocator = std.mem.Allocator;
pub const decode = @import("lzma/decode.zig");
pub fn decompress(
allocator: Allocator,
reader: anytype,
) !Decompress(@TypeOf(reader)) {
return decompressWithOptions(allocator, reader, .{});
}
pub fn decompressWithOptions(
allocator: Allocator,
reader: anytype,
options: decode.Options,
) !Decompress(@TypeOf(reader)) {
const params = try decode.Params.readHeader(reader, options);
return Decompress(@TypeOf(reader)).init(allocator, reader, params, options.memlimit);
}
pub fn Decompress(comptime ReaderType: type) type {
return struct {
const Self = @This();
pub const Error =
ReaderType.Error ||
Allocator.Error ||
error{ CorruptInput, EndOfStream, Overflow };
pub const Reader = std.io.Reader(*Self, Error, read);
allocator: Allocator,
in_reader: ReaderType,
to_read: std.ArrayListUnmanaged(u8),
buffer: decode.lzbuffer.LzCircularBuffer,
decoder: decode.rangecoder.RangeDecoder,
state: decode.DecoderState,
pub fn init(allocator: Allocator, source: ReaderType, params: decode.Params, memlimit: ?usize) !Self {
return Self{
.allocator = allocator,
.in_reader = source,
.to_read = .{},
.buffer = decode.lzbuffer.LzCircularBuffer.init(params.dict_size, memlimit orelse math.maxInt(usize)),
.decoder = try decode.rangecoder.RangeDecoder.init(source),
.state = try decode.DecoderState.init(allocator, params.properties, params.unpacked_size),
};
}
pub fn reader(self: *Self) Reader {
return .{ .context = self };
}
pub fn deinit(self: *Self) void {
self.to_read.deinit(self.allocator);
self.buffer.deinit(self.allocator);
self.state.deinit(self.allocator);
self.* = undefined;
}
pub fn read(self: *Self, output: []u8) Error!usize {
const writer = self.to_read.writer(self.allocator);
while (self.to_read.items.len < output.len) {
switch (try self.state.process(self.allocator, self.in_reader, writer, &self.buffer, &self.decoder)) {
.continue_ => {},
.finished => {
try self.buffer.finish(writer);
break;
},
}
}
const input = self.to_read.items;
const n = math.min(input.len, output.len);
mem.copy(u8, output[0..n], input[0..n]);
mem.copy(u8, input, input[n..]);
self.to_read.shrinkRetainingCapacity(input.len - n);
return n;
}
};
}
test {
_ = @import("lzma/test.zig");
_ = @import("lzma/vec2d.zig");
}

View File

@ -0,0 +1,379 @@
const std = @import("../../std.zig");
const assert = std.debug.assert;
const math = std.math;
const Allocator = std.mem.Allocator;
pub const lzbuffer = @import("decode/lzbuffer.zig");
pub const rangecoder = @import("decode/rangecoder.zig");
const LzCircularBuffer = lzbuffer.LzCircularBuffer;
const BitTree = rangecoder.BitTree;
const LenDecoder = rangecoder.LenDecoder;
const RangeDecoder = rangecoder.RangeDecoder;
const Vec2D = @import("vec2d.zig").Vec2D;
pub const Options = struct {
unpacked_size: UnpackedSize = .read_from_header,
memlimit: ?usize = null,
allow_incomplete: bool = false,
};
pub const UnpackedSize = union(enum) {
read_from_header,
read_header_but_use_provided: ?u64,
use_provided: ?u64,
};
const ProcessingStatus = enum {
continue_,
finished,
};
pub const Properties = struct {
lc: u4,
lp: u3,
pb: u3,
fn validate(self: Properties) void {
assert(self.lc <= 8);
assert(self.lp <= 4);
assert(self.pb <= 4);
}
};
pub const Params = struct {
properties: Properties,
dict_size: u32,
unpacked_size: ?u64,
pub fn readHeader(reader: anytype, options: Options) !Params {
var props = try reader.readByte();
if (props >= 225) {
return error.CorruptInput;
}
const lc = @intCast(u4, props % 9);
props /= 9;
const lp = @intCast(u3, props % 5);
props /= 5;
const pb = @intCast(u3, props);
const dict_size_provided = try reader.readIntLittle(u32);
const dict_size = math.max(0x1000, dict_size_provided);
const unpacked_size = switch (options.unpacked_size) {
.read_from_header => blk: {
const unpacked_size_provided = try reader.readIntLittle(u64);
const marker_mandatory = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
break :blk if (marker_mandatory)
null
else
unpacked_size_provided;
},
.read_header_but_use_provided => |x| blk: {
_ = try reader.readIntLittle(u64);
break :blk x;
},
.use_provided => |x| x,
};
return Params{
.properties = Properties{ .lc = lc, .lp = lp, .pb = pb },
.dict_size = dict_size,
.unpacked_size = unpacked_size,
};
}
};
pub const DecoderState = struct {
lzma_props: Properties,
unpacked_size: ?u64,
literal_probs: Vec2D(u16),
pos_slot_decoder: [4]BitTree(6),
align_decoder: BitTree(4),
pos_decoders: [115]u16,
is_match: [192]u16,
is_rep: [12]u16,
is_rep_g0: [12]u16,
is_rep_g1: [12]u16,
is_rep_g2: [12]u16,
is_rep_0long: [192]u16,
state: usize,
rep: [4]usize,
len_decoder: LenDecoder,
rep_len_decoder: LenDecoder,
pub fn init(
allocator: Allocator,
lzma_props: Properties,
unpacked_size: ?u64,
) !DecoderState {
return .{
.lzma_props = lzma_props,
.unpacked_size = unpacked_size,
.literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (lzma_props.lc + lzma_props.lp), 0x300 }),
.pos_slot_decoder = .{.{}} ** 4,
.align_decoder = .{},
.pos_decoders = .{0x400} ** 115,
.is_match = .{0x400} ** 192,
.is_rep = .{0x400} ** 12,
.is_rep_g0 = .{0x400} ** 12,
.is_rep_g1 = .{0x400} ** 12,
.is_rep_g2 = .{0x400} ** 12,
.is_rep_0long = .{0x400} ** 192,
.state = 0,
.rep = .{0} ** 4,
.len_decoder = .{},
.rep_len_decoder = .{},
};
}
pub fn deinit(self: *DecoderState, allocator: Allocator) void {
self.literal_probs.deinit(allocator);
self.* = undefined;
}
pub fn resetState(self: *DecoderState, allocator: Allocator, new_props: Properties) !void {
new_props.validate();
if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) {
self.literal_probs.fill(0x400);
} else {
self.literal_probs.deinit(allocator);
self.literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 });
}
self.lzma_props = new_props;
for (self.pos_slot_decoder) |*t| t.reset();
self.align_decoder.reset();
self.pos_decoders = .{0x400} ** 115;
self.is_match = .{0x400} ** 192;
self.is_rep = .{0x400} ** 12;
self.is_rep_g0 = .{0x400} ** 12;
self.is_rep_g1 = .{0x400} ** 12;
self.is_rep_g2 = .{0x400} ** 12;
self.is_rep_0long = .{0x400} ** 192;
self.state = 0;
self.rep = .{0} ** 4;
self.len_decoder.reset();
self.rep_len_decoder.reset();
}
fn processNextInner(
self: *DecoderState,
allocator: Allocator,
reader: anytype,
writer: anytype,
buffer: anytype,
decoder: *RangeDecoder,
update: bool,
) !ProcessingStatus {
const pos_state = buffer.len & ((@as(usize, 1) << self.lzma_props.pb) - 1);
if (!try decoder.decodeBit(
reader,
&self.is_match[(self.state << 4) + pos_state],
update,
)) {
const byte: u8 = try self.decodeLiteral(reader, buffer, decoder, update);
if (update) {
try buffer.appendLiteral(allocator, byte, writer);
self.state = if (self.state < 4)
0
else if (self.state < 10)
self.state - 3
else
self.state - 6;
}
return .continue_;
}
var len: usize = undefined;
if (try decoder.decodeBit(reader, &self.is_rep[self.state], update)) {
if (!try decoder.decodeBit(reader, &self.is_rep_g0[self.state], update)) {
if (!try decoder.decodeBit(
reader,
&self.is_rep_0long[(self.state << 4) + pos_state],
update,
)) {
if (update) {
self.state = if (self.state < 7) 9 else 11;
const dist = self.rep[0] + 1;
try buffer.appendLz(allocator, 1, dist, writer);
}
return .continue_;
}
} else {
const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state], update))
1
else if (!try decoder.decodeBit(reader, &self.is_rep_g2[self.state], update))
2
else
3;
if (update) {
const dist = self.rep[idx];
var i = idx;
while (i > 0) : (i -= 1) {
self.rep[i] = self.rep[i - 1];
}
self.rep[0] = dist;
}
}
len = try self.rep_len_decoder.decode(reader, decoder, pos_state, update);
if (update) {
self.state = if (self.state < 7) 8 else 11;
}
} else {
if (update) {
self.rep[3] = self.rep[2];
self.rep[2] = self.rep[1];
self.rep[1] = self.rep[0];
}
len = try self.len_decoder.decode(reader, decoder, pos_state, update);
if (update) {
self.state = if (self.state < 7) 7 else 10;
}
const rep_0 = try self.decodeDistance(reader, decoder, len, update);
if (update) {
self.rep[0] = rep_0;
if (self.rep[0] == 0xFFFF_FFFF) {
if (decoder.isFinished()) {
return .finished;
}
return error.CorruptInput;
}
}
}
if (update) {
len += 2;
const dist = self.rep[0] + 1;
try buffer.appendLz(allocator, len, dist, writer);
}
return .continue_;
}
fn processNext(
self: *DecoderState,
allocator: Allocator,
reader: anytype,
writer: anytype,
buffer: anytype,
decoder: *RangeDecoder,
) !ProcessingStatus {
return self.processNextInner(allocator, reader, writer, buffer, decoder, true);
}
pub fn process(
self: *DecoderState,
allocator: Allocator,
reader: anytype,
writer: anytype,
buffer: anytype,
decoder: *RangeDecoder,
) !ProcessingStatus {
process_next: {
if (self.unpacked_size) |unpacked_size| {
if (buffer.len >= unpacked_size) {
break :process_next;
}
} else if (decoder.isFinished()) {
break :process_next;
}
switch (try self.processNext(allocator, reader, writer, buffer, decoder)) {
.continue_ => return .continue_,
.finished => break :process_next,
}
}
if (self.unpacked_size) |unpacked_size| {
if (buffer.len != unpacked_size) {
return error.CorruptInput;
}
}
return .finished;
}
fn decodeLiteral(
self: *DecoderState,
reader: anytype,
buffer: anytype,
decoder: *RangeDecoder,
update: bool,
) !u8 {
const def_prev_byte = 0;
const prev_byte = @as(usize, buffer.lastOr(def_prev_byte));
var result: usize = 1;
const lit_state = ((buffer.len & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) +
(prev_byte >> (8 - self.lzma_props.lc));
const probs = try self.literal_probs.getMut(lit_state);
if (self.state >= 7) {
var match_byte = @as(usize, try buffer.lastN(self.rep[0] + 1));
while (result < 0x100) {
const match_bit = (match_byte >> 7) & 1;
match_byte <<= 1;
const bit = @boolToInt(try decoder.decodeBit(
reader,
&probs[((@as(usize, 1) + match_bit) << 8) + result],
update,
));
result = (result << 1) ^ bit;
if (match_bit != bit) {
break;
}
}
}
while (result < 0x100) {
result = (result << 1) ^ @boolToInt(try decoder.decodeBit(reader, &probs[result], update));
}
return @truncate(u8, result - 0x100);
}
fn decodeDistance(
self: *DecoderState,
reader: anytype,
decoder: *RangeDecoder,
length: usize,
update: bool,
) !usize {
const len_state = if (length > 3) 3 else length;
const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(reader, decoder, update));
if (pos_slot < 4)
return pos_slot;
const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1);
var result = (2 ^ (pos_slot & 1)) << num_direct_bits;
if (pos_slot < 14) {
result += try decoder.parseReverseBitTree(
reader,
num_direct_bits,
&self.pos_decoders,
result - pos_slot,
update,
);
} else {
result += @as(usize, try decoder.get(reader, num_direct_bits - 4)) << 4;
result += try self.align_decoder.parseReverse(reader, decoder, update);
}
return result;
}
};

View File

@ -0,0 +1,228 @@
const std = @import("../../../std.zig");
const math = std.math;
const mem = std.mem;
const Allocator = std.mem.Allocator;
const ArrayListUnmanaged = std.ArrayListUnmanaged;
/// An accumulating buffer for LZ sequences
pub const LzAccumBuffer = struct {
/// Buffer
buf: ArrayListUnmanaged(u8),
/// Buffer memory limit
memlimit: usize,
/// Total number of bytes sent through the buffer
len: usize,
const Self = @This();
pub fn init(memlimit: usize) Self {
return Self{
.buf = .{},
.memlimit = memlimit,
.len = 0,
};
}
pub fn appendByte(self: *Self, allocator: Allocator, byte: u8) !void {
try self.buf.append(allocator, byte);
self.len += 1;
}
/// Reset the internal dictionary
pub fn reset(self: *Self, writer: anytype) !void {
try writer.writeAll(self.buf.items);
self.buf.clearRetainingCapacity();
self.len = 0;
}
/// Retrieve the last byte or return a default
pub fn lastOr(self: Self, lit: u8) u8 {
const buf_len = self.buf.items.len;
return if (buf_len == 0)
lit
else
self.buf.items[buf_len - 1];
}
/// Retrieve the n-th last byte
pub fn lastN(self: Self, dist: usize) !u8 {
const buf_len = self.buf.items.len;
if (dist > buf_len) {
return error.CorruptInput;
}
return self.buf.items[buf_len - dist];
}
/// Append a literal
pub fn appendLiteral(
self: *Self,
allocator: Allocator,
lit: u8,
writer: anytype,
) !void {
_ = writer;
if (self.len >= self.memlimit) {
return error.CorruptInput;
}
try self.buf.append(allocator, lit);
self.len += 1;
}
/// Fetch an LZ sequence (length, distance) from inside the buffer
pub fn appendLz(
self: *Self,
allocator: Allocator,
len: usize,
dist: usize,
writer: anytype,
) !void {
_ = writer;
const buf_len = self.buf.items.len;
if (dist > buf_len) {
return error.CorruptInput;
}
var offset = buf_len - dist;
var i: usize = 0;
while (i < len) : (i += 1) {
const x = self.buf.items[offset];
try self.buf.append(allocator, x);
offset += 1;
}
self.len += len;
}
pub fn finish(self: *Self, writer: anytype) !void {
try writer.writeAll(self.buf.items);
self.buf.clearRetainingCapacity();
}
pub fn deinit(self: *Self, allocator: Allocator) void {
self.buf.deinit(allocator);
self.* = undefined;
}
};
/// A circular buffer for LZ sequences
pub const LzCircularBuffer = struct {
/// Circular buffer
buf: ArrayListUnmanaged(u8),
/// Length of the buffer
dict_size: usize,
/// Buffer memory limit
memlimit: usize,
/// Current position
cursor: usize,
/// Total number of bytes sent through the buffer
len: usize,
const Self = @This();
pub fn init(dict_size: usize, memlimit: usize) Self {
return Self{
.buf = .{},
.dict_size = dict_size,
.memlimit = memlimit,
.cursor = 0,
.len = 0,
};
}
pub fn get(self: Self, index: usize) u8 {
return if (0 <= index and index < self.buf.items.len)
self.buf.items[index]
else
0;
}
pub fn set(self: *Self, allocator: Allocator, index: usize, value: u8) !void {
if (index >= self.memlimit) {
return error.CorruptInput;
}
try self.buf.ensureTotalCapacity(allocator, index + 1);
while (self.buf.items.len < index) {
self.buf.appendAssumeCapacity(0);
}
self.buf.appendAssumeCapacity(value);
}
/// Retrieve the last byte or return a default
pub fn lastOr(self: Self, lit: u8) u8 {
return if (self.len == 0)
lit
else
self.get((self.dict_size + self.cursor - 1) % self.dict_size);
}
/// Retrieve the n-th last byte
pub fn lastN(self: Self, dist: usize) !u8 {
if (dist > self.dict_size or dist > self.len) {
return error.CorruptInput;
}
const offset = (self.dict_size + self.cursor - dist) % self.dict_size;
return self.get(offset);
}
/// Append a literal
pub fn appendLiteral(
self: *Self,
allocator: Allocator,
lit: u8,
writer: anytype,
) !void {
try self.set(allocator, self.cursor, lit);
self.cursor += 1;
self.len += 1;
// Flush the circular buffer to the output
if (self.cursor == self.dict_size) {
try writer.writeAll(self.buf.items);
self.cursor = 0;
}
}
/// Fetch an LZ sequence (length, distance) from inside the buffer
pub fn appendLz(
self: *Self,
allocator: Allocator,
len: usize,
dist: usize,
writer: anytype,
) !void {
if (dist > self.dict_size or dist > self.len) {
return error.CorruptInput;
}
var offset = (self.dict_size + self.cursor - dist) % self.dict_size;
var i: usize = 0;
while (i < len) : (i += 1) {
const x = self.get(offset);
try self.appendLiteral(allocator, x, writer);
offset += 1;
if (offset == self.dict_size) {
offset = 0;
}
}
}
pub fn finish(self: *Self, writer: anytype) !void {
if (self.cursor > 0) {
try writer.writeAll(self.buf.items[0..self.cursor]);
self.cursor = 0;
}
}
pub fn deinit(self: *Self, allocator: Allocator) void {
self.buf.deinit(allocator);
self.* = undefined;
}
};

View File

@ -0,0 +1,181 @@
const std = @import("../../../std.zig");
const mem = std.mem;
pub const RangeDecoder = struct {
range: u32,
code: u32,
pub fn init(reader: anytype) !RangeDecoder {
const reserved = try reader.readByte();
if (reserved != 0) {
return error.CorruptInput;
}
return RangeDecoder{
.range = 0xFFFF_FFFF,
.code = try reader.readIntBig(u32),
};
}
pub fn fromParts(
range: u32,
code: u32,
) RangeDecoder {
return .{
.range = range,
.code = code,
};
}
pub fn set(self: *RangeDecoder, range: u32, code: u32) void {
self.range = range;
self.code = code;
}
pub inline fn isFinished(self: RangeDecoder) bool {
return self.code == 0;
}
inline fn normalize(self: *RangeDecoder, reader: anytype) !void {
if (self.range < 0x0100_0000) {
self.range <<= 8;
self.code = (self.code << 8) ^ @as(u32, try reader.readByte());
}
}
inline fn getBit(self: *RangeDecoder, reader: anytype) !bool {
self.range >>= 1;
const bit = self.code >= self.range;
if (bit)
self.code -= self.range;
try self.normalize(reader);
return bit;
}
pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 {
var result: u32 = 0;
var i: usize = 0;
while (i < count) : (i += 1)
result = (result << 1) ^ @boolToInt(try self.getBit(reader));
return result;
}
pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool {
const bound = (self.range >> 11) * prob.*;
if (self.code < bound) {
if (update)
prob.* += (0x800 - prob.*) >> 5;
self.range = bound;
try self.normalize(reader);
return false;
} else {
if (update)
prob.* -= prob.* >> 5;
self.code -= bound;
self.range -= bound;
try self.normalize(reader);
return true;
}
}
fn parseBitTree(
self: *RangeDecoder,
reader: anytype,
num_bits: u5,
probs: []u16,
update: bool,
) !u32 {
var tmp: u32 = 1;
var i: @TypeOf(num_bits) = 0;
while (i < num_bits) : (i += 1) {
const bit = try self.decodeBit(reader, &probs[tmp], update);
tmp = (tmp << 1) ^ @boolToInt(bit);
}
return tmp - (@as(u32, 1) << num_bits);
}
pub fn parseReverseBitTree(
self: *RangeDecoder,
reader: anytype,
num_bits: u5,
probs: []u16,
offset: usize,
update: bool,
) !u32 {
var result: u32 = 0;
var tmp: usize = 1;
var i: @TypeOf(num_bits) = 0;
while (i < num_bits) : (i += 1) {
const bit = @boolToInt(try self.decodeBit(reader, &probs[offset + tmp], update));
tmp = (tmp << 1) ^ bit;
result ^= @as(u32, bit) << i;
}
return result;
}
};
pub fn BitTree(comptime num_bits: usize) type {
return struct {
probs: [1 << num_bits]u16 = .{0x400} ** (1 << num_bits),
const Self = @This();
pub fn parse(
self: *Self,
reader: anytype,
decoder: *RangeDecoder,
update: bool,
) !u32 {
return decoder.parseBitTree(reader, num_bits, &self.probs, update);
}
pub fn parseReverse(
self: *Self,
reader: anytype,
decoder: *RangeDecoder,
update: bool,
) !u32 {
return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update);
}
pub fn reset(self: *Self) void {
mem.set(u16, &self.probs, 0x400);
}
};
}
pub const LenDecoder = struct {
choice: u16 = 0x400,
choice2: u16 = 0x400,
low_coder: [16]BitTree(3) = .{.{}} ** 16,
mid_coder: [16]BitTree(3) = .{.{}} ** 16,
high_coder: BitTree(8) = .{},
pub fn decode(
self: *LenDecoder,
reader: anytype,
decoder: *RangeDecoder,
pos_state: usize,
update: bool,
) !usize {
if (!try decoder.decodeBit(reader, &self.choice, update)) {
return @as(usize, try self.low_coder[pos_state].parse(reader, decoder, update));
} else if (!try decoder.decodeBit(reader, &self.choice2, update)) {
return @as(usize, try self.mid_coder[pos_state].parse(reader, decoder, update)) + 8;
} else {
return @as(usize, try self.high_coder.parse(reader, decoder, update)) + 16;
}
}
pub fn reset(self: *LenDecoder) void {
self.choice = 0x400;
self.choice2 = 0x400;
for (self.low_coder) |*t| t.reset();
for (self.mid_coder) |*t| t.reset();
self.high_coder.reset();
}
};

View File

@ -0,0 +1,89 @@
const std = @import("../../std.zig");
const lzma = @import("../lzma.zig");
fn testDecompress(compressed: []const u8) ![]u8 {
const allocator = std.testing.allocator;
var stream = std.io.fixedBufferStream(compressed);
var decompressor = try lzma.decompress(allocator, stream.reader());
defer decompressor.deinit();
const reader = decompressor.reader();
return reader.readAllAlloc(allocator, std.math.maxInt(usize));
}
fn testDecompressEqual(expected: []const u8, compressed: []const u8) !void {
const allocator = std.testing.allocator;
const decomp = try testDecompress(compressed);
defer allocator.free(decomp);
try std.testing.expectEqualSlices(u8, expected, decomp);
}
fn testDecompressError(expected: anyerror, compressed: []const u8) !void {
return std.testing.expectError(expected, testDecompress(compressed));
}
test "LZMA: decompress empty world" {
try testDecompressEqual(
"",
&[_]u8{
0x5d, 0x00, 0x00, 0x80, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x83, 0xff,
0xfb, 0xff, 0xff, 0xc0, 0x00, 0x00, 0x00,
},
);
}
test "LZMA: decompress hello world" {
try testDecompressEqual(
"Hello world\n",
&[_]u8{
0x5d, 0x00, 0x00, 0x80, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x24, 0x19,
0x49, 0x98, 0x6f, 0x10, 0x19, 0xc6, 0xd7, 0x31, 0xeb, 0x36, 0x50, 0xb2, 0x98, 0x48, 0xff, 0xfe,
0xa5, 0xb0, 0x00,
},
);
}
test "LZMA: decompress huge dict" {
try testDecompressEqual(
"Hello world\n",
&[_]u8{
0x5d, 0x7f, 0x7f, 0x7f, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x24, 0x19,
0x49, 0x98, 0x6f, 0x10, 0x19, 0xc6, 0xd7, 0x31, 0xeb, 0x36, 0x50, 0xb2, 0x98, 0x48, 0xff, 0xfe,
0xa5, 0xb0, 0x00,
},
);
}
test "LZMA: unknown size with end of payload marker" {
try testDecompressEqual(
"Hello\nWorld!\n",
@embedFile("testdata/good-unknown_size-with_eopm.lzma"),
);
}
test "LZMA: known size without end of payload marker" {
try testDecompressEqual(
"Hello\nWorld!\n",
@embedFile("testdata/good-known_size-without_eopm.lzma"),
);
}
test "LZMA: known size with end of payload marker" {
try testDecompressEqual(
"Hello\nWorld!\n",
@embedFile("testdata/good-known_size-with_eopm.lzma"),
);
}
test "LZMA: too big uncompressed size in header" {
try testDecompressError(
error.CorruptInput,
@embedFile("testdata/bad-too_big_size-with_eopm.lzma"),
);
}
test "LZMA: too small uncompressed size in header" {
try testDecompressError(
error.CorruptInput,
@embedFile("testdata/bad-too_small_size-without_eopm-3.lzma"),
);
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,128 @@
const std = @import("../../std.zig");
const math = std.math;
const mem = std.mem;
const Allocator = std.mem.Allocator;
pub fn Vec2D(comptime T: type) type {
return struct {
data: []T,
cols: usize,
const Self = @This();
pub fn init(allocator: Allocator, value: T, size: struct { usize, usize }) !Self {
const len = try math.mul(usize, size[0], size[1]);
const data = try allocator.alloc(T, len);
mem.set(T, data, value);
return Self{
.data = data,
.cols = size[1],
};
}
pub fn deinit(self: *Self, allocator: Allocator) void {
allocator.free(self.data);
self.* = undefined;
}
pub fn fill(self: *Self, value: T) void {
mem.set(T, self.data, value);
}
inline fn _get(self: Self, row: usize) ![]T {
const start_row = try math.mul(usize, row, self.cols);
const end_row = try math.add(usize, start_row, self.cols);
return self.data[start_row..end_row];
}
pub fn get(self: Self, row: usize) ![]const T {
return self._get(row);
}
pub fn getMut(self: *Self, row: usize) ![]T {
return self._get(row);
}
};
}
const testing = std.testing;
const expectEqualSlices = std.testing.expectEqualSlices;
const expectError = std.testing.expectError;
test "Vec2D.init" {
const allocator = testing.allocator;
var vec2d = try Vec2D(i32).init(allocator, 1, .{ 2, 3 });
defer vec2d.deinit(allocator);
try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(0));
try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(1));
}
test "Vec2D.init overflow" {
const allocator = testing.allocator;
try expectError(
error.Overflow,
Vec2D(i32).init(allocator, 1, .{ math.maxInt(usize), math.maxInt(usize) }),
);
}
test "Vec2D.fill" {
const allocator = testing.allocator;
var vec2d = try Vec2D(i32).init(allocator, 0, .{ 2, 3 });
defer vec2d.deinit(allocator);
vec2d.fill(7);
try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(0));
try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(1));
}
test "Vec2D.get" {
var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 };
const vec2d = Vec2D(i32){
.data = &data,
.cols = 2,
};
try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0));
try expectEqualSlices(i32, &.{ 2, 3 }, try vec2d.get(1));
try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2));
try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3));
}
test "Vec2D.getMut" {
var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 };
var vec2d = Vec2D(i32){
.data = &data,
.cols = 2,
};
const row = try vec2d.getMut(1);
row[1] = 9;
try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0));
// (1, 1) should be 9.
try expectEqualSlices(i32, &.{ 2, 9 }, try vec2d.get(1));
try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2));
try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3));
}
test "Vec2D.get multiplication overflow" {
const allocator = testing.allocator;
var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 4 });
defer matrix.deinit(allocator);
const row = (math.maxInt(usize) / 4) + 1;
try expectError(error.Overflow, matrix.get(row));
try expectError(error.Overflow, matrix.getMut(row));
}
test "Vec2D.get addition overflow" {
const allocator = testing.allocator;
var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 5 });
defer matrix.deinit(allocator);
const row = math.maxInt(usize) / 5;
try expectError(error.Overflow, matrix.get(row));
try expectError(error.Overflow, matrix.getMut(row));
}

View File

@ -0,0 +1,26 @@
const std = @import("../std.zig");
const Allocator = std.mem.Allocator;
pub const decode = @import("lzma2/decode.zig");
pub fn decompress(
allocator: Allocator,
reader: anytype,
writer: anytype,
) !void {
var decoder = try decode.Decoder.init(allocator);
defer decoder.deinit(allocator);
return decoder.decompress(allocator, reader, writer);
}
test {
const expected = "Hello\nWorld!\n";
const compressed = &[_]u8{ 0x01, 0x00, 0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x02, 0x00, 0x06, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x0A, 0x00 };
const allocator = std.testing.allocator;
var decomp = std.ArrayList(u8).init(allocator);
defer decomp.deinit();
var stream = std.io.fixedBufferStream(compressed);
try decompress(allocator, stream.reader(), decomp.writer());
try std.testing.expectEqualSlices(u8, expected, decomp.items);
}

View File

@ -0,0 +1,169 @@
const std = @import("../../std.zig");
const Allocator = std.mem.Allocator;
const lzma = @import("../lzma.zig");
const DecoderState = lzma.decode.DecoderState;
const LzAccumBuffer = lzma.decode.lzbuffer.LzAccumBuffer;
const Properties = lzma.decode.Properties;
const RangeDecoder = lzma.decode.rangecoder.RangeDecoder;
pub const Decoder = struct {
lzma_state: DecoderState,
pub fn init(allocator: Allocator) !Decoder {
return Decoder{
.lzma_state = try DecoderState.init(
allocator,
Properties{
.lc = 0,
.lp = 0,
.pb = 0,
},
null,
),
};
}
pub fn deinit(self: *Decoder, allocator: Allocator) void {
self.lzma_state.deinit(allocator);
self.* = undefined;
}
pub fn decompress(
self: *Decoder,
allocator: Allocator,
reader: anytype,
writer: anytype,
) !void {
var accum = LzAccumBuffer.init(std.math.maxInt(usize));
defer accum.deinit(allocator);
while (true) {
const status = try reader.readByte();
switch (status) {
0 => break,
1 => try parseUncompressed(allocator, reader, writer, &accum, true),
2 => try parseUncompressed(allocator, reader, writer, &accum, false),
else => try self.parseLzma(allocator, reader, writer, &accum, status),
}
}
try accum.finish(writer);
}
fn parseLzma(
self: *Decoder,
allocator: Allocator,
reader: anytype,
writer: anytype,
accum: *LzAccumBuffer,
status: u8,
) !void {
if (status & 0x80 == 0) {
return error.CorruptInput;
}
const Reset = struct {
dict: bool,
state: bool,
props: bool,
};
const reset = switch ((status >> 5) & 0x3) {
0 => Reset{
.dict = false,
.state = false,
.props = false,
},
1 => Reset{
.dict = false,
.state = true,
.props = false,
},
2 => Reset{
.dict = false,
.state = true,
.props = true,
},
3 => Reset{
.dict = true,
.state = true,
.props = true,
},
else => unreachable,
};
const unpacked_size = blk: {
var tmp: u64 = status & 0x1F;
tmp <<= 16;
tmp |= try reader.readIntBig(u16);
break :blk tmp + 1;
};
const packed_size = blk: {
const tmp: u17 = try reader.readIntBig(u16);
break :blk tmp + 1;
};
if (reset.dict) {
try accum.reset(writer);
}
if (reset.state) {
var new_props = self.lzma_state.lzma_props;
if (reset.props) {
var props = try reader.readByte();
if (props >= 225) {
return error.CorruptInput;
}
const lc = @intCast(u4, props % 9);
props /= 9;
const lp = @intCast(u3, props % 5);
props /= 5;
const pb = @intCast(u3, props);
if (lc + lp > 4) {
return error.CorruptInput;
}
new_props = Properties{ .lc = lc, .lp = lp, .pb = pb };
}
try self.lzma_state.resetState(allocator, new_props);
}
self.lzma_state.unpacked_size = unpacked_size + accum.len;
var counter = std.io.countingReader(reader);
const counter_reader = counter.reader();
var rangecoder = try RangeDecoder.init(counter_reader);
while (try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {}
if (counter.bytes_read != packed_size) {
return error.CorruptInput;
}
}
fn parseUncompressed(
allocator: Allocator,
reader: anytype,
writer: anytype,
accum: *LzAccumBuffer,
reset_dict: bool,
) !void {
const unpacked_size = @as(u17, try reader.readIntBig(u16)) + 1;
if (reset_dict) {
try accum.reset(writer);
}
var i: @TypeOf(unpacked_size) = 0;
while (i < unpacked_size) : (i += 1) {
try accum.appendByte(allocator, try reader.readByte());
}
}
};

View File

@ -118,7 +118,7 @@ pub fn Decompress(comptime ReaderType: type) type {
var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
const hashed_reader = hasher.reader();
const backward_size = (try hashed_reader.readIntLittle(u32) + 1) * 4;
const backward_size = (@as(u64, try hashed_reader.readIntLittle(u32)) + 1) * 4;
if (backward_size != index_size)
return error.CorruptInput;

View File

@ -1,6 +1,7 @@
const std = @import("../../std.zig");
const lzma = @import("lzma.zig");
const lzma2 = std.compress.lzma2;
const Allocator = std.mem.Allocator;
const ArrayListUnmanaged = std.ArrayListUnmanaged;
const Crc32 = std.hash.Crc32;
const Crc64 = std.hash.crc.Crc64Xz;
const Sha256 = std.crypto.hash.sha2.Sha256;
@ -32,8 +33,7 @@ pub fn Decoder(comptime ReaderType: type) type {
inner_reader: ReaderType,
check: xz.Check,
err: ?Error,
accum: lzma.LzAccumBuffer,
lzma_state: lzma.DecoderState,
to_read: ArrayListUnmanaged(u8),
block_count: usize,
fn init(allocator: Allocator, in_reader: ReaderType, check: xz.Check) !Self {
@ -42,15 +42,13 @@ pub fn Decoder(comptime ReaderType: type) type {
.inner_reader = in_reader,
.check = check,
.err = null,
.accum = .{},
.lzma_state = try lzma.DecoderState.init(allocator),
.to_read = .{},
.block_count = 0,
};
}
pub fn deinit(self: *Self) void {
self.accum.deinit(self.allocator);
self.lzma_state.deinit(self.allocator);
self.to_read.deinit(self.allocator);
}
pub fn reader(self: *Self) Reader {
@ -59,9 +57,13 @@ pub fn Decoder(comptime ReaderType: type) type {
pub fn read(self: *Self, output: []u8) Error!usize {
while (true) {
if (self.accum.to_read.items.len > 0) {
const n = self.accum.read(output);
if (self.accum.to_read.items.len == 0 and self.err != null) {
if (self.to_read.items.len > 0) {
const input = self.to_read.items;
const n = std.math.min(input.len, output.len);
std.mem.copy(u8, output[0..n], input[0..n]);
std.mem.copy(u8, input, input[n..]);
self.to_read.shrinkRetainingCapacity(input.len - n);
if (self.to_read.items.len == 0 and self.err != null) {
if (self.err.? == DecodeError.EndOfStreamWithNoError) {
return n;
}
@ -77,15 +79,12 @@ pub fn Decoder(comptime ReaderType: type) type {
}
self.readBlock() catch |e| {
self.err = e;
if (self.accum.to_read.items.len == 0) {
try self.accum.reset(self.allocator);
}
};
}
}
fn readBlock(self: *Self) Error!void {
const unpacked_pos = self.accum.to_read.items.len;
const unpacked_pos = self.to_read.items.len;
var block_counter = std.io.countingReader(self.inner_reader);
const block_reader = block_counter.reader();
@ -98,7 +97,7 @@ pub fn Decoder(comptime ReaderType: type) type {
var header_hasher = std.compress.hashedReader(block_reader, Crc32.init());
const header_reader = header_hasher.reader();
const header_size = try header_reader.readByte() * 4;
const header_size = @as(u64, try header_reader.readByte()) * 4;
if (header_size == 0)
return error.EndOfStreamWithNoError;
@ -156,15 +155,18 @@ pub fn Decoder(comptime ReaderType: type) type {
// Compressed Data
var packed_counter = std.io.countingReader(block_reader);
const packed_reader = packed_counter.reader();
while (try self.readLzma2Chunk(packed_reader)) {}
try lzma2.decompress(
self.allocator,
packed_counter.reader(),
self.to_read.writer(self.allocator),
);
if (packed_size) |s| {
if (s != packed_counter.bytes_read)
return error.CorruptInput;
}
const unpacked_bytes = self.accum.to_read.items[unpacked_pos..];
const unpacked_bytes = self.to_read.items[unpacked_pos..];
if (unpacked_size) |s| {
if (s != unpacked_bytes.len)
return error.CorruptInput;
@ -205,113 +207,5 @@ pub fn Decoder(comptime ReaderType: type) type {
self.block_count += 1;
}
fn readLzma2Chunk(self: *Self, packed_reader: anytype) Error!bool {
const status = try packed_reader.readByte();
switch (status) {
0 => {
try self.accum.reset(self.allocator);
return false;
},
1, 2 => {
if (status == 1)
try self.accum.reset(self.allocator);
const size = try packed_reader.readIntBig(u16) + 1;
try self.accum.ensureUnusedCapacity(self.allocator, size);
var i: usize = 0;
while (i < size) : (i += 1)
self.accum.appendAssumeCapacity(try packed_reader.readByte());
return true;
},
else => {
if (status & 0x80 == 0)
return error.CorruptInput;
const Reset = struct {
dict: bool,
state: bool,
props: bool,
};
const reset = switch ((status >> 5) & 0x3) {
0 => Reset{
.dict = false,
.state = false,
.props = false,
},
1 => Reset{
.dict = false,
.state = true,
.props = false,
},
2 => Reset{
.dict = false,
.state = true,
.props = true,
},
3 => Reset{
.dict = true,
.state = true,
.props = true,
},
else => unreachable,
};
const unpacked_size = blk: {
var tmp: u64 = status & 0x1F;
tmp <<= 16;
tmp |= try packed_reader.readIntBig(u16);
break :blk tmp + 1;
};
const packed_size = blk: {
const tmp: u17 = try packed_reader.readIntBig(u16);
break :blk tmp + 1;
};
if (reset.dict)
try self.accum.reset(self.allocator);
if (reset.state) {
var new_props = self.lzma_state.lzma_props;
if (reset.props) {
var props = try packed_reader.readByte();
if (props >= 225)
return error.CorruptInput;
const lc = @intCast(u4, props % 9);
props /= 9;
const lp = @intCast(u3, props % 5);
props /= 5;
const pb = @intCast(u3, props);
if (lc + lp > 4)
return error.CorruptInput;
new_props = .{ .lc = lc, .lp = lp, .pb = pb };
}
try self.lzma_state.reset_state(self.allocator, new_props);
}
self.lzma_state.unpacked_size = unpacked_size + self.accum.len();
const buffer = try self.allocator.alloc(u8, packed_size);
defer self.allocator.free(buffer);
for (buffer) |*b|
b.* = try packed_reader.readByte();
var rangecoder = try lzma.RangeDecoder.init(buffer);
try self.lzma_state.process(self.allocator, &self.accum, &rangecoder);
return true;
},
}
}
};
}

View File

@ -1,658 +0,0 @@
// Ported from https://github.com/gendx/lzma-rs
const std = @import("../../std.zig");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const ArrayListUnmanaged = std.ArrayListUnmanaged;
const LzmaProperties = struct {
lc: u4,
lp: u3,
pb: u3,
fn validate(self: LzmaProperties) void {
assert(self.lc <= 8);
assert(self.lp <= 4);
assert(self.pb <= 4);
}
};
pub const DecoderState = struct {
lzma_props: LzmaProperties,
unpacked_size: ?u64,
literal_probs: Vec2D(u16),
pos_slot_decoder: [4]BitTree,
align_decoder: BitTree,
pos_decoders: [115]u16,
is_match: [192]u16,
is_rep: [12]u16,
is_rep_g0: [12]u16,
is_rep_g1: [12]u16,
is_rep_g2: [12]u16,
is_rep_0long: [192]u16,
state: usize,
rep: [4]usize,
len_decoder: LenDecoder,
rep_len_decoder: LenDecoder,
pub fn init(allocator: Allocator) !DecoderState {
return .{
.lzma_props = LzmaProperties{ .lc = 0, .lp = 0, .pb = 0 },
.unpacked_size = null,
.literal_probs = try Vec2D(u16).init(allocator, 0x400, 1, 0x300),
.pos_slot_decoder = .{
try BitTree.init(allocator, 6),
try BitTree.init(allocator, 6),
try BitTree.init(allocator, 6),
try BitTree.init(allocator, 6),
},
.align_decoder = try BitTree.init(allocator, 4),
.pos_decoders = .{0x400} ** 115,
.is_match = .{0x400} ** 192,
.is_rep = .{0x400} ** 12,
.is_rep_g0 = .{0x400} ** 12,
.is_rep_g1 = .{0x400} ** 12,
.is_rep_g2 = .{0x400} ** 12,
.is_rep_0long = .{0x400} ** 192,
.state = 0,
.rep = .{0} ** 4,
.len_decoder = try LenDecoder.init(allocator),
.rep_len_decoder = try LenDecoder.init(allocator),
};
}
pub fn deinit(self: *DecoderState, allocator: Allocator) void {
self.literal_probs.deinit(allocator);
for (self.pos_slot_decoder) |*t| t.deinit(allocator);
self.align_decoder.deinit(allocator);
self.len_decoder.deinit(allocator);
self.rep_len_decoder.deinit(allocator);
}
pub fn reset_state(self: *DecoderState, allocator: Allocator, new_props: LzmaProperties) !void {
new_props.validate();
if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) {
self.literal_probs.fill(0x400);
} else {
self.literal_probs.deinit(allocator);
self.literal_probs = try Vec2D(u16).init(allocator, 0x400, @as(usize, 1) << (new_props.lc + new_props.lp), 0x300);
}
self.lzma_props = new_props;
for (self.pos_slot_decoder) |*t| t.reset();
self.align_decoder.reset();
self.pos_decoders = .{0x400} ** 115;
self.is_match = .{0x400} ** 192;
self.is_rep = .{0x400} ** 12;
self.is_rep_g0 = .{0x400} ** 12;
self.is_rep_g1 = .{0x400} ** 12;
self.is_rep_g2 = .{0x400} ** 12;
self.is_rep_0long = .{0x400} ** 192;
self.state = 0;
self.rep = .{0} ** 4;
self.len_decoder.reset();
self.rep_len_decoder.reset();
}
fn processNextInner(
self: *DecoderState,
allocator: Allocator,
output: *LzAccumBuffer,
rangecoder: *RangeDecoder,
update: bool,
) !ProcessingStatus {
const pos_state = output.len() & ((@as(usize, 1) << self.lzma_props.pb) - 1);
if (!try rangecoder.decodeBit(
&self.is_match[(self.state << 4) + pos_state],
update,
)) {
const byte: u8 = try self.decodeLiteral(output, rangecoder, update);
if (update) {
try output.appendLiteral(allocator, byte);
self.state = if (self.state < 4)
0
else if (self.state < 10)
self.state - 3
else
self.state - 6;
}
return .continue_;
}
var len: usize = undefined;
if (try rangecoder.decodeBit(&self.is_rep[self.state], update)) {
if (!try rangecoder.decodeBit(&self.is_rep_g0[self.state], update)) {
if (!try rangecoder.decodeBit(
&self.is_rep_0long[(self.state << 4) + pos_state],
update,
)) {
if (update) {
self.state = if (self.state < 7) 9 else 11;
const dist = self.rep[0] + 1;
try output.appendLz(allocator, 1, dist);
}
return .continue_;
}
} else {
const idx: usize = if (!try rangecoder.decodeBit(&self.is_rep_g1[self.state], update))
1
else if (!try rangecoder.decodeBit(&self.is_rep_g2[self.state], update))
2
else
3;
if (update) {
const dist = self.rep[idx];
var i = idx;
while (i > 0) : (i -= 1) {
self.rep[i] = self.rep[i - 1];
}
self.rep[0] = dist;
}
}
len = try self.rep_len_decoder.decode(rangecoder, pos_state, update);
if (update) {
self.state = if (self.state < 7) 8 else 11;
}
} else {
if (update) {
self.rep[3] = self.rep[2];
self.rep[2] = self.rep[1];
self.rep[1] = self.rep[0];
}
len = try self.len_decoder.decode(rangecoder, pos_state, update);
if (update) {
self.state = if (self.state < 7) 7 else 10;
}
const rep_0 = try self.decodeDistance(rangecoder, len, update);
if (update) {
self.rep[0] = rep_0;
if (self.rep[0] == 0xFFFF_FFFF) {
if (rangecoder.isFinished()) {
return .finished;
}
return error.CorruptInput;
}
}
}
if (update) {
len += 2;
const dist = self.rep[0] + 1;
try output.appendLz(allocator, len, dist);
}
return .continue_;
}
fn processNext(
self: *DecoderState,
allocator: Allocator,
output: *LzAccumBuffer,
rangecoder: *RangeDecoder,
) !ProcessingStatus {
return self.processNextInner(allocator, output, rangecoder, true);
}
pub fn process(
self: *DecoderState,
allocator: Allocator,
output: *LzAccumBuffer,
rangecoder: *RangeDecoder,
) !void {
while (true) {
if (self.unpacked_size) |unpacked_size| {
if (output.len() >= unpacked_size) {
break;
}
} else if (rangecoder.isFinished()) {
break;
}
if (try self.processNext(allocator, output, rangecoder) == .finished) {
break;
}
}
if (self.unpacked_size) |len| {
if (len != output.len()) {
return error.CorruptInput;
}
}
}
fn decodeLiteral(
self: *DecoderState,
output: *LzAccumBuffer,
rangecoder: *RangeDecoder,
update: bool,
) !u8 {
const def_prev_byte = 0;
const prev_byte = @as(usize, output.lastOr(def_prev_byte));
var result: usize = 1;
const lit_state = ((output.len() & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) +
(prev_byte >> (8 - self.lzma_props.lc));
const probs = try self.literal_probs.get(lit_state);
if (self.state >= 7) {
var match_byte = @as(usize, try output.lastN(self.rep[0] + 1));
while (result < 0x100) {
const match_bit = (match_byte >> 7) & 1;
match_byte <<= 1;
const bit = @boolToInt(try rangecoder.decodeBit(
&probs[((@as(usize, 1) + match_bit) << 8) + result],
update,
));
result = (result << 1) ^ bit;
if (match_bit != bit) {
break;
}
}
}
while (result < 0x100) {
result = (result << 1) ^ @boolToInt(try rangecoder.decodeBit(&probs[result], update));
}
return @truncate(u8, result - 0x100);
}
fn decodeDistance(
self: *DecoderState,
rangecoder: *RangeDecoder,
length: usize,
update: bool,
) !usize {
const len_state = if (length > 3) 3 else length;
const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(rangecoder, update));
if (pos_slot < 4)
return pos_slot;
const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1);
var result = (2 ^ (pos_slot & 1)) << num_direct_bits;
if (pos_slot < 14) {
result += try rangecoder.parseReverseBitTree(
num_direct_bits,
&self.pos_decoders,
result - pos_slot,
update,
);
} else {
result += @as(usize, try rangecoder.get(num_direct_bits - 4)) << 4;
result += try self.align_decoder.parseReverse(rangecoder, update);
}
return result;
}
};
const ProcessingStatus = enum {
continue_,
finished,
};
pub const LzAccumBuffer = struct {
to_read: ArrayListUnmanaged(u8) = .{},
buf: ArrayListUnmanaged(u8) = .{},
pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void {
self.to_read.deinit(allocator);
self.buf.deinit(allocator);
}
pub fn read(self: *LzAccumBuffer, output: []u8) usize {
const input = self.to_read.items;
const n = std.math.min(input.len, output.len);
std.mem.copy(u8, output[0..n], input[0..n]);
std.mem.copy(u8, input, input[n..]);
self.to_read.shrinkRetainingCapacity(input.len - n);
return n;
}
pub fn ensureUnusedCapacity(
self: *LzAccumBuffer,
allocator: Allocator,
additional_count: usize,
) !void {
try self.buf.ensureUnusedCapacity(allocator, additional_count);
}
pub fn appendAssumeCapacity(self: *LzAccumBuffer, byte: u8) void {
self.buf.appendAssumeCapacity(byte);
}
pub fn reset(self: *LzAccumBuffer, allocator: Allocator) !void {
try self.to_read.appendSlice(allocator, self.buf.items);
self.buf.clearRetainingCapacity();
}
pub fn len(self: *const LzAccumBuffer) usize {
return self.buf.items.len;
}
pub fn lastOr(self: *const LzAccumBuffer, lit: u8) u8 {
const buf_len = self.buf.items.len;
return if (buf_len == 0)
lit
else
self.buf.items[buf_len - 1];
}
pub fn lastN(self: *const LzAccumBuffer, dist: usize) !u8 {
const buf_len = self.buf.items.len;
if (dist > buf_len) {
return error.CorruptInput;
}
return self.buf.items[buf_len - dist];
}
pub fn appendLiteral(self: *LzAccumBuffer, allocator: Allocator, lit: u8) !void {
try self.buf.append(allocator, lit);
}
pub fn appendLz(self: *LzAccumBuffer, allocator: Allocator, length: usize, dist: usize) !void {
const buf_len = self.buf.items.len;
if (dist > buf_len) {
return error.CorruptInput;
}
var offset = buf_len - dist;
var i: usize = 0;
while (i < length) : (i += 1) {
const x = self.buf.items[offset];
try self.buf.append(allocator, x);
offset += 1;
}
}
};
pub const RangeDecoder = struct {
stream: std.io.FixedBufferStream([]const u8),
range: u32,
code: u32,
pub fn init(buffer: []const u8) !RangeDecoder {
var dec = RangeDecoder{
.stream = std.io.fixedBufferStream(buffer),
.range = 0xFFFF_FFFF,
.code = 0,
};
const reader = dec.stream.reader();
_ = try reader.readByte();
dec.code = try reader.readIntBig(u32);
return dec;
}
pub fn fromParts(
buffer: []const u8,
range: u32,
code: u32,
) RangeDecoder {
return .{
.stream = std.io.fixedBufferStream(buffer),
.range = range,
.code = code,
};
}
pub fn set(self: *RangeDecoder, range: u32, code: u32) void {
self.range = range;
self.code = code;
}
pub fn readInto(self: *RangeDecoder, dest: []u8) !usize {
return self.stream.read(dest);
}
pub inline fn isFinished(self: *const RangeDecoder) bool {
return self.code == 0 and self.isEof();
}
pub inline fn isEof(self: *const RangeDecoder) bool {
return self.stream.pos == self.stream.buffer.len;
}
inline fn normalize(self: *RangeDecoder) !void {
if (self.range < 0x0100_0000) {
self.range <<= 8;
self.code = (self.code << 8) ^ @as(u32, try self.stream.reader().readByte());
}
}
inline fn getBit(self: *RangeDecoder) !bool {
self.range >>= 1;
const bit = self.code >= self.range;
if (bit)
self.code -= self.range;
try self.normalize();
return bit;
}
fn get(self: *RangeDecoder, count: usize) !u32 {
var result: u32 = 0;
var i: usize = 0;
while (i < count) : (i += 1)
result = (result << 1) ^ @boolToInt(try self.getBit());
return result;
}
pub inline fn decodeBit(self: *RangeDecoder, prob: *u16, update: bool) !bool {
const bound = (self.range >> 11) * prob.*;
if (self.code < bound) {
if (update)
prob.* += (0x800 - prob.*) >> 5;
self.range = bound;
try self.normalize();
return false;
} else {
if (update)
prob.* -= prob.* >> 5;
self.code -= bound;
self.range -= bound;
try self.normalize();
return true;
}
}
fn parseBitTree(
self: *RangeDecoder,
num_bits: u5,
probs: []u16,
update: bool,
) !u32 {
var tmp: u32 = 1;
var i: u5 = 0;
while (i < num_bits) : (i += 1) {
const bit = try self.decodeBit(&probs[tmp], update);
tmp = (tmp << 1) ^ @boolToInt(bit);
}
return tmp - (@as(u32, 1) << num_bits);
}
pub fn parseReverseBitTree(
self: *RangeDecoder,
num_bits: u5,
probs: []u16,
offset: usize,
update: bool,
) !u32 {
var result: u32 = 0;
var tmp: usize = 1;
var i: u5 = 0;
while (i < num_bits) : (i += 1) {
const bit = @boolToInt(try self.decodeBit(&probs[offset + tmp], update));
tmp = (tmp << 1) ^ bit;
result ^= @as(u32, bit) << i;
}
return result;
}
};
fn Vec2D(comptime T: type) type {
return struct {
data: []T,
cols: usize,
const Self = @This();
pub fn init(allocator: Allocator, data: T, rows: usize, cols: usize) !Self {
const len = try std.math.mul(usize, rows, cols);
var vec2d = Self{
.data = try allocator.alloc(T, len),
.cols = cols,
};
vec2d.fill(data);
return vec2d;
}
pub fn deinit(self: *Self, allocator: Allocator) void {
allocator.free(self.data);
}
pub fn fill(self: *Self, value: T) void {
std.mem.set(T, self.data, value);
}
pub fn get(self: *Self, row: usize) ![]T {
const start_row = try std.math.mul(usize, row, self.cols);
return self.data[start_row .. start_row + self.cols];
}
};
}
const BitTree = struct {
num_bits: u5,
probs: ArrayListUnmanaged(u16),
pub fn init(allocator: Allocator, num_bits: u5) !BitTree {
var probs_len = @as(usize, 1) << num_bits;
var probs = try ArrayListUnmanaged(u16).initCapacity(allocator, probs_len);
while (probs_len > 0) : (probs_len -= 1)
probs.appendAssumeCapacity(0x400);
return .{ .num_bits = num_bits, .probs = probs };
}
pub fn deinit(self: *BitTree, allocator: Allocator) void {
self.probs.deinit(allocator);
}
pub fn parse(
self: *BitTree,
rangecoder: *RangeDecoder,
update: bool,
) !u32 {
return rangecoder.parseBitTree(self.num_bits, self.probs.items, update);
}
pub fn parseReverse(
self: *BitTree,
rangecoder: *RangeDecoder,
update: bool,
) !u32 {
return rangecoder.parseReverseBitTree(self.num_bits, self.probs.items, 0, update);
}
pub fn reset(self: *BitTree) void {
std.mem.set(u16, self.probs.items, 0x400);
}
};
const LenDecoder = struct {
choice: u16,
choice2: u16,
low_coder: [16]BitTree,
mid_coder: [16]BitTree,
high_coder: BitTree,
pub fn init(allocator: Allocator) !LenDecoder {
return .{
.choice = 0x400,
.choice2 = 0x400,
.low_coder = .{
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
},
.mid_coder = .{
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
try BitTree.init(allocator, 3),
},
.high_coder = try BitTree.init(allocator, 8),
};
}
pub fn deinit(self: *LenDecoder, allocator: Allocator) void {
for (self.low_coder) |*t| t.deinit(allocator);
for (self.mid_coder) |*t| t.deinit(allocator);
self.high_coder.deinit(allocator);
}
pub fn decode(
self: *LenDecoder,
rangecoder: *RangeDecoder,
pos_state: usize,
update: bool,
) !usize {
if (!try rangecoder.decodeBit(&self.choice, update)) {
return @as(usize, try self.low_coder[pos_state].parse(rangecoder, update));
} else if (!try rangecoder.decodeBit(&self.choice2, update)) {
return @as(usize, try self.mid_coder[pos_state].parse(rangecoder, update)) + 8;
} else {
return @as(usize, try self.high_coder.parse(rangecoder, update)) + 16;
}
}
pub fn reset(self: *LenDecoder) void {
self.choice = 0x400;
self.choice2 = 0x400;
for (self.low_coder) |*t| t.reset();
for (self.mid_coder) |*t| t.reset();
self.high_coder.reset();
}
};

View File

@ -78,3 +78,23 @@ test "unsupported" {
);
}
}
fn testDontPanic(data: []const u8) !void {
const buf = decompress(data) catch |err| switch (err) {
error.OutOfMemory => |e| return e,
else => return,
};
defer testing.allocator.free(buf);
}
test "size fields: integer overflow avoidance" {
// These cases were found via fuzz testing and each previously caused
// an integer overflow when decoding. We just want to ensure they no longer
// cause a panic
const header_size_overflow = "\xfd7zXZ\x00\x00\x01i\"\xde6z";
try testDontPanic(header_size_overflow);
const lzma2_chunk_size_overflow = "\xfd7zXZ\x00\x00\x01i\"\xde6\x02\x00!\x01\x08\x00\x00\x00\xd8\x0f#\x13\x01\xff\xff";
try testDontPanic(lzma2_chunk_size_overflow);
const backward_size_overflow = "\xfd7zXZ\x00\x00\x01i\"\xde6\x00\x00\x00\x00\x1c\xdfD!\x90B\x99\r\x01\x00\x00\xff\xff\x10\x00\x00\x00\x01DD\xff\xff\xff\x01";
try testDontPanic(backward_size_overflow);
}