From a8ae6c2f4265a66c7a63d788a13549c48a1dd8c0 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 25 Aug 2025 20:24:19 -0700 Subject: [PATCH] std.compress.lzma2: tests passing --- lib/std/compress/lzma.zig | 126 ++++++++++++++---------------- lib/std/compress/lzma2.zig | 156 +++++++++++++++++-------------------- 2 files changed, 131 insertions(+), 151 deletions(-) diff --git a/lib/std/compress/lzma.zig b/lib/std/compress/lzma.zig index 5f96579be5..ab7e217871 100644 --- a/lib/std/compress/lzma.zig +++ b/lib/std/compress/lzma.zig @@ -105,7 +105,6 @@ pub const RangeDecoder = struct { pub const Decode = struct { properties: Properties, - unpacked_size: ?u64, literal_probs: Vec2d, pos_slot_decoder: [4]BitTree(6), align_decoder: BitTree(4), @@ -121,15 +120,10 @@ pub const Decode = struct { len_decoder: LenDecoder, rep_len_decoder: LenDecoder, - pub fn init( - gpa: Allocator, - properties: Properties, - unpacked_size: ?u64, - ) !Decode { + pub fn init(gpa: Allocator, properties: Properties) !Decode { return .{ .properties = properties, - .unpacked_size = unpacked_size, - .literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (properties.lc + properties.lp), 0x300 }), + .literal_probs = try Vec2d.init(gpa, 0x400, @as(usize, 1) << (properties.lc + properties.lp), 0x300), .pos_slot_decoder = @splat(.{}), .align_decoder = .{}, .pos_decoders = @splat(0x400), @@ -157,7 +151,7 @@ pub const Decode = struct { self.literal_probs.fill(0x400); } else { self.literal_probs.deinit(gpa); - self.literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 }); + self.literal_probs = try Vec2d.init(gpa, 0x400, @as(usize, 1) << (new_props.lc + new_props.lp), 0x300); } self.properties = new_props; @@ -176,11 +170,12 @@ pub const Decode = struct { self.rep_len_decoder.reset(); } - fn processNext( + pub fn process( self: *Decode, reader: *Reader, allocating: *Writer.Allocating, - buffer: *CircularBuffer, + /// `CircularBuffer` or `std.compress.lzma2.AccumBuffer`. + buffer: anytype, decoder: *RangeDecoder, ) !ProcessingStatus { const gpa = allocating.allocator; @@ -256,39 +251,11 @@ pub const Decode = struct { return .more; } - pub fn process( - self: *Decode, - reader: *Reader, - allocating: *Writer.Allocating, - buffer: *CircularBuffer, - decoder: *RangeDecoder, - ) !void { - process_next: { - if (self.unpacked_size) |unpacked_size| { - if (buffer.len >= unpacked_size) { - break :process_next; - } - } else if (decoder.isFinished()) { - break :process_next; - } - switch (try self.processNext(reader, allocating, buffer, decoder)) { - .more => return, - .finished => {}, - } - } - - if (self.unpacked_size) |unpacked_size| { - if (buffer.len != unpacked_size) return error.DecompressedSizeMismatch; - } - - try buffer.finish(&allocating.writer); - self.state = math.maxInt(usize); - } - fn decodeLiteral( self: *Decode, reader: *Reader, - buffer: *CircularBuffer, + /// `CircularBuffer` or `std.compress.lzma2.AccumBuffer`. + buffer: anytype, decoder: *RangeDecoder, ) !u8 { const def_prev_byte = 0; @@ -377,10 +344,7 @@ pub const Decode = struct { } pub fn get(self: CircularBuffer, index: usize) u8 { - return if (0 <= index and index < self.buf.items.len) - self.buf.items[index] - else - 0; + return if (0 <= index and index < self.buf.items.len) self.buf.items[index] else 0; } pub fn set(self: *CircularBuffer, gpa: Allocator, index: usize, value: u8) !void { @@ -524,29 +488,29 @@ pub const Decode = struct { data: []u16, cols: usize, - pub fn init(gpa: Allocator, value: u16, size: struct { usize, usize }) !Vec2d { - const len = try math.mul(usize, size[0], size[1]); + pub fn init(gpa: Allocator, value: u16, w: usize, h: usize) !Vec2d { + const len = try math.mul(usize, w, h); const data = try gpa.alloc(u16, len); @memset(data, value); return .{ .data = data, - .cols = size[1], + .cols = h, }; } - pub fn deinit(self: *Vec2d, gpa: Allocator) void { - gpa.free(self.data); - self.* = undefined; + pub fn deinit(v: *Vec2d, gpa: Allocator) void { + gpa.free(v.data); + v.* = undefined; } - pub fn fill(self: *Vec2d, value: u16) void { - @memset(self.data, value); + pub fn fill(v: *Vec2d, value: u16) void { + @memset(v.data, value); } - fn get(self: Vec2d, row: usize) ![]u16 { - const start_row = try math.mul(usize, row, self.cols); - const end_row = try math.add(usize, start_row, self.cols); - return self.data[start_row..end_row]; + fn get(v: Vec2d, row: usize) ![]u16 { + const start_row = try math.mul(usize, row, v.cols); + const end_row = try math.add(usize, start_row, v.cols); + return v.data[start_row..end_row]; } }; @@ -627,6 +591,7 @@ pub const Decompress = struct { range_decoder: RangeDecoder, decode: Decode, err: ?Error, + unpacked_size: ?u64, pub const Error = error{ OutOfMemory, @@ -654,7 +619,7 @@ pub const Decompress = struct { .input = input, .buffer = Decode.CircularBuffer.init(params.dict_size, mem_limit), .range_decoder = try RangeDecoder.init(input), - .decode = try Decode.init(gpa, params.properties, params.unpacked_size), + .decode = try Decode.init(gpa, params.properties), .reader = .{ .buffer = buffer, .vtable = &.{ @@ -666,6 +631,7 @@ pub const Decompress = struct { .end = 0, }, .err = null, + .unpacked_size = params.unpacked_size, }; } @@ -728,20 +694,46 @@ pub const Decompress = struct { r.end = allocating.writer.end; } if (d.decode.state == math.maxInt(usize)) return error.EndOfStream; - d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| switch (err) { + + process_next: { + if (d.unpacked_size) |unpacked_size| { + if (d.buffer.len >= unpacked_size) break :process_next; + } else if (d.range_decoder.isFinished()) { + break :process_next; + } + switch (d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| switch (err) { + error.WriteFailed => { + d.err = error.OutOfMemory; + return error.ReadFailed; + }, + error.EndOfStream => { + d.err = error.EndOfStream; + return error.ReadFailed; + }, + else => |e| { + d.err = e; + return error.ReadFailed; + }, + }) { + .more => return 0, + .finished => break :process_next, + } + } + + if (d.unpacked_size) |unpacked_size| { + if (d.buffer.len != unpacked_size) { + d.err = error.DecompressedSizeMismatch; + return error.ReadFailed; + } + } + + d.buffer.finish(&allocating.writer) catch |err| switch (err) { error.WriteFailed => { d.err = error.OutOfMemory; return error.ReadFailed; }, - error.EndOfStream => { - d.err = error.EndOfStream; - return error.ReadFailed; - }, - else => |e| { - d.err = e; - return error.ReadFailed; - }, }; + d.decode.state = math.maxInt(usize); return 0; } }; diff --git a/lib/std/compress/lzma2.zig b/lib/std/compress/lzma2.zig index d880bad21d..6aad2e1198 100644 --- a/lib/std/compress/lzma2.zig +++ b/lib/std/compress/lzma2.zig @@ -6,17 +6,15 @@ const Writer = std.Io.Writer; const Reader = std.Io.Reader; /// An accumulating buffer for LZ sequences -pub const LzAccumBuffer = struct { +pub const AccumBuffer = struct { /// Buffer buf: ArrayList(u8), - /// Buffer memory limit memlimit: usize, - /// Total number of bytes sent through the buffer len: usize, - pub fn init(memlimit: usize) LzAccumBuffer { + pub fn init(memlimit: usize) AccumBuffer { return .{ .buf = .{}, .memlimit = memlimit, @@ -24,20 +22,20 @@ pub const LzAccumBuffer = struct { }; } - pub fn appendByte(self: *LzAccumBuffer, allocator: Allocator, byte: u8) !void { + pub fn appendByte(self: *AccumBuffer, allocator: Allocator, byte: u8) !void { try self.buf.append(allocator, byte); self.len += 1; } /// Reset the internal dictionary - pub fn reset(self: *LzAccumBuffer, writer: *Writer) !void { + pub fn reset(self: *AccumBuffer, writer: *Writer) !void { try writer.writeAll(self.buf.items); self.buf.clearRetainingCapacity(); self.len = 0; } /// Retrieve the last byte or return a default - pub fn lastOr(self: LzAccumBuffer, lit: u8) u8 { + pub fn lastOr(self: AccumBuffer, lit: u8) u8 { const buf_len = self.buf.items.len; return if (buf_len == 0) lit @@ -46,7 +44,7 @@ pub const LzAccumBuffer = struct { } /// Retrieve the n-th last byte - pub fn lastN(self: LzAccumBuffer, dist: usize) !u8 { + pub fn lastN(self: AccumBuffer, dist: usize) !u8 { const buf_len = self.buf.items.len; if (dist > buf_len) { return error.CorruptInput; @@ -57,7 +55,7 @@ pub const LzAccumBuffer = struct { /// Append a literal pub fn appendLiteral( - self: *LzAccumBuffer, + self: *AccumBuffer, allocator: Allocator, lit: u8, writer: *Writer, @@ -72,7 +70,7 @@ pub const LzAccumBuffer = struct { /// Fetch an LZ sequence (length, distance) from inside the buffer pub fn appendLz( - self: *LzAccumBuffer, + self: *AccumBuffer, allocator: Allocator, len: usize, dist: usize, @@ -95,12 +93,12 @@ pub const LzAccumBuffer = struct { self.len += len; } - pub fn finish(self: *LzAccumBuffer, writer: *Writer) !void { + pub fn finish(self: *AccumBuffer, writer: *Writer) !void { try writer.writeAll(self.buf.items); self.buf.clearRetainingCapacity(); } - pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void { + pub fn deinit(self: *AccumBuffer, allocator: Allocator) void { self.buf.deinit(allocator); self.* = undefined; } @@ -109,59 +107,43 @@ pub const LzAccumBuffer = struct { pub const Decode = struct { lzma_decode: lzma.Decode, - pub fn init(allocator: Allocator) !Decode { - return Decode{ - .lzma_decode = try lzma.Decode.init( - allocator, - .{ - .lc = 0, - .lp = 0, - .pb = 0, - }, - null, - ), - }; + pub fn init(gpa: Allocator) !Decode { + return .{ .lzma_decode = try lzma.Decode.init(gpa, .{ .lc = 0, .lp = 0, .pb = 0 }) }; } - pub fn deinit(self: *Decode, allocator: Allocator) void { - self.lzma_decode.deinit(allocator); + pub fn deinit(self: *Decode, gpa: Allocator) void { + self.lzma_decode.deinit(gpa); self.* = undefined; } - pub fn decompress( - self: *Decode, - allocator: Allocator, - reader: *Reader, - writer: *Writer, - ) !void { - var accum = LzAccumBuffer.init(std.math.maxInt(usize)); - defer accum.deinit(allocator); + pub fn decompress(d: *Decode, reader: *Reader, allocating: *Writer.Allocating) !void { + const gpa = allocating.allocator; + + var accum = AccumBuffer.init(std.math.maxInt(usize)); + defer accum.deinit(gpa); while (true) { - const status = try reader.readByte(); + const status = try reader.takeByte(); switch (status) { 0 => break, - 1 => try parseUncompressed(allocator, reader, writer, &accum, true), - 2 => try parseUncompressed(allocator, reader, writer, &accum, false), - else => try self.parseLzma(allocator, reader, writer, &accum, status), + 1 => try parseUncompressed(reader, allocating, &accum, true), + 2 => try parseUncompressed(reader, allocating, &accum, false), + else => try d.parseLzma(reader, allocating, &accum, status), } } - try accum.finish(writer); + try accum.finish(&allocating.writer); } fn parseLzma( - self: *Decode, - allocator: Allocator, + d: *Decode, reader: *Reader, - writer: *Writer, - accum: *LzAccumBuffer, + allocating: *Writer.Allocating, + accum: *AccumBuffer, status: u8, ) !void { - if (status & 0x80 == 0) { - return error.CorruptInput; - } + if (status & 0x80 == 0) return error.CorruptInput; const Reset = struct { dict: bool, @@ -169,23 +151,23 @@ pub const Decode = struct { props: bool, }; - const reset = switch ((status >> 5) & 0x3) { - 0 => Reset{ + const reset: Reset = switch ((status >> 5) & 0x3) { + 0 => .{ .dict = false, .state = false, .props = false, }, - 1 => Reset{ + 1 => .{ .dict = false, .state = true, .props = false, }, - 2 => Reset{ + 2 => .{ .dict = false, .state = true, .props = true, }, - 3 => Reset{ + 3 => .{ .dict = true, .state = true, .props = true, @@ -196,24 +178,24 @@ pub const Decode = struct { const unpacked_size = blk: { var tmp: u64 = status & 0x1F; tmp <<= 16; - tmp |= try reader.readInt(u16, .big); + tmp |= try reader.takeInt(u16, .big); break :blk tmp + 1; }; const packed_size = blk: { - const tmp: u17 = try reader.readInt(u16, .big); + const tmp: u17 = try reader.takeInt(u16, .big); break :blk tmp + 1; }; - if (reset.dict) { - try accum.reset(writer); - } + if (reset.dict) try accum.reset(&allocating.writer); + + const ld = &d.lzma_decode; if (reset.state) { - var new_props = self.lzma_decode.properties; + var new_props = ld.properties; if (reset.props) { - var props = try reader.readByte(); + var props = try reader.takeByte(); if (props >= 225) { return error.CorruptInput; } @@ -231,38 +213,44 @@ pub const Decode = struct { new_props = .{ .lc = lc, .lp = lp, .pb = pb }; } - try self.lzma_decode.resetState(allocator, new_props); + try ld.resetState(allocating.allocator, new_props); } - self.lzma_decode.unpacked_size = unpacked_size + accum.len; + var range_decoder = try lzma.RangeDecoder.init(reader); - var counter = std.io.countingReader(reader); - const counter_reader = counter.reader(); - - var rangecoder = try lzma.RangeDecoder.init(counter_reader); - while (try self.lzma_decode.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {} - - if (counter.bytes_read != packed_size) { - return error.CorruptInput; + while (true) { + if (accum.len >= unpacked_size) break; + if (range_decoder.isFinished()) break; + switch (try ld.process(reader, allocating, accum, &range_decoder)) { + .more => continue, + .finished => break, + } } + if (accum.len != unpacked_size) return error.DecompressedSizeMismatch; + + // TODO restore this error + //if (counter.bytes_read != packed_size) { + // return error.CorruptInput; + //} + _ = packed_size; } fn parseUncompressed( - allocator: Allocator, reader: *Reader, - writer: *Writer, - accum: *LzAccumBuffer, + allocating: *Writer.Allocating, + accum: *AccumBuffer, reset_dict: bool, ) !void { - const unpacked_size = @as(u17, try reader.readInt(u16, .big)) + 1; + const unpacked_size = @as(u17, try reader.takeInt(u16, .big)) + 1; - if (reset_dict) { - try accum.reset(writer); - } + if (reset_dict) try accum.reset(&allocating.writer); - var i: @TypeOf(unpacked_size) = 0; - while (i < unpacked_size) : (i += 1) { - try accum.appendByte(allocator, try reader.readByte()); + const gpa = allocating.allocator; + + var i = unpacked_size; + while (i != 0) { + try accum.appendByte(gpa, try reader.takeByte()); + i -= 1; } } }; @@ -273,13 +261,13 @@ test "decompress hello world stream" { const gpa = std.testing.allocator; - var stream: std.Io.Reader = .fixed(compressed); - - var decode = try Decode.init(gpa, &stream); + var decode = try Decode.init(gpa); defer decode.deinit(gpa); - const result = try decode.reader.allocRemaining(gpa, .unlimited); - defer gpa.free(result); + var stream: std.Io.Reader = .fixed(compressed); + var result: std.Io.Writer.Allocating = .init(gpa); + defer result.deinit(); - try std.testing.expectEqualStrings(expected, result); + try decode.decompress(&stream, &result); + try std.testing.expectEqualStrings(expected, result.written()); }