Mirror of https://github.com/ziglang/zig.git, synced 2025-12-06 14:23:09 +00:00
std.compress.lzma2: tests passing
This commit is contained in:
parent 3cb9baaf65
commit a8ae6c2f42
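
Usage after this change, sketched from the updated test at the end of this diff. The `decompressAlloc` helper is illustrative only; it assumes this file is exposed as `std.compress.lzma2` (as the doc comment on the new `buffer: anytype` parameter suggests), and it copies the result with `gpa.dupe` rather than relying on any other `Writer.Allocating` method:

const std = @import("std");
// Assumption: the lzma2 Decode shown in this diff is reachable at this path.
const lzma2 = std.compress.lzma2;

/// Hypothetical helper mirroring the updated "decompress hello world stream" test:
/// the caller now owns the input reader and the allocating output writer.
fn decompressAlloc(gpa: std.mem.Allocator, compressed: []const u8) ![]u8 {
    // Decode.init no longer takes a reader or an unpacked size.
    var decode = try lzma2.Decode.init(gpa);
    defer decode.deinit(gpa);

    var stream: std.Io.Reader = .fixed(compressed);
    var result: std.Io.Writer.Allocating = .init(gpa);
    defer result.deinit();

    // decompress() drives the single-step lzma.Decode.process loop internally.
    try decode.decompress(&stream, &result);
    return gpa.dupe(u8, result.written());
}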
@@ -105,7 +105,6 @@ pub const RangeDecoder = struct {

pub const Decode = struct {
properties: Properties,
unpacked_size: ?u64,
literal_probs: Vec2d,
pos_slot_decoder: [4]BitTree(6),
align_decoder: BitTree(4),
@@ -121,15 +120,10 @@ pub const Decode = struct {
len_decoder: LenDecoder,
rep_len_decoder: LenDecoder,

pub fn init(
gpa: Allocator,
properties: Properties,
unpacked_size: ?u64,
) !Decode {
pub fn init(gpa: Allocator, properties: Properties) !Decode {
return .{
.properties = properties,
.unpacked_size = unpacked_size,
.literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (properties.lc + properties.lp), 0x300 }),
.literal_probs = try Vec2d.init(gpa, 0x400, @as(usize, 1) << (properties.lc + properties.lp), 0x300),
.pos_slot_decoder = @splat(.{}),
.align_decoder = .{},
.pos_decoders = @splat(0x400),
@@ -157,7 +151,7 @@ pub const Decode = struct {
self.literal_probs.fill(0x400);
} else {
self.literal_probs.deinit(gpa);
self.literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 });
self.literal_probs = try Vec2d.init(gpa, 0x400, @as(usize, 1) << (new_props.lc + new_props.lp), 0x300);
}

self.properties = new_props;
@@ -176,11 +170,12 @@ pub const Decode = struct {
self.rep_len_decoder.reset();
}

fn processNext(
pub fn process(
self: *Decode,
reader: *Reader,
allocating: *Writer.Allocating,
buffer: *CircularBuffer,
/// `CircularBuffer` or `std.compress.lzma2.AccumBuffer`.
buffer: anytype,
decoder: *RangeDecoder,
) !ProcessingStatus {
const gpa = allocating.allocator;
@@ -256,39 +251,11 @@ pub const Decode = struct {
return .more;
}

pub fn process(
self: *Decode,
reader: *Reader,
allocating: *Writer.Allocating,
buffer: *CircularBuffer,
decoder: *RangeDecoder,
) !void {
process_next: {
if (self.unpacked_size) |unpacked_size| {
if (buffer.len >= unpacked_size) {
break :process_next;
}
} else if (decoder.isFinished()) {
break :process_next;
}
switch (try self.processNext(reader, allocating, buffer, decoder)) {
.more => return,
.finished => {},
}
}

if (self.unpacked_size) |unpacked_size| {
if (buffer.len != unpacked_size) return error.DecompressedSizeMismatch;
}

try buffer.finish(&allocating.writer);
self.state = math.maxInt(usize);
}

fn decodeLiteral(
self: *Decode,
reader: *Reader,
buffer: *CircularBuffer,
/// `CircularBuffer` or `std.compress.lzma2.AccumBuffer`.
buffer: anytype,
decoder: *RangeDecoder,
) !u8 {
const def_prev_byte = 0;
@@ -377,10 +344,7 @@ pub const Decode = struct {
}

pub fn get(self: CircularBuffer, index: usize) u8 {
return if (0 <= index and index < self.buf.items.len)
self.buf.items[index]
else
0;
return if (0 <= index and index < self.buf.items.len) self.buf.items[index] else 0;
}

pub fn set(self: *CircularBuffer, gpa: Allocator, index: usize, value: u8) !void {
@@ -524,29 +488,29 @@ pub const Decode = struct {
data: []u16,
cols: usize,

pub fn init(gpa: Allocator, value: u16, size: struct { usize, usize }) !Vec2d {
const len = try math.mul(usize, size[0], size[1]);
pub fn init(gpa: Allocator, value: u16, w: usize, h: usize) !Vec2d {
const len = try math.mul(usize, w, h);
const data = try gpa.alloc(u16, len);
@memset(data, value);
return .{
.data = data,
.cols = size[1],
.cols = h,
};
}

pub fn deinit(self: *Vec2d, gpa: Allocator) void {
gpa.free(self.data);
self.* = undefined;
pub fn deinit(v: *Vec2d, gpa: Allocator) void {
gpa.free(v.data);
v.* = undefined;
}

pub fn fill(self: *Vec2d, value: u16) void {
@memset(self.data, value);
pub fn fill(v: *Vec2d, value: u16) void {
@memset(v.data, value);
}

fn get(self: Vec2d, row: usize) ![]u16 {
const start_row = try math.mul(usize, row, self.cols);
const end_row = try math.add(usize, start_row, self.cols);
return self.data[start_row..end_row];
fn get(v: Vec2d, row: usize) ![]u16 {
const start_row = try math.mul(usize, row, v.cols);
const end_row = try math.add(usize, start_row, v.cols);
return v.data[start_row..end_row];
}
};

@@ -627,6 +591,7 @@ pub const Decompress = struct {
range_decoder: RangeDecoder,
decode: Decode,
err: ?Error,
unpacked_size: ?u64,

pub const Error = error{
OutOfMemory,
@@ -654,7 +619,7 @@ pub const Decompress = struct {
.input = input,
.buffer = Decode.CircularBuffer.init(params.dict_size, mem_limit),
.range_decoder = try RangeDecoder.init(input),
.decode = try Decode.init(gpa, params.properties, params.unpacked_size),
.decode = try Decode.init(gpa, params.properties),
.reader = .{
.buffer = buffer,
.vtable = &.{
@@ -666,6 +631,7 @@ pub const Decompress = struct {
.end = 0,
},
.err = null,
.unpacked_size = params.unpacked_size,
};
}

@@ -728,20 +694,46 @@ pub const Decompress = struct {
r.end = allocating.writer.end;
}
if (d.decode.state == math.maxInt(usize)) return error.EndOfStream;
d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| switch (err) {

process_next: {
if (d.unpacked_size) |unpacked_size| {
if (d.buffer.len >= unpacked_size) break :process_next;
} else if (d.range_decoder.isFinished()) {
break :process_next;
}
switch (d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| switch (err) {
error.WriteFailed => {
d.err = error.OutOfMemory;
return error.ReadFailed;
},
error.EndOfStream => {
d.err = error.EndOfStream;
return error.ReadFailed;
},
else => |e| {
d.err = e;
return error.ReadFailed;
},
}) {
.more => return 0,
.finished => break :process_next,
}
}

if (d.unpacked_size) |unpacked_size| {
if (d.buffer.len != unpacked_size) {
d.err = error.DecompressedSizeMismatch;
return error.ReadFailed;
}
}

d.buffer.finish(&allocating.writer) catch |err| switch (err) {
error.WriteFailed => {
d.err = error.OutOfMemory;
return error.ReadFailed;
},
error.EndOfStream => {
d.err = error.EndOfStream;
return error.ReadFailed;
},
else => |e| {
d.err = e;
return error.ReadFailed;
},
};
d.decode.state = math.maxInt(usize);
return 0;
}
};

@@ -6,17 +6,15 @@ const Writer = std.Io.Writer;
const Reader = std.Io.Reader;

/// An accumulating buffer for LZ sequences
pub const LzAccumBuffer = struct {
pub const AccumBuffer = struct {
/// Buffer
buf: ArrayList(u8),

/// Buffer memory limit
memlimit: usize,

/// Total number of bytes sent through the buffer
len: usize,

pub fn init(memlimit: usize) LzAccumBuffer {
pub fn init(memlimit: usize) AccumBuffer {
return .{
.buf = .{},
.memlimit = memlimit,
@@ -24,20 +22,20 @@ pub const LzAccumBuffer = struct {
};
}

pub fn appendByte(self: *LzAccumBuffer, allocator: Allocator, byte: u8) !void {
pub fn appendByte(self: *AccumBuffer, allocator: Allocator, byte: u8) !void {
try self.buf.append(allocator, byte);
self.len += 1;
}

/// Reset the internal dictionary
pub fn reset(self: *LzAccumBuffer, writer: *Writer) !void {
pub fn reset(self: *AccumBuffer, writer: *Writer) !void {
try writer.writeAll(self.buf.items);
self.buf.clearRetainingCapacity();
self.len = 0;
}

/// Retrieve the last byte or return a default
pub fn lastOr(self: LzAccumBuffer, lit: u8) u8 {
pub fn lastOr(self: AccumBuffer, lit: u8) u8 {
const buf_len = self.buf.items.len;
return if (buf_len == 0)
lit
@@ -46,7 +44,7 @@ pub const LzAccumBuffer = struct {
}

/// Retrieve the n-th last byte
pub fn lastN(self: LzAccumBuffer, dist: usize) !u8 {
pub fn lastN(self: AccumBuffer, dist: usize) !u8 {
const buf_len = self.buf.items.len;
if (dist > buf_len) {
return error.CorruptInput;
@@ -57,7 +55,7 @@ pub const LzAccumBuffer = struct {

/// Append a literal
pub fn appendLiteral(
self: *LzAccumBuffer,
self: *AccumBuffer,
allocator: Allocator,
lit: u8,
writer: *Writer,
@@ -72,7 +70,7 @@ pub const LzAccumBuffer = struct {

/// Fetch an LZ sequence (length, distance) from inside the buffer
pub fn appendLz(
self: *LzAccumBuffer,
self: *AccumBuffer,
allocator: Allocator,
len: usize,
dist: usize,
@@ -95,12 +93,12 @@ pub const LzAccumBuffer = struct {
self.len += len;
}

pub fn finish(self: *LzAccumBuffer, writer: *Writer) !void {
pub fn finish(self: *AccumBuffer, writer: *Writer) !void {
try writer.writeAll(self.buf.items);
self.buf.clearRetainingCapacity();
}

pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void {
pub fn deinit(self: *AccumBuffer, allocator: Allocator) void {
self.buf.deinit(allocator);
self.* = undefined;
}
@@ -109,59 +107,43 @@ pub const LzAccumBuffer = struct {
pub const Decode = struct {
lzma_decode: lzma.Decode,

pub fn init(allocator: Allocator) !Decode {
return Decode{
.lzma_decode = try lzma.Decode.init(
allocator,
.{
.lc = 0,
.lp = 0,
.pb = 0,
},
null,
),
};
pub fn init(gpa: Allocator) !Decode {
return .{ .lzma_decode = try lzma.Decode.init(gpa, .{ .lc = 0, .lp = 0, .pb = 0 }) };
}

pub fn deinit(self: *Decode, allocator: Allocator) void {
self.lzma_decode.deinit(allocator);
pub fn deinit(self: *Decode, gpa: Allocator) void {
self.lzma_decode.deinit(gpa);
self.* = undefined;
}

pub fn decompress(
self: *Decode,
allocator: Allocator,
reader: *Reader,
writer: *Writer,
) !void {
var accum = LzAccumBuffer.init(std.math.maxInt(usize));
defer accum.deinit(allocator);
pub fn decompress(d: *Decode, reader: *Reader, allocating: *Writer.Allocating) !void {
const gpa = allocating.allocator;

var accum = AccumBuffer.init(std.math.maxInt(usize));
defer accum.deinit(gpa);

while (true) {
const status = try reader.readByte();
const status = try reader.takeByte();

switch (status) {
0 => break,
1 => try parseUncompressed(allocator, reader, writer, &accum, true),
2 => try parseUncompressed(allocator, reader, writer, &accum, false),
else => try self.parseLzma(allocator, reader, writer, &accum, status),
1 => try parseUncompressed(reader, allocating, &accum, true),
2 => try parseUncompressed(reader, allocating, &accum, false),
else => try d.parseLzma(reader, allocating, &accum, status),
}
}

try accum.finish(writer);
try accum.finish(&allocating.writer);
}

fn parseLzma(
self: *Decode,
allocator: Allocator,
d: *Decode,
reader: *Reader,
writer: *Writer,
accum: *LzAccumBuffer,
allocating: *Writer.Allocating,
accum: *AccumBuffer,
status: u8,
) !void {
if (status & 0x80 == 0) {
return error.CorruptInput;
}
if (status & 0x80 == 0) return error.CorruptInput;

const Reset = struct {
dict: bool,
@@ -169,23 +151,23 @@ pub const Decode = struct {
props: bool,
};

const reset = switch ((status >> 5) & 0x3) {
0 => Reset{
const reset: Reset = switch ((status >> 5) & 0x3) {
0 => .{
.dict = false,
.state = false,
.props = false,
},
1 => Reset{
1 => .{
.dict = false,
.state = true,
.props = false,
},
2 => Reset{
2 => .{
.dict = false,
.state = true,
.props = true,
},
3 => Reset{
3 => .{
.dict = true,
.state = true,
.props = true,
@@ -196,24 +178,24 @@ pub const Decode = struct {
const unpacked_size = blk: {
var tmp: u64 = status & 0x1F;
tmp <<= 16;
tmp |= try reader.readInt(u16, .big);
tmp |= try reader.takeInt(u16, .big);
break :blk tmp + 1;
};

const packed_size = blk: {
const tmp: u17 = try reader.readInt(u16, .big);
const tmp: u17 = try reader.takeInt(u16, .big);
break :blk tmp + 1;
};

if (reset.dict) {
try accum.reset(writer);
}
if (reset.dict) try accum.reset(&allocating.writer);

const ld = &d.lzma_decode;

if (reset.state) {
var new_props = self.lzma_decode.properties;
var new_props = ld.properties;

if (reset.props) {
var props = try reader.readByte();
var props = try reader.takeByte();
if (props >= 225) {
return error.CorruptInput;
}
@@ -231,38 +213,44 @@ pub const Decode = struct {
new_props = .{ .lc = lc, .lp = lp, .pb = pb };
}

try self.lzma_decode.resetState(allocator, new_props);
try ld.resetState(allocating.allocator, new_props);
}

self.lzma_decode.unpacked_size = unpacked_size + accum.len;
var range_decoder = try lzma.RangeDecoder.init(reader);

var counter = std.io.countingReader(reader);
const counter_reader = counter.reader();

var rangecoder = try lzma.RangeDecoder.init(counter_reader);
while (try self.lzma_decode.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {}

if (counter.bytes_read != packed_size) {
return error.CorruptInput;
while (true) {
if (accum.len >= unpacked_size) break;
if (range_decoder.isFinished()) break;
switch (try ld.process(reader, allocating, accum, &range_decoder)) {
.more => continue,
.finished => break,
}
}
if (accum.len != unpacked_size) return error.DecompressedSizeMismatch;

// TODO restore this error
//if (counter.bytes_read != packed_size) {
// return error.CorruptInput;
//}
_ = packed_size;
}

fn parseUncompressed(
allocator: Allocator,
reader: *Reader,
writer: *Writer,
accum: *LzAccumBuffer,
allocating: *Writer.Allocating,
accum: *AccumBuffer,
reset_dict: bool,
) !void {
const unpacked_size = @as(u17, try reader.readInt(u16, .big)) + 1;
const unpacked_size = @as(u17, try reader.takeInt(u16, .big)) + 1;

if (reset_dict) {
try accum.reset(writer);
}
if (reset_dict) try accum.reset(&allocating.writer);

var i: @TypeOf(unpacked_size) = 0;
while (i < unpacked_size) : (i += 1) {
try accum.appendByte(allocator, try reader.readByte());
const gpa = allocating.allocator;

var i = unpacked_size;
while (i != 0) {
try accum.appendByte(gpa, try reader.takeByte());
i -= 1;
}
}
};
@@ -273,13 +261,13 @@ test "decompress hello world stream" {

const gpa = std.testing.allocator;

var stream: std.Io.Reader = .fixed(compressed);

var decode = try Decode.init(gpa, &stream);
var decode = try Decode.init(gpa);
defer decode.deinit(gpa);

const result = try decode.reader.allocRemaining(gpa, .unlimited);
defer gpa.free(result);
var stream: std.Io.Reader = .fixed(compressed);
var result: std.Io.Writer.Allocating = .init(gpa);
defer result.deinit();

try std.testing.expectEqualStrings(expected, result);
try decode.decompress(&stream, &result);
try std.testing.expectEqualStrings(expected, result.written());
}
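
For review context, a minimal sketch of the caller-driven loop this commit introduces in place of the old internal `process` wrapper: each call to `process` now performs one step and returns `.more` or `.finished`, and the caller checks the expected size and finishes the buffer, as the new `Decompress.read` and `parseLzma` bodies above do. The `driveProcess` helper is hypothetical, and the `lzma` import assumes the sibling `std.compress.lzma` module the lzma2 file refers to as `lzma`:

const std = @import("std");
// Assumption: these are the Decode and RangeDecoder types shown in this diff.
const lzma = std.compress.lzma;

fn driveProcess(
    ld: *lzma.Decode,
    reader: *std.Io.Reader,
    allocating: *std.Io.Writer.Allocating,
    buffer: anytype, // *CircularBuffer or *AccumBuffer, per the doc comment in the diff
    range_decoder: *lzma.RangeDecoder,
    unpacked_size: u64,
) !void {
    // Step the decoder until the expected number of bytes has been produced
    // or the range coder reports end of stream.
    while (buffer.len < unpacked_size and !range_decoder.isFinished()) {
        switch (try ld.process(reader, allocating, buffer, range_decoder)) {
            .more => continue,
            .finished => break,
        }
    }
    if (buffer.len != unpacked_size) return error.DecompressedSizeMismatch;
    // Flush whatever the buffer still holds to the output writer.
    try buffer.finish(&allocating.writer);
}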