From 8e2af21cd99d1c033146b1ab15ab743533cbd743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?fn=20=E2=8C=83=20=E2=8C=A5?= <70830482+FnControlOption@users.noreply.github.com> Date: Thu, 2 Feb 2023 11:59:56 -0800 Subject: [PATCH 01/65] Add LZMA decoder --- build.zig | 2 + lib/std/compress.zig | 6 +- lib/std/compress/lzma.zig | 36 + lib/std/compress/lzma/decode.zig | 16 + lib/std/compress/lzma/decode/lzbuffer.zig | 226 ++++++ lib/std/compress/lzma/decode/lzma.zig | 398 +++++++++++ lib/std/compress/lzma/decode/lzma2.zig | 169 +++++ lib/std/compress/lzma/decode/rangecoder.zig | 184 +++++ lib/std/compress/lzma/lzma2_test.zig | 27 + lib/std/compress/lzma/lzma_test.zig | 87 +++ .../testdata/bad-too_big_size-with_eopm.lzma | Bin 0 -> 37 bytes .../bad-too_small_size-without_eopm-3.lzma | Bin 0 -> 36 bytes .../testdata/good-known_size-with_eopm.lzma | Bin 0 -> 37 bytes .../good-known_size-without_eopm.lzma | Bin 0 -> 31 bytes .../testdata/good-unknown_size-with_eopm.lzma | Bin 0 -> 37 bytes lib/std/compress/lzma/vec2d.zig | 128 ++++ lib/std/compress/xz/block.zig | 144 +--- lib/std/compress/xz/lzma.zig | 658 ------------------ 18 files changed, 1296 insertions(+), 785 deletions(-) create mode 100644 lib/std/compress/lzma.zig create mode 100644 lib/std/compress/lzma/decode.zig create mode 100644 lib/std/compress/lzma/decode/lzbuffer.zig create mode 100644 lib/std/compress/lzma/decode/lzma.zig create mode 100644 lib/std/compress/lzma/decode/lzma2.zig create mode 100644 lib/std/compress/lzma/decode/rangecoder.zig create mode 100644 lib/std/compress/lzma/lzma2_test.zig create mode 100644 lib/std/compress/lzma/lzma_test.zig create mode 100644 lib/std/compress/lzma/testdata/bad-too_big_size-with_eopm.lzma create mode 100644 lib/std/compress/lzma/testdata/bad-too_small_size-without_eopm-3.lzma create mode 100644 lib/std/compress/lzma/testdata/good-known_size-with_eopm.lzma create mode 100644 lib/std/compress/lzma/testdata/good-known_size-without_eopm.lzma create mode 100644 lib/std/compress/lzma/testdata/good-unknown_size-with_eopm.lzma create mode 100644 lib/std/compress/lzma/vec2d.zig delete mode 100644 lib/std/compress/xz/lzma.zig diff --git a/build.zig b/build.zig index 3a7468243f..91aee6062a 100644 --- a/build.zig +++ b/build.zig @@ -122,6 +122,8 @@ pub fn build(b: *Builder) !void { "compress-gettysburg.txt", "compress-pi.txt", "rfc1951.txt", + // exclude files from lib/std/compress/lzma/testdata + ".lzma", // exclude files from lib/std/compress/xz/testdata ".xz", // exclude files from lib/std/tz/ diff --git a/lib/std/compress.zig b/lib/std/compress.zig index 334d7bfcb8..03d05c1ef7 100644 --- a/lib/std/compress.zig +++ b/lib/std/compress.zig @@ -2,8 +2,9 @@ const std = @import("std.zig"); pub const deflate = @import("compress/deflate.zig"); pub const gzip = @import("compress/gzip.zig"); -pub const zlib = @import("compress/zlib.zig"); +pub const lzma = @import("compress/lzma.zig"); pub const xz = @import("compress/xz.zig"); +pub const zlib = @import("compress/zlib.zig"); pub fn HashedReader( comptime ReaderType: anytype, @@ -38,6 +39,7 @@ pub fn hashedReader( test { _ = deflate; _ = gzip; - _ = zlib; + _ = lzma; _ = xz; + _ = zlib; } diff --git a/lib/std/compress/lzma.zig b/lib/std/compress/lzma.zig new file mode 100644 index 0000000000..3cf7f65459 --- /dev/null +++ b/lib/std/compress/lzma.zig @@ -0,0 +1,36 @@ +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const FixedBufferStream = std.io.FixedBufferStream; + +pub const decode = @import("lzma/decode.zig"); +pub const LzmaParams = decode.lzma.LzmaParams; +pub const LzmaDecoder = decode.lzma.LzmaDecoder; +pub const Lzma2Decoder = decode.lzma2.Lzma2Decoder; + +pub fn lzmaDecompress( + allocator: Allocator, + reader: anytype, + writer: anytype, + options: decode.Options, +) !void { + const params = try LzmaParams.readHeader(reader, options); + var decoder = try LzmaDecoder.init(allocator, params, options.memlimit); + defer decoder.deinit(allocator); + return decoder.decompress(allocator, reader, writer); +} + +pub fn lzma2Decompress( + allocator: Allocator, + reader: anytype, + writer: anytype, +) !void { + var decoder = try Lzma2Decoder.init(allocator); + defer decoder.deinit(allocator); + return decoder.decompress(allocator, reader, writer); +} + +test { + _ = @import("lzma/lzma_test.zig"); + _ = @import("lzma/lzma2_test.zig"); + _ = @import("lzma/vec2d.zig"); +} diff --git a/lib/std/compress/lzma/decode.zig b/lib/std/compress/lzma/decode.zig new file mode 100644 index 0000000000..f0615516f7 --- /dev/null +++ b/lib/std/compress/lzma/decode.zig @@ -0,0 +1,16 @@ +pub const lzbuffer = @import("decode/lzbuffer.zig"); +pub const lzma = @import("decode/lzma.zig"); +pub const lzma2 = @import("decode/lzma2.zig"); +pub const rangecoder = @import("decode/rangecoder.zig"); + +pub const Options = struct { + unpacked_size: UnpackedSize = .read_from_header, + memlimit: ?usize = null, + allow_incomplete: bool = false, +}; + +pub const UnpackedSize = union(enum) { + read_from_header, + read_header_but_use_provided: ?u64, + use_provided: ?u64, +}; diff --git a/lib/std/compress/lzma/decode/lzbuffer.zig b/lib/std/compress/lzma/decode/lzbuffer.zig new file mode 100644 index 0000000000..4063c7993a --- /dev/null +++ b/lib/std/compress/lzma/decode/lzbuffer.zig @@ -0,0 +1,226 @@ +const std = @import("../../../std.zig"); +const math = std.math; +const mem = std.mem; +const Allocator = std.mem.Allocator; +const ArrayListUnmanaged = std.ArrayListUnmanaged; + +/// An accumulating buffer for LZ sequences +pub const LzAccumBuffer = struct { + /// Buffer + buf: ArrayListUnmanaged(u8), + + /// Buffer memory limit + memlimit: usize, + + /// Total number of bytes sent through the buffer + len: usize, + + const Self = @This(); + + pub fn init(memlimit: usize) Self { + return Self{ + .buf = .{}, + .memlimit = memlimit, + .len = 0, + }; + } + + pub fn appendByte(self: *Self, allocator: Allocator, byte: u8) !void { + try self.buf.append(allocator, byte); + self.len += 1; + } + + /// Reset the internal dictionary + pub fn reset(self: *Self, writer: anytype) !void { + try writer.writeAll(self.buf.items); + self.buf.clearRetainingCapacity(); + self.len = 0; + } + + /// Retrieve the last byte or return a default + pub fn lastOr(self: Self, lit: u8) u8 { + const buf_len = self.buf.items.len; + return if (buf_len == 0) + lit + else + self.buf.items[buf_len - 1]; + } + + /// Retrieve the n-th last byte + pub fn lastN(self: Self, dist: usize) !u8 { + const buf_len = self.buf.items.len; + if (dist > buf_len) { + return error.CorruptInput; + } + + return self.buf.items[buf_len - dist]; + } + + /// Append a literal + pub fn appendLiteral( + self: *Self, + allocator: Allocator, + lit: u8, + writer: anytype, + ) !void { + _ = writer; + if (self.len >= self.memlimit) { + return error.CorruptInput; + } + try self.buf.append(allocator, lit); + self.len += 1; + } + + /// Fetch an LZ sequence (length, distance) from inside the buffer + pub fn appendLz( + self: *Self, + allocator: Allocator, + len: usize, + dist: usize, + writer: anytype, + ) !void { + _ = writer; + + const buf_len = self.buf.items.len; + if (dist > buf_len) { + return error.CorruptInput; + } + + var offset = buf_len - dist; + var i: usize = 0; + while (i < len) : (i += 1) { + const x = self.buf.items[offset]; + try self.buf.append(allocator, x); + offset += 1; + } + self.len += len; + } + + pub fn finish(self: *Self, writer: anytype) !void { + try writer.writeAll(self.buf.items); + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + self.buf.deinit(allocator); + self.* = undefined; + } +}; + +/// A circular buffer for LZ sequences +pub const LzCircularBuffer = struct { + /// Circular buffer + buf: ArrayListUnmanaged(u8), + + /// Length of the buffer + dict_size: usize, + + /// Buffer memory limit + memlimit: usize, + + /// Current position + cursor: usize, + + /// Total number of bytes sent through the buffer + len: usize, + + const Self = @This(); + + pub fn init(dict_size: usize, memlimit: usize) Self { + return Self{ + .buf = .{}, + .dict_size = dict_size, + .memlimit = memlimit, + .cursor = 0, + .len = 0, + }; + } + + pub fn get(self: Self, index: usize) u8 { + return if (0 <= index and index < self.buf.items.len) + self.buf.items[index] + else + 0; + } + + pub fn set(self: *Self, allocator: Allocator, index: usize, value: u8) !void { + if (index >= self.memlimit) { + return error.CorruptInput; + } + try self.buf.ensureTotalCapacity(allocator, index + 1); + while (self.buf.items.len < index) { + self.buf.appendAssumeCapacity(0); + } + self.buf.appendAssumeCapacity(value); + } + + /// Retrieve the last byte or return a default + pub fn lastOr(self: Self, lit: u8) u8 { + return if (self.len == 0) + lit + else + self.get((self.dict_size + self.cursor - 1) % self.dict_size); + } + + /// Retrieve the n-th last byte + pub fn lastN(self: Self, dist: usize) !u8 { + if (dist > self.dict_size or dist > self.len) { + return error.CorruptInput; + } + + const offset = (self.dict_size + self.cursor - dist) % self.dict_size; + return self.get(offset); + } + + /// Append a literal + pub fn appendLiteral( + self: *Self, + allocator: Allocator, + lit: u8, + writer: anytype, + ) !void { + try self.set(allocator, self.cursor, lit); + self.cursor += 1; + self.len += 1; + + // Flush the circular buffer to the output + if (self.cursor == self.dict_size) { + try writer.writeAll(self.buf.items); + self.cursor = 0; + } + } + + /// Fetch an LZ sequence (length, distance) from inside the buffer + pub fn appendLz( + self: *Self, + allocator: Allocator, + len: usize, + dist: usize, + writer: anytype, + ) !void { + if (dist > self.dict_size or dist > self.len) { + return error.CorruptInput; + } + + var offset = (self.dict_size + self.cursor - dist) % self.dict_size; + var i: usize = 0; + while (i < len) : (i += 1) { + const x = self.get(offset); + try self.appendLiteral(allocator, x, writer); + offset += 1; + if (offset == self.dict_size) { + offset = 0; + } + } + } + + pub fn finish(self: *Self, writer: anytype) !void { + if (self.cursor > 0) { + try writer.writeAll(self.buf.items[0..self.cursor]); + } + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + self.buf.deinit(allocator); + self.* = undefined; + } +}; diff --git a/lib/std/compress/lzma/decode/lzma.zig b/lib/std/compress/lzma/decode/lzma.zig new file mode 100644 index 0000000000..dfa24f7e91 --- /dev/null +++ b/lib/std/compress/lzma/decode/lzma.zig @@ -0,0 +1,398 @@ +const std = @import("../../../std.zig"); +const assert = std.debug.assert; +const math = std.math; +const Allocator = std.mem.Allocator; +const ArrayListUnmanaged = std.ArrayListUnmanaged; +const FixedBufferStream = std.io.FixedBufferStream; + +const LzCircularBuffer = @import("lzbuffer.zig").LzCircularBuffer; +const Options = @import("../decode.zig").Options; +const Vec2D = @import("../vec2d.zig").Vec2D; +const rangecoder = @import("rangecoder.zig"); +const BitTree = rangecoder.BitTree; +const LenDecoder = rangecoder.LenDecoder; +const RangeDecoder = rangecoder.RangeDecoder; + +const ProcessingStatus = enum { + continue_, + finished, +}; + +pub const LzmaProperties = struct { + lc: u4, + lp: u3, + pb: u3, + + fn validate(self: LzmaProperties) void { + assert(self.lc <= 8); + assert(self.lp <= 4); + assert(self.pb <= 4); + } +}; + +pub const LzmaParams = struct { + properties: LzmaProperties, + dict_size: u32, + unpacked_size: ?u64, + + pub fn readHeader(reader: anytype, options: Options) !LzmaParams { + var props = try reader.readByte(); + if (props >= 225) { + return error.CorruptInput; + } + + const lc = @intCast(u4, props % 9); + props /= 9; + const lp = @intCast(u3, props % 5); + props /= 5; + const pb = @intCast(u3, props); + + const dict_size_provided = try reader.readIntLittle(u32); + const dict_size = math.max(0x1000, dict_size_provided); + + const unpacked_size = switch (options.unpacked_size) { + .read_from_header => blk: { + const unpacked_size_provided = try reader.readIntLittle(u64); + const marker_mandatory = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF; + break :blk if (marker_mandatory) + null + else + unpacked_size_provided; + }, + .read_header_but_use_provided => |x| blk: { + _ = try reader.readIntLittle(u64); + break :blk x; + }, + .use_provided => |x| x, + }; + + return LzmaParams{ + .properties = LzmaProperties{ .lc = lc, .lp = lp, .pb = pb }, + .dict_size = dict_size, + .unpacked_size = unpacked_size, + }; + } +}; + +pub const DecoderState = struct { + lzma_props: LzmaProperties, + unpacked_size: ?u64, + literal_probs: Vec2D(u16), + pos_slot_decoder: [4]BitTree(6), + align_decoder: BitTree(4), + pos_decoders: [115]u16, + is_match: [192]u16, + is_rep: [12]u16, + is_rep_g0: [12]u16, + is_rep_g1: [12]u16, + is_rep_g2: [12]u16, + is_rep_0long: [192]u16, + state: usize, + rep: [4]usize, + len_decoder: LenDecoder, + rep_len_decoder: LenDecoder, + + pub fn init( + allocator: Allocator, + lzma_props: LzmaProperties, + unpacked_size: ?u64, + ) !DecoderState { + return .{ + .lzma_props = lzma_props, + .unpacked_size = unpacked_size, + .literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (lzma_props.lc + lzma_props.lp), 0x300 }), + .pos_slot_decoder = .{.{}} ** 4, + .align_decoder = .{}, + .pos_decoders = .{0x400} ** 115, + .is_match = .{0x400} ** 192, + .is_rep = .{0x400} ** 12, + .is_rep_g0 = .{0x400} ** 12, + .is_rep_g1 = .{0x400} ** 12, + .is_rep_g2 = .{0x400} ** 12, + .is_rep_0long = .{0x400} ** 192, + .state = 0, + .rep = .{0} ** 4, + .len_decoder = .{}, + .rep_len_decoder = .{}, + }; + } + + pub fn deinit(self: *DecoderState, allocator: Allocator) void { + self.literal_probs.deinit(allocator); + self.* = undefined; + } + + pub fn resetState(self: *DecoderState, allocator: Allocator, new_props: LzmaProperties) !void { + new_props.validate(); + if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) { + self.literal_probs.fill(0x400); + } else { + self.literal_probs.deinit(allocator); + self.literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 }); + } + + self.lzma_props = new_props; + for (self.pos_slot_decoder) |*t| t.reset(); + self.align_decoder.reset(); + self.pos_decoders = .{0x400} ** 115; + self.is_match = .{0x400} ** 192; + self.is_rep = .{0x400} ** 12; + self.is_rep_g0 = .{0x400} ** 12; + self.is_rep_g1 = .{0x400} ** 12; + self.is_rep_g2 = .{0x400} ** 12; + self.is_rep_0long = .{0x400} ** 192; + self.state = 0; + self.rep = .{0} ** 4; + self.len_decoder.reset(); + self.rep_len_decoder.reset(); + } + + fn processNextInner( + self: *DecoderState, + allocator: Allocator, + reader: anytype, + writer: anytype, + buffer: anytype, + decoder: *RangeDecoder, + update: bool, + ) !ProcessingStatus { + const pos_state = buffer.len & ((@as(usize, 1) << self.lzma_props.pb) - 1); + + if (!try decoder.decodeBit( + reader, + &self.is_match[(self.state << 4) + pos_state], + update, + )) { + const byte: u8 = try self.decodeLiteral(reader, buffer, decoder, update); + + if (update) { + try buffer.appendLiteral(allocator, byte, writer); + + self.state = if (self.state < 4) + 0 + else if (self.state < 10) + self.state - 3 + else + self.state - 6; + } + return .continue_; + } + + var len: usize = undefined; + if (try decoder.decodeBit(reader, &self.is_rep[self.state], update)) { + if (!try decoder.decodeBit(reader, &self.is_rep_g0[self.state], update)) { + if (!try decoder.decodeBit( + reader, + &self.is_rep_0long[(self.state << 4) + pos_state], + update, + )) { + if (update) { + self.state = if (self.state < 7) 9 else 11; + const dist = self.rep[0] + 1; + try buffer.appendLz(allocator, 1, dist, writer); + } + return .continue_; + } + } else { + const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state], update)) + 1 + else if (!try decoder.decodeBit(reader, &self.is_rep_g2[self.state], update)) + 2 + else + 3; + if (update) { + const dist = self.rep[idx]; + var i = idx; + while (i > 0) : (i -= 1) { + self.rep[i] = self.rep[i - 1]; + } + self.rep[0] = dist; + } + } + + len = try self.rep_len_decoder.decode(reader, decoder, pos_state, update); + + if (update) { + self.state = if (self.state < 7) 8 else 11; + } + } else { + if (update) { + self.rep[3] = self.rep[2]; + self.rep[2] = self.rep[1]; + self.rep[1] = self.rep[0]; + } + + len = try self.len_decoder.decode(reader, decoder, pos_state, update); + + if (update) { + self.state = if (self.state < 7) 7 else 10; + } + + const rep_0 = try self.decodeDistance(reader, decoder, len, update); + + if (update) { + self.rep[0] = rep_0; + if (self.rep[0] == 0xFFFF_FFFF) { + if (decoder.isFinished()) { + return .finished; + } + return error.CorruptInput; + } + } + } + + if (update) { + len += 2; + + const dist = self.rep[0] + 1; + try buffer.appendLz(allocator, len, dist, writer); + } + + return .continue_; + } + + fn processNext( + self: *DecoderState, + allocator: Allocator, + reader: anytype, + writer: anytype, + buffer: anytype, + decoder: *RangeDecoder, + ) !ProcessingStatus { + return self.processNextInner(allocator, reader, writer, buffer, decoder, true); + } + + pub fn process( + self: *DecoderState, + allocator: Allocator, + reader: anytype, + writer: anytype, + buffer: anytype, + decoder: *RangeDecoder, + ) !void { + while (true) { + if (self.unpacked_size) |unpacked_size| { + if (buffer.len >= unpacked_size) { + break; + } + } else if (decoder.isFinished()) { + break; + } + + if (try self.processNext(allocator, reader, writer, buffer, decoder) == .finished) { + break; + } + } + + if (self.unpacked_size) |len| { + if (len != buffer.len) { + return error.CorruptInput; + } + } + } + + fn decodeLiteral( + self: *DecoderState, + reader: anytype, + buffer: anytype, + decoder: *RangeDecoder, + update: bool, + ) !u8 { + const def_prev_byte = 0; + const prev_byte = @as(usize, buffer.lastOr(def_prev_byte)); + + var result: usize = 1; + const lit_state = ((buffer.len & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) + + (prev_byte >> (8 - self.lzma_props.lc)); + const probs = try self.literal_probs.getMut(lit_state); + + if (self.state >= 7) { + var match_byte = @as(usize, try buffer.lastN(self.rep[0] + 1)); + + while (result < 0x100) { + const match_bit = (match_byte >> 7) & 1; + match_byte <<= 1; + const bit = @boolToInt(try decoder.decodeBit( + reader, + &probs[((@as(usize, 1) + match_bit) << 8) + result], + update, + )); + result = (result << 1) ^ bit; + if (match_bit != bit) { + break; + } + } + } + + while (result < 0x100) { + result = (result << 1) ^ @boolToInt(try decoder.decodeBit(reader, &probs[result], update)); + } + + return @truncate(u8, result - 0x100); + } + + fn decodeDistance( + self: *DecoderState, + reader: anytype, + decoder: *RangeDecoder, + length: usize, + update: bool, + ) !usize { + const len_state = if (length > 3) 3 else length; + + const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(reader, decoder, update)); + if (pos_slot < 4) + return pos_slot; + + const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1); + var result = (2 ^ (pos_slot & 1)) << num_direct_bits; + + if (pos_slot < 14) { + result += try decoder.parseReverseBitTree( + reader, + num_direct_bits, + &self.pos_decoders, + result - pos_slot, + update, + ); + } else { + result += @as(usize, try decoder.get(reader, num_direct_bits - 4)) << 4; + result += try self.align_decoder.parseReverse(reader, decoder, update); + } + + return result; + } +}; + +pub const LzmaDecoder = struct { + params: LzmaParams, + memlimit: usize, + state: DecoderState, + + pub fn init(allocator: Allocator, params: LzmaParams, memlimit: ?usize) !LzmaDecoder { + return LzmaDecoder{ + .params = params, + .memlimit = memlimit orelse math.maxInt(usize), + .state = try DecoderState.init(allocator, params.properties, params.unpacked_size), + }; + } + + pub fn deinit(self: *LzmaDecoder, allocator: Allocator) void { + self.state.deinit(allocator); + self.* = undefined; + } + + pub fn decompress( + self: *LzmaDecoder, + allocator: Allocator, + reader: anytype, + writer: anytype, + ) !void { + var buffer = LzCircularBuffer.init(self.params.dict_size, self.memlimit); + defer buffer.deinit(allocator); + + var decoder = try RangeDecoder.init(reader); + try self.state.process(allocator, reader, writer, &buffer, &decoder); + try buffer.finish(writer); + } +}; diff --git a/lib/std/compress/lzma/decode/lzma2.zig b/lib/std/compress/lzma/decode/lzma2.zig new file mode 100644 index 0000000000..cc778b9379 --- /dev/null +++ b/lib/std/compress/lzma/decode/lzma2.zig @@ -0,0 +1,169 @@ +const std = @import("../../../std.zig"); +const Allocator = std.mem.Allocator; + +const lzma = @import("lzma.zig"); +const DecoderState = lzma.DecoderState; +const LzmaProperties = lzma.LzmaProperties; +const LzAccumBuffer = @import("lzbuffer.zig").LzAccumBuffer; +const RangeDecoder = @import("rangecoder.zig").RangeDecoder; + +pub const Lzma2Decoder = struct { + lzma_state: DecoderState, + + pub fn init(allocator: Allocator) !Lzma2Decoder { + return Lzma2Decoder{ + .lzma_state = try DecoderState.init( + allocator, + LzmaProperties{ + .lc = 0, + .lp = 0, + .pb = 0, + }, + null, + ), + }; + } + + pub fn deinit(self: *Lzma2Decoder, allocator: Allocator) void { + self.lzma_state.deinit(allocator); + self.* = undefined; + } + + pub fn decompress( + self: *Lzma2Decoder, + allocator: Allocator, + reader: anytype, + writer: anytype, + ) !void { + var accum = LzAccumBuffer.init(std.math.maxInt(usize)); + defer accum.deinit(allocator); + + while (true) { + const status = try reader.readByte(); + + switch (status) { + 0 => break, + 1 => try parseUncompressed(allocator, reader, writer, &accum, true), + 2 => try parseUncompressed(allocator, reader, writer, &accum, false), + else => try self.parseLzma(allocator, reader, writer, &accum, status), + } + } + + try accum.finish(writer); + } + + fn parseLzma( + self: *Lzma2Decoder, + allocator: Allocator, + reader: anytype, + writer: anytype, + accum: *LzAccumBuffer, + status: u8, + ) !void { + if (status & 0x80 == 0) { + return error.CorruptInput; + } + + const Reset = struct { + dict: bool, + state: bool, + props: bool, + }; + + const reset = switch ((status >> 5) & 0x3) { + 0 => Reset{ + .dict = false, + .state = false, + .props = false, + }, + 1 => Reset{ + .dict = false, + .state = true, + .props = false, + }, + 2 => Reset{ + .dict = false, + .state = true, + .props = true, + }, + 3 => Reset{ + .dict = true, + .state = true, + .props = true, + }, + else => unreachable, + }; + + const unpacked_size = blk: { + var tmp: u64 = status & 0x1F; + tmp <<= 16; + tmp |= try reader.readIntBig(u16); + break :blk tmp + 1; + }; + + const packed_size = blk: { + const tmp: u17 = try reader.readIntBig(u16); + break :blk tmp + 1; + }; + + if (reset.dict) { + try accum.reset(writer); + } + + if (reset.state) { + var new_props = self.lzma_state.lzma_props; + + if (reset.props) { + var props = try reader.readByte(); + if (props >= 225) { + return error.CorruptInput; + } + + const lc = @intCast(u4, props % 9); + props /= 9; + const lp = @intCast(u3, props % 5); + props /= 5; + const pb = @intCast(u3, props); + + if (lc + lp > 4) { + return error.CorruptInput; + } + + new_props = LzmaProperties{ .lc = lc, .lp = lp, .pb = pb }; + } + + try self.lzma_state.resetState(allocator, new_props); + } + + self.lzma_state.unpacked_size = unpacked_size + accum.len; + + var counter = std.io.countingReader(reader); + const counter_reader = counter.reader(); + + var rangecoder = try RangeDecoder.init(counter_reader); + try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder); + + if (counter.bytes_read != packed_size) { + return error.CorruptInput; + } + } + + fn parseUncompressed( + allocator: Allocator, + reader: anytype, + writer: anytype, + accum: *LzAccumBuffer, + reset_dict: bool, + ) !void { + const unpacked_size = try reader.readIntBig(u16) + 1; // TODO: overflow + + if (reset_dict) { + try accum.reset(writer); + } + + var i: @TypeOf(unpacked_size) = 0; + while (i < unpacked_size) : (i += 1) { + try accum.appendByte(allocator, try reader.readByte()); + } + } +}; diff --git a/lib/std/compress/lzma/decode/rangecoder.zig b/lib/std/compress/lzma/decode/rangecoder.zig new file mode 100644 index 0000000000..dc85558e3e --- /dev/null +++ b/lib/std/compress/lzma/decode/rangecoder.zig @@ -0,0 +1,184 @@ +const std = @import("../../../std.zig"); +const mem = std.mem; +const Allocator = std.mem.Allocator; +const ArrayListUnmanaged = std.ArrayListUnmanaged; +const FixedBufferStream = std.io.FixedBufferStream; + +pub const RangeDecoder = struct { + range: u32, + code: u32, + + pub fn init(reader: anytype) !RangeDecoder { + const reserved = try reader.readByte(); + if (reserved != 0) { + return error.CorruptInput; + } + return RangeDecoder{ + .range = 0xFFFF_FFFF, + .code = try reader.readIntBig(u32), + }; + } + + pub fn fromParts( + range: u32, + code: u32, + ) RangeDecoder { + return .{ + .range = range, + .code = code, + }; + } + + pub fn set(self: *RangeDecoder, range: u32, code: u32) void { + self.range = range; + self.code = code; + } + + pub inline fn isFinished(self: RangeDecoder) bool { + return self.code == 0; + } + + inline fn normalize(self: *RangeDecoder, reader: anytype) !void { + if (self.range < 0x0100_0000) { + self.range <<= 8; + self.code = (self.code << 8) ^ @as(u32, try reader.readByte()); + } + } + + inline fn getBit(self: *RangeDecoder, reader: anytype) !bool { + self.range >>= 1; + + const bit = self.code >= self.range; + if (bit) + self.code -= self.range; + + try self.normalize(reader); + return bit; + } + + pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 { + var result: u32 = 0; + var i: usize = 0; + while (i < count) : (i += 1) + result = (result << 1) ^ @boolToInt(try self.getBit(reader)); + return result; + } + + pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool { + const bound = (self.range >> 11) * prob.*; + + if (self.code < bound) { + if (update) + prob.* += (0x800 - prob.*) >> 5; + self.range = bound; + + try self.normalize(reader); + return false; + } else { + if (update) + prob.* -= prob.* >> 5; + self.code -= bound; + self.range -= bound; + + try self.normalize(reader); + return true; + } + } + + fn parseBitTree( + self: *RangeDecoder, + reader: anytype, + num_bits: u5, + probs: []u16, + update: bool, + ) !u32 { + var tmp: u32 = 1; + var i: @TypeOf(num_bits) = 0; + while (i < num_bits) : (i += 1) { + const bit = try self.decodeBit(reader, &probs[tmp], update); + tmp = (tmp << 1) ^ @boolToInt(bit); + } + return tmp - (@as(u32, 1) << num_bits); + } + + pub fn parseReverseBitTree( + self: *RangeDecoder, + reader: anytype, + num_bits: u5, + probs: []u16, + offset: usize, + update: bool, + ) !u32 { + var result: u32 = 0; + var tmp: usize = 1; + var i: @TypeOf(num_bits) = 0; + while (i < num_bits) : (i += 1) { + const bit = @boolToInt(try self.decodeBit(reader, &probs[offset + tmp], update)); + tmp = (tmp << 1) ^ bit; + result ^= @as(u32, bit) << i; + } + return result; + } +}; + +pub fn BitTree(comptime num_bits: usize) type { + return struct { + probs: [1 << num_bits]u16 = .{0x400} ** (1 << num_bits), + + const Self = @This(); + + pub fn parse( + self: *Self, + reader: anytype, + decoder: *RangeDecoder, + update: bool, + ) !u32 { + return decoder.parseBitTree(reader, num_bits, &self.probs, update); + } + + pub fn parseReverse( + self: *Self, + reader: anytype, + decoder: *RangeDecoder, + update: bool, + ) !u32 { + return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update); + } + + pub fn reset(self: *Self) void { + mem.set(u16, &self.probs, 0x400); + } + }; +} + +pub const LenDecoder = struct { + choice: u16 = 0x400, + choice2: u16 = 0x400, + low_coder: [16]BitTree(3) = .{.{}} ** 16, + mid_coder: [16]BitTree(3) = .{.{}} ** 16, + high_coder: BitTree(8) = .{}, + + pub fn decode( + self: *LenDecoder, + reader: anytype, + decoder: *RangeDecoder, + pos_state: usize, + update: bool, + ) !usize { + if (!try decoder.decodeBit(reader, &self.choice, update)) { + return @as(usize, try self.low_coder[pos_state].parse(reader, decoder, update)); + } else if (!try decoder.decodeBit(reader, &self.choice2, update)) { + return @as(usize, try self.mid_coder[pos_state].parse(reader, decoder, update)) + 8; + } else { + return @as(usize, try self.high_coder.parse(reader, decoder, update)) + 16; + } + } + + pub fn reset(self: *LenDecoder) void { + self.choice = 0x400; + self.choice2 = 0x400; + for (self.low_coder) |*t| t.reset(); + for (self.mid_coder) |*t| t.reset(); + self.high_coder.reset(); + } +}; diff --git a/lib/std/compress/lzma/lzma2_test.zig b/lib/std/compress/lzma/lzma2_test.zig new file mode 100644 index 0000000000..47ebe2d031 --- /dev/null +++ b/lib/std/compress/lzma/lzma2_test.zig @@ -0,0 +1,27 @@ +const std = @import("../../std.zig"); +const lzma = @import("../lzma.zig"); + +fn testDecompress(compressed: []const u8, writer: anytype) !void { + const allocator = std.testing.allocator; + var stream = std.io.fixedBufferStream(compressed); + try lzma.lzma2Decompress(allocator, stream.reader(), writer); +} + +fn testDecompressEqual(expected: []const u8, compressed: []const u8) !void { + const allocator = std.testing.allocator; + var decomp = std.ArrayList(u8).init(allocator); + defer decomp.deinit(); + try testDecompress(compressed, decomp.writer()); + try std.testing.expectEqualSlices(u8, expected, decomp.items); +} + +fn testDecompressError(expected: anyerror, compressed: []const u8) !void { + return std.testing.expectError(expected, testDecompress(compressed, std.io.null_writer)); +} + +test { + try testDecompressEqual( + "Hello\nWorld!\n", + &[_]u8{ 0x01, 0x00, 0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x02, 0x00, 0x06, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x0A, 0x00 }, + ); +} diff --git a/lib/std/compress/lzma/lzma_test.zig b/lib/std/compress/lzma/lzma_test.zig new file mode 100644 index 0000000000..2147ca62c5 --- /dev/null +++ b/lib/std/compress/lzma/lzma_test.zig @@ -0,0 +1,87 @@ +const std = @import("../../std.zig"); +const lzma = @import("../lzma.zig"); + +fn testDecompress(compressed: []const u8, writer: anytype) !void { + const allocator = std.testing.allocator; + var stream = std.io.fixedBufferStream(compressed); + try lzma.lzmaDecompress(allocator, stream.reader(), writer, .{}); +} + +fn testDecompressEqual(expected: []const u8, compressed: []const u8) !void { + const allocator = std.testing.allocator; + var decomp = std.ArrayList(u8).init(allocator); + defer decomp.deinit(); + try testDecompress(compressed, decomp.writer()); + try std.testing.expectEqualSlices(u8, expected, decomp.items); +} + +fn testDecompressError(expected: anyerror, compressed: []const u8) !void { + return std.testing.expectError(expected, testDecompress(compressed, std.io.null_writer)); +} + +test "decompress empty world" { + try testDecompressEqual( + "", + &[_]u8{ + 0x5d, 0x00, 0x00, 0x80, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x83, 0xff, + 0xfb, 0xff, 0xff, 0xc0, 0x00, 0x00, 0x00, + }, + ); +} + +test "decompress hello world" { + try testDecompressEqual( + "Hello world\n", + &[_]u8{ + 0x5d, 0x00, 0x00, 0x80, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x24, 0x19, + 0x49, 0x98, 0x6f, 0x10, 0x19, 0xc6, 0xd7, 0x31, 0xeb, 0x36, 0x50, 0xb2, 0x98, 0x48, 0xff, 0xfe, + 0xa5, 0xb0, 0x00, + }, + ); +} + +test "decompress huge dict" { + try testDecompressEqual( + "Hello world\n", + &[_]u8{ + 0x5d, 0x7f, 0x7f, 0x7f, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x24, 0x19, + 0x49, 0x98, 0x6f, 0x10, 0x19, 0xc6, 0xd7, 0x31, 0xeb, 0x36, 0x50, 0xb2, 0x98, 0x48, 0xff, 0xfe, + 0xa5, 0xb0, 0x00, + }, + ); +} + +test "unknown size with end of payload marker" { + try testDecompressEqual( + "Hello\nWorld!\n", + @embedFile("testdata/good-unknown_size-with_eopm.lzma"), + ); +} + +test "known size without end of payload marker" { + try testDecompressEqual( + "Hello\nWorld!\n", + @embedFile("testdata/good-known_size-without_eopm.lzma"), + ); +} + +test "known size with end of payload marker" { + try testDecompressEqual( + "Hello\nWorld!\n", + @embedFile("testdata/good-known_size-with_eopm.lzma"), + ); +} + +test "too big uncompressed size in header" { + try testDecompressError( + error.CorruptInput, + @embedFile("testdata/bad-too_big_size-with_eopm.lzma"), + ); +} + +test "too small uncompressed size in header" { + try testDecompressError( + error.CorruptInput, + @embedFile("testdata/bad-too_small_size-without_eopm-3.lzma"), + ); +} diff --git a/lib/std/compress/lzma/testdata/bad-too_big_size-with_eopm.lzma b/lib/std/compress/lzma/testdata/bad-too_big_size-with_eopm.lzma new file mode 100644 index 0000000000000000000000000000000000000000..b7cd3b05fcead4afd5d75732ab4ef2ad21dc41a9 GIT binary patch literal 37 ncma!L5MW^7V}Jq`NzWPitfK1byk!*^v@*m+|Nk%j!N33jV+IIi literal 0 HcmV?d00001 diff --git a/lib/std/compress/lzma/testdata/bad-too_small_size-without_eopm-3.lzma b/lib/std/compress/lzma/testdata/bad-too_small_size-without_eopm-3.lzma new file mode 100644 index 0000000000000000000000000000000000000000..67a1af34570be3920d7e5184ef040efdfa3015e2 GIT binary patch literal 36 mcma!L5MW^7WPkz{NzWPitfGetG}y&2mi{-rGQ-_3fdK$zQ3!GX literal 0 HcmV?d00001 diff --git a/lib/std/compress/lzma/testdata/good-known_size-with_eopm.lzma b/lib/std/compress/lzma/testdata/good-known_size-with_eopm.lzma new file mode 100644 index 0000000000000000000000000000000000000000..1b45307930a582815373a9aa7ba4eda42cfa5fba GIT binary patch literal 37 ncma!L5MW^7Wq<+|NzWPitfK1byk!*^v@*m+|Nk%j!N33jV&({B literal 0 HcmV?d00001 diff --git a/lib/std/compress/lzma/testdata/good-known_size-without_eopm.lzma b/lib/std/compress/lzma/testdata/good-known_size-without_eopm.lzma new file mode 100644 index 0000000000000000000000000000000000000000..83623fde97dbd6e3f939533875e874c1b6a1353d GIT binary patch literal 31 gcma!L5MW^7Wq<+|NzWPitfK1byk!*^G-VYS05o?5p#T5? literal 0 HcmV?d00001 diff --git a/lib/std/compress/lzma/testdata/good-unknown_size-with_eopm.lzma b/lib/std/compress/lzma/testdata/good-unknown_size-with_eopm.lzma new file mode 100644 index 0000000000000000000000000000000000000000..0f4ff822e94e7314aba944420b0480a5652db340 GIT binary patch literal 37 ncma!L5MW^V4+RV=lAbg2Sw+>=dCMv;Xl00t{{LV4gMk46L~#&L literal 0 HcmV?d00001 diff --git a/lib/std/compress/lzma/vec2d.zig b/lib/std/compress/lzma/vec2d.zig new file mode 100644 index 0000000000..1372d3592c --- /dev/null +++ b/lib/std/compress/lzma/vec2d.zig @@ -0,0 +1,128 @@ +const std = @import("../../std.zig"); +const math = std.math; +const mem = std.mem; +const Allocator = std.mem.Allocator; + +pub fn Vec2D(comptime T: type) type { + return struct { + data: []T, + cols: usize, + + const Self = @This(); + + pub fn init(allocator: Allocator, value: T, size: struct { usize, usize }) !Self { + const len = try math.mul(usize, size[0], size[1]); + const data = try allocator.alloc(T, len); + mem.set(T, data, value); + return Self{ + .data = data, + .cols = size[1], + }; + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + allocator.free(self.data); + self.* = undefined; + } + + pub fn fill(self: *Self, value: T) void { + mem.set(T, self.data, value); + } + + inline fn _get(self: Self, row: usize) ![]T { + const start_row = try math.mul(usize, row, self.cols); + const end_row = try math.add(usize, start_row, self.cols); + return self.data[start_row..end_row]; + } + + pub fn get(self: Self, row: usize) ![]const T { + return self._get(row); + } + + pub fn getMut(self: *Self, row: usize) ![]T { + return self._get(row); + } + }; +} + +const testing = std.testing; +const expectEqualSlices = std.testing.expectEqualSlices; +const expectError = std.testing.expectError; + +test "Vec2D.init" { + const allocator = testing.allocator; + var vec2d = try Vec2D(i32).init(allocator, 1, .{ 2, 3 }); + defer vec2d.deinit(allocator); + + try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(0)); + try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(1)); +} + +test "Vec2D.init overflow" { + const allocator = testing.allocator; + try expectError( + error.Overflow, + Vec2D(i32).init(allocator, 1, .{ math.maxInt(usize), math.maxInt(usize) }), + ); +} + +test "Vec2D.fill" { + const allocator = testing.allocator; + var vec2d = try Vec2D(i32).init(allocator, 0, .{ 2, 3 }); + defer vec2d.deinit(allocator); + + vec2d.fill(7); + + try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(0)); + try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(1)); +} + +test "Vec2D.get" { + var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 }; + const vec2d = Vec2D(i32){ + .data = &data, + .cols = 2, + }; + + try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0)); + try expectEqualSlices(i32, &.{ 2, 3 }, try vec2d.get(1)); + try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2)); + try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3)); +} + +test "Vec2D.getMut" { + var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 }; + var vec2d = Vec2D(i32){ + .data = &data, + .cols = 2, + }; + + const row = try vec2d.getMut(1); + row[1] = 9; + + try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0)); + // (1, 1) should be 9. + try expectEqualSlices(i32, &.{ 2, 9 }, try vec2d.get(1)); + try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2)); + try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3)); +} + +test "Vec2D.get multiplication overflow" { + const allocator = testing.allocator; + var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 4 }); + defer matrix.deinit(allocator); + + const row = (math.maxInt(usize) / 4) + 1; + try expectError(error.Overflow, matrix.get(row)); + try expectError(error.Overflow, matrix.getMut(row)); +} + +test "Vec2D.get addition overflow" { + const allocator = testing.allocator; + var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 5 }); + defer matrix.deinit(allocator); + + const row = math.maxInt(usize) / 5; + try expectError(error.Overflow, matrix.get(row)); + try expectError(error.Overflow, matrix.getMut(row)); +} diff --git a/lib/std/compress/xz/block.zig b/lib/std/compress/xz/block.zig index 1b909beaf4..47ba9f7463 100644 --- a/lib/std/compress/xz/block.zig +++ b/lib/std/compress/xz/block.zig @@ -1,6 +1,7 @@ const std = @import("../../std.zig"); -const lzma = @import("lzma.zig"); +const lzma = std.compress.lzma; const Allocator = std.mem.Allocator; +const ArrayListUnmanaged = std.ArrayListUnmanaged; const Crc32 = std.hash.Crc32; const Crc64 = std.hash.crc.Crc64Xz; const Sha256 = std.crypto.hash.sha2.Sha256; @@ -32,8 +33,7 @@ pub fn Decoder(comptime ReaderType: type) type { inner_reader: ReaderType, check: xz.Check, err: ?Error, - accum: lzma.LzAccumBuffer, - lzma_state: lzma.DecoderState, + to_read: ArrayListUnmanaged(u8), block_count: usize, fn init(allocator: Allocator, in_reader: ReaderType, check: xz.Check) !Self { @@ -42,15 +42,13 @@ pub fn Decoder(comptime ReaderType: type) type { .inner_reader = in_reader, .check = check, .err = null, - .accum = .{}, - .lzma_state = try lzma.DecoderState.init(allocator), + .to_read = .{}, .block_count = 0, }; } pub fn deinit(self: *Self) void { - self.accum.deinit(self.allocator); - self.lzma_state.deinit(self.allocator); + self.to_read.deinit(self.allocator); } pub fn reader(self: *Self) Reader { @@ -59,9 +57,13 @@ pub fn Decoder(comptime ReaderType: type) type { pub fn read(self: *Self, output: []u8) Error!usize { while (true) { - if (self.accum.to_read.items.len > 0) { - const n = self.accum.read(output); - if (self.accum.to_read.items.len == 0 and self.err != null) { + if (self.to_read.items.len > 0) { + const input = self.to_read.items; + const n = std.math.min(input.len, output.len); + std.mem.copy(u8, output[0..n], input[0..n]); + std.mem.copy(u8, input, input[n..]); + self.to_read.shrinkRetainingCapacity(input.len - n); + if (self.to_read.items.len == 0 and self.err != null) { if (self.err.? == DecodeError.EndOfStreamWithNoError) { return n; } @@ -77,15 +79,12 @@ pub fn Decoder(comptime ReaderType: type) type { } self.readBlock() catch |e| { self.err = e; - if (self.accum.to_read.items.len == 0) { - try self.accum.reset(self.allocator); - } }; } } fn readBlock(self: *Self) Error!void { - const unpacked_pos = self.accum.to_read.items.len; + const unpacked_pos = self.to_read.items.len; var block_counter = std.io.countingReader(self.inner_reader); const block_reader = block_counter.reader(); @@ -156,15 +155,18 @@ pub fn Decoder(comptime ReaderType: type) type { // Compressed Data var packed_counter = std.io.countingReader(block_reader); - const packed_reader = packed_counter.reader(); - while (try self.readLzma2Chunk(packed_reader)) {} + try lzma.lzma2Decompress( + self.allocator, + packed_counter.reader(), + self.to_read.writer(self.allocator), + ); if (packed_size) |s| { if (s != packed_counter.bytes_read) return error.CorruptInput; } - const unpacked_bytes = self.accum.to_read.items[unpacked_pos..]; + const unpacked_bytes = self.to_read.items[unpacked_pos..]; if (unpacked_size) |s| { if (s != unpacked_bytes.len) return error.CorruptInput; @@ -205,113 +207,5 @@ pub fn Decoder(comptime ReaderType: type) type { self.block_count += 1; } - - fn readLzma2Chunk(self: *Self, packed_reader: anytype) Error!bool { - const status = try packed_reader.readByte(); - switch (status) { - 0 => { - try self.accum.reset(self.allocator); - return false; - }, - 1, 2 => { - if (status == 1) - try self.accum.reset(self.allocator); - - const size = try packed_reader.readIntBig(u16) + 1; - try self.accum.ensureUnusedCapacity(self.allocator, size); - - var i: usize = 0; - while (i < size) : (i += 1) - self.accum.appendAssumeCapacity(try packed_reader.readByte()); - - return true; - }, - else => { - if (status & 0x80 == 0) - return error.CorruptInput; - - const Reset = struct { - dict: bool, - state: bool, - props: bool, - }; - - const reset = switch ((status >> 5) & 0x3) { - 0 => Reset{ - .dict = false, - .state = false, - .props = false, - }, - 1 => Reset{ - .dict = false, - .state = true, - .props = false, - }, - 2 => Reset{ - .dict = false, - .state = true, - .props = true, - }, - 3 => Reset{ - .dict = true, - .state = true, - .props = true, - }, - else => unreachable, - }; - - const unpacked_size = blk: { - var tmp: u64 = status & 0x1F; - tmp <<= 16; - tmp |= try packed_reader.readIntBig(u16); - break :blk tmp + 1; - }; - - const packed_size = blk: { - const tmp: u17 = try packed_reader.readIntBig(u16); - break :blk tmp + 1; - }; - - if (reset.dict) - try self.accum.reset(self.allocator); - - if (reset.state) { - var new_props = self.lzma_state.lzma_props; - - if (reset.props) { - var props = try packed_reader.readByte(); - if (props >= 225) - return error.CorruptInput; - - const lc = @intCast(u4, props % 9); - props /= 9; - const lp = @intCast(u3, props % 5); - props /= 5; - const pb = @intCast(u3, props); - - if (lc + lp > 4) - return error.CorruptInput; - - new_props = .{ .lc = lc, .lp = lp, .pb = pb }; - } - - try self.lzma_state.reset_state(self.allocator, new_props); - } - - self.lzma_state.unpacked_size = unpacked_size + self.accum.len(); - - const buffer = try self.allocator.alloc(u8, packed_size); - defer self.allocator.free(buffer); - - for (buffer) |*b| - b.* = try packed_reader.readByte(); - - var rangecoder = try lzma.RangeDecoder.init(buffer); - try self.lzma_state.process(self.allocator, &self.accum, &rangecoder); - - return true; - }, - } - } }; } diff --git a/lib/std/compress/xz/lzma.zig b/lib/std/compress/xz/lzma.zig deleted file mode 100644 index 9fe941e2b1..0000000000 --- a/lib/std/compress/xz/lzma.zig +++ /dev/null @@ -1,658 +0,0 @@ -// Ported from https://github.com/gendx/lzma-rs - -const std = @import("../../std.zig"); -const assert = std.debug.assert; -const Allocator = std.mem.Allocator; -const ArrayListUnmanaged = std.ArrayListUnmanaged; - -const LzmaProperties = struct { - lc: u4, - lp: u3, - pb: u3, - - fn validate(self: LzmaProperties) void { - assert(self.lc <= 8); - assert(self.lp <= 4); - assert(self.pb <= 4); - } -}; - -pub const DecoderState = struct { - lzma_props: LzmaProperties, - unpacked_size: ?u64, - literal_probs: Vec2D(u16), - pos_slot_decoder: [4]BitTree, - align_decoder: BitTree, - pos_decoders: [115]u16, - is_match: [192]u16, - is_rep: [12]u16, - is_rep_g0: [12]u16, - is_rep_g1: [12]u16, - is_rep_g2: [12]u16, - is_rep_0long: [192]u16, - state: usize, - rep: [4]usize, - len_decoder: LenDecoder, - rep_len_decoder: LenDecoder, - - pub fn init(allocator: Allocator) !DecoderState { - return .{ - .lzma_props = LzmaProperties{ .lc = 0, .lp = 0, .pb = 0 }, - .unpacked_size = null, - .literal_probs = try Vec2D(u16).init(allocator, 0x400, 1, 0x300), - .pos_slot_decoder = .{ - try BitTree.init(allocator, 6), - try BitTree.init(allocator, 6), - try BitTree.init(allocator, 6), - try BitTree.init(allocator, 6), - }, - .align_decoder = try BitTree.init(allocator, 4), - .pos_decoders = .{0x400} ** 115, - .is_match = .{0x400} ** 192, - .is_rep = .{0x400} ** 12, - .is_rep_g0 = .{0x400} ** 12, - .is_rep_g1 = .{0x400} ** 12, - .is_rep_g2 = .{0x400} ** 12, - .is_rep_0long = .{0x400} ** 192, - .state = 0, - .rep = .{0} ** 4, - .len_decoder = try LenDecoder.init(allocator), - .rep_len_decoder = try LenDecoder.init(allocator), - }; - } - - pub fn deinit(self: *DecoderState, allocator: Allocator) void { - self.literal_probs.deinit(allocator); - for (self.pos_slot_decoder) |*t| t.deinit(allocator); - self.align_decoder.deinit(allocator); - self.len_decoder.deinit(allocator); - self.rep_len_decoder.deinit(allocator); - } - - pub fn reset_state(self: *DecoderState, allocator: Allocator, new_props: LzmaProperties) !void { - new_props.validate(); - if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) { - self.literal_probs.fill(0x400); - } else { - self.literal_probs.deinit(allocator); - self.literal_probs = try Vec2D(u16).init(allocator, 0x400, @as(usize, 1) << (new_props.lc + new_props.lp), 0x300); - } - - self.lzma_props = new_props; - for (self.pos_slot_decoder) |*t| t.reset(); - self.align_decoder.reset(); - self.pos_decoders = .{0x400} ** 115; - self.is_match = .{0x400} ** 192; - self.is_rep = .{0x400} ** 12; - self.is_rep_g0 = .{0x400} ** 12; - self.is_rep_g1 = .{0x400} ** 12; - self.is_rep_g2 = .{0x400} ** 12; - self.is_rep_0long = .{0x400} ** 192; - self.state = 0; - self.rep = .{0} ** 4; - self.len_decoder.reset(); - self.rep_len_decoder.reset(); - } - - fn processNextInner( - self: *DecoderState, - allocator: Allocator, - output: *LzAccumBuffer, - rangecoder: *RangeDecoder, - update: bool, - ) !ProcessingStatus { - const pos_state = output.len() & ((@as(usize, 1) << self.lzma_props.pb) - 1); - - if (!try rangecoder.decodeBit( - &self.is_match[(self.state << 4) + pos_state], - update, - )) { - const byte: u8 = try self.decodeLiteral(output, rangecoder, update); - - if (update) { - try output.appendLiteral(allocator, byte); - - self.state = if (self.state < 4) - 0 - else if (self.state < 10) - self.state - 3 - else - self.state - 6; - } - return .continue_; - } - - var len: usize = undefined; - if (try rangecoder.decodeBit(&self.is_rep[self.state], update)) { - if (!try rangecoder.decodeBit(&self.is_rep_g0[self.state], update)) { - if (!try rangecoder.decodeBit( - &self.is_rep_0long[(self.state << 4) + pos_state], - update, - )) { - if (update) { - self.state = if (self.state < 7) 9 else 11; - const dist = self.rep[0] + 1; - try output.appendLz(allocator, 1, dist); - } - return .continue_; - } - } else { - const idx: usize = if (!try rangecoder.decodeBit(&self.is_rep_g1[self.state], update)) - 1 - else if (!try rangecoder.decodeBit(&self.is_rep_g2[self.state], update)) - 2 - else - 3; - if (update) { - const dist = self.rep[idx]; - var i = idx; - while (i > 0) : (i -= 1) { - self.rep[i] = self.rep[i - 1]; - } - self.rep[0] = dist; - } - } - - len = try self.rep_len_decoder.decode(rangecoder, pos_state, update); - - if (update) { - self.state = if (self.state < 7) 8 else 11; - } - } else { - if (update) { - self.rep[3] = self.rep[2]; - self.rep[2] = self.rep[1]; - self.rep[1] = self.rep[0]; - } - - len = try self.len_decoder.decode(rangecoder, pos_state, update); - - if (update) { - self.state = if (self.state < 7) 7 else 10; - } - - const rep_0 = try self.decodeDistance(rangecoder, len, update); - - if (update) { - self.rep[0] = rep_0; - if (self.rep[0] == 0xFFFF_FFFF) { - if (rangecoder.isFinished()) { - return .finished; - } - return error.CorruptInput; - } - } - } - - if (update) { - len += 2; - - const dist = self.rep[0] + 1; - try output.appendLz(allocator, len, dist); - } - - return .continue_; - } - - fn processNext( - self: *DecoderState, - allocator: Allocator, - output: *LzAccumBuffer, - rangecoder: *RangeDecoder, - ) !ProcessingStatus { - return self.processNextInner(allocator, output, rangecoder, true); - } - - pub fn process( - self: *DecoderState, - allocator: Allocator, - output: *LzAccumBuffer, - rangecoder: *RangeDecoder, - ) !void { - while (true) { - if (self.unpacked_size) |unpacked_size| { - if (output.len() >= unpacked_size) { - break; - } - } else if (rangecoder.isFinished()) { - break; - } - - if (try self.processNext(allocator, output, rangecoder) == .finished) { - break; - } - } - - if (self.unpacked_size) |len| { - if (len != output.len()) { - return error.CorruptInput; - } - } - } - - fn decodeLiteral( - self: *DecoderState, - output: *LzAccumBuffer, - rangecoder: *RangeDecoder, - update: bool, - ) !u8 { - const def_prev_byte = 0; - const prev_byte = @as(usize, output.lastOr(def_prev_byte)); - - var result: usize = 1; - const lit_state = ((output.len() & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) + - (prev_byte >> (8 - self.lzma_props.lc)); - const probs = try self.literal_probs.get(lit_state); - - if (self.state >= 7) { - var match_byte = @as(usize, try output.lastN(self.rep[0] + 1)); - - while (result < 0x100) { - const match_bit = (match_byte >> 7) & 1; - match_byte <<= 1; - const bit = @boolToInt(try rangecoder.decodeBit( - &probs[((@as(usize, 1) + match_bit) << 8) + result], - update, - )); - result = (result << 1) ^ bit; - if (match_bit != bit) { - break; - } - } - } - - while (result < 0x100) { - result = (result << 1) ^ @boolToInt(try rangecoder.decodeBit(&probs[result], update)); - } - - return @truncate(u8, result - 0x100); - } - - fn decodeDistance( - self: *DecoderState, - rangecoder: *RangeDecoder, - length: usize, - update: bool, - ) !usize { - const len_state = if (length > 3) 3 else length; - - const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(rangecoder, update)); - if (pos_slot < 4) - return pos_slot; - - const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1); - var result = (2 ^ (pos_slot & 1)) << num_direct_bits; - - if (pos_slot < 14) { - result += try rangecoder.parseReverseBitTree( - num_direct_bits, - &self.pos_decoders, - result - pos_slot, - update, - ); - } else { - result += @as(usize, try rangecoder.get(num_direct_bits - 4)) << 4; - result += try self.align_decoder.parseReverse(rangecoder, update); - } - - return result; - } -}; - -const ProcessingStatus = enum { - continue_, - finished, -}; - -pub const LzAccumBuffer = struct { - to_read: ArrayListUnmanaged(u8) = .{}, - buf: ArrayListUnmanaged(u8) = .{}, - - pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void { - self.to_read.deinit(allocator); - self.buf.deinit(allocator); - } - - pub fn read(self: *LzAccumBuffer, output: []u8) usize { - const input = self.to_read.items; - const n = std.math.min(input.len, output.len); - std.mem.copy(u8, output[0..n], input[0..n]); - std.mem.copy(u8, input, input[n..]); - self.to_read.shrinkRetainingCapacity(input.len - n); - return n; - } - - pub fn ensureUnusedCapacity( - self: *LzAccumBuffer, - allocator: Allocator, - additional_count: usize, - ) !void { - try self.buf.ensureUnusedCapacity(allocator, additional_count); - } - - pub fn appendAssumeCapacity(self: *LzAccumBuffer, byte: u8) void { - self.buf.appendAssumeCapacity(byte); - } - - pub fn reset(self: *LzAccumBuffer, allocator: Allocator) !void { - try self.to_read.appendSlice(allocator, self.buf.items); - self.buf.clearRetainingCapacity(); - } - - pub fn len(self: *const LzAccumBuffer) usize { - return self.buf.items.len; - } - - pub fn lastOr(self: *const LzAccumBuffer, lit: u8) u8 { - const buf_len = self.buf.items.len; - return if (buf_len == 0) - lit - else - self.buf.items[buf_len - 1]; - } - - pub fn lastN(self: *const LzAccumBuffer, dist: usize) !u8 { - const buf_len = self.buf.items.len; - if (dist > buf_len) { - return error.CorruptInput; - } - - return self.buf.items[buf_len - dist]; - } - - pub fn appendLiteral(self: *LzAccumBuffer, allocator: Allocator, lit: u8) !void { - try self.buf.append(allocator, lit); - } - - pub fn appendLz(self: *LzAccumBuffer, allocator: Allocator, length: usize, dist: usize) !void { - const buf_len = self.buf.items.len; - if (dist > buf_len) { - return error.CorruptInput; - } - - var offset = buf_len - dist; - var i: usize = 0; - while (i < length) : (i += 1) { - const x = self.buf.items[offset]; - try self.buf.append(allocator, x); - offset += 1; - } - } -}; - -pub const RangeDecoder = struct { - stream: std.io.FixedBufferStream([]const u8), - range: u32, - code: u32, - - pub fn init(buffer: []const u8) !RangeDecoder { - var dec = RangeDecoder{ - .stream = std.io.fixedBufferStream(buffer), - .range = 0xFFFF_FFFF, - .code = 0, - }; - const reader = dec.stream.reader(); - _ = try reader.readByte(); - dec.code = try reader.readIntBig(u32); - return dec; - } - - pub fn fromParts( - buffer: []const u8, - range: u32, - code: u32, - ) RangeDecoder { - return .{ - .stream = std.io.fixedBufferStream(buffer), - .range = range, - .code = code, - }; - } - - pub fn set(self: *RangeDecoder, range: u32, code: u32) void { - self.range = range; - self.code = code; - } - - pub fn readInto(self: *RangeDecoder, dest: []u8) !usize { - return self.stream.read(dest); - } - - pub inline fn isFinished(self: *const RangeDecoder) bool { - return self.code == 0 and self.isEof(); - } - - pub inline fn isEof(self: *const RangeDecoder) bool { - return self.stream.pos == self.stream.buffer.len; - } - - inline fn normalize(self: *RangeDecoder) !void { - if (self.range < 0x0100_0000) { - self.range <<= 8; - self.code = (self.code << 8) ^ @as(u32, try self.stream.reader().readByte()); - } - } - - inline fn getBit(self: *RangeDecoder) !bool { - self.range >>= 1; - - const bit = self.code >= self.range; - if (bit) - self.code -= self.range; - - try self.normalize(); - return bit; - } - - fn get(self: *RangeDecoder, count: usize) !u32 { - var result: u32 = 0; - var i: usize = 0; - while (i < count) : (i += 1) - result = (result << 1) ^ @boolToInt(try self.getBit()); - return result; - } - - pub inline fn decodeBit(self: *RangeDecoder, prob: *u16, update: bool) !bool { - const bound = (self.range >> 11) * prob.*; - - if (self.code < bound) { - if (update) - prob.* += (0x800 - prob.*) >> 5; - self.range = bound; - - try self.normalize(); - return false; - } else { - if (update) - prob.* -= prob.* >> 5; - self.code -= bound; - self.range -= bound; - - try self.normalize(); - return true; - } - } - - fn parseBitTree( - self: *RangeDecoder, - num_bits: u5, - probs: []u16, - update: bool, - ) !u32 { - var tmp: u32 = 1; - var i: u5 = 0; - while (i < num_bits) : (i += 1) { - const bit = try self.decodeBit(&probs[tmp], update); - tmp = (tmp << 1) ^ @boolToInt(bit); - } - return tmp - (@as(u32, 1) << num_bits); - } - - pub fn parseReverseBitTree( - self: *RangeDecoder, - num_bits: u5, - probs: []u16, - offset: usize, - update: bool, - ) !u32 { - var result: u32 = 0; - var tmp: usize = 1; - var i: u5 = 0; - while (i < num_bits) : (i += 1) { - const bit = @boolToInt(try self.decodeBit(&probs[offset + tmp], update)); - tmp = (tmp << 1) ^ bit; - result ^= @as(u32, bit) << i; - } - return result; - } -}; - -fn Vec2D(comptime T: type) type { - return struct { - data: []T, - cols: usize, - - const Self = @This(); - - pub fn init(allocator: Allocator, data: T, rows: usize, cols: usize) !Self { - const len = try std.math.mul(usize, rows, cols); - var vec2d = Self{ - .data = try allocator.alloc(T, len), - .cols = cols, - }; - vec2d.fill(data); - return vec2d; - } - - pub fn deinit(self: *Self, allocator: Allocator) void { - allocator.free(self.data); - } - - pub fn fill(self: *Self, value: T) void { - std.mem.set(T, self.data, value); - } - - pub fn get(self: *Self, row: usize) ![]T { - const start_row = try std.math.mul(usize, row, self.cols); - return self.data[start_row .. start_row + self.cols]; - } - }; -} - -const BitTree = struct { - num_bits: u5, - probs: ArrayListUnmanaged(u16), - - pub fn init(allocator: Allocator, num_bits: u5) !BitTree { - var probs_len = @as(usize, 1) << num_bits; - var probs = try ArrayListUnmanaged(u16).initCapacity(allocator, probs_len); - while (probs_len > 0) : (probs_len -= 1) - probs.appendAssumeCapacity(0x400); - return .{ .num_bits = num_bits, .probs = probs }; - } - - pub fn deinit(self: *BitTree, allocator: Allocator) void { - self.probs.deinit(allocator); - } - - pub fn parse( - self: *BitTree, - rangecoder: *RangeDecoder, - update: bool, - ) !u32 { - return rangecoder.parseBitTree(self.num_bits, self.probs.items, update); - } - - pub fn parseReverse( - self: *BitTree, - rangecoder: *RangeDecoder, - update: bool, - ) !u32 { - return rangecoder.parseReverseBitTree(self.num_bits, self.probs.items, 0, update); - } - - pub fn reset(self: *BitTree) void { - std.mem.set(u16, self.probs.items, 0x400); - } -}; - -const LenDecoder = struct { - choice: u16, - choice2: u16, - low_coder: [16]BitTree, - mid_coder: [16]BitTree, - high_coder: BitTree, - - pub fn init(allocator: Allocator) !LenDecoder { - return .{ - .choice = 0x400, - .choice2 = 0x400, - .low_coder = .{ - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - }, - .mid_coder = .{ - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - }, - .high_coder = try BitTree.init(allocator, 8), - }; - } - - pub fn deinit(self: *LenDecoder, allocator: Allocator) void { - for (self.low_coder) |*t| t.deinit(allocator); - for (self.mid_coder) |*t| t.deinit(allocator); - self.high_coder.deinit(allocator); - } - - pub fn decode( - self: *LenDecoder, - rangecoder: *RangeDecoder, - pos_state: usize, - update: bool, - ) !usize { - if (!try rangecoder.decodeBit(&self.choice, update)) { - return @as(usize, try self.low_coder[pos_state].parse(rangecoder, update)); - } else if (!try rangecoder.decodeBit(&self.choice2, update)) { - return @as(usize, try self.mid_coder[pos_state].parse(rangecoder, update)) + 8; - } else { - return @as(usize, try self.high_coder.parse(rangecoder, update)) + 16; - } - } - - pub fn reset(self: *LenDecoder) void { - self.choice = 0x400; - self.choice2 = 0x400; - for (self.low_coder) |*t| t.reset(); - for (self.mid_coder) |*t| t.reset(); - self.high_coder.reset(); - } -}; From 73c857415eafc1d0856b6917cada94d8acde41e0 Mon Sep 17 00:00:00 2001 From: John Schmidt Date: Sat, 4 Feb 2023 18:28:36 +0100 Subject: [PATCH 02/65] std.json: fix parsing of structs with default value const pointers --- lib/std/json.zig | 2 +- lib/std/json/test.zig | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/std/json.zig b/lib/std/json.zig index 92afeead90..41bfb30bc5 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -1639,7 +1639,7 @@ fn parseInternal( const allocator = options.allocator orelse return error.AllocatorRequired; switch (ptrInfo.size) { .One => { - const r: T = try allocator.create(ptrInfo.child); + const r: *ptrInfo.child = try allocator.create(ptrInfo.child); errdefer allocator.destroy(r); r.* = try parseInternal(ptrInfo.child, token, tokens, options); return r; diff --git a/lib/std/json/test.zig b/lib/std/json/test.zig index 0bf0797587..70385acfb5 100644 --- a/lib/std/json/test.zig +++ b/lib/std/json/test.zig @@ -2238,6 +2238,14 @@ test "parse into struct with no fields" { try testing.expectEqual(T{}, try parse(T, &ts, ParseOptions{})); } +const test_const_value: usize = 123; + +test "parse into struct with default const pointer field" { + const T = struct { a: *const usize = &test_const_value }; + var ts = TokenStream.init("{}"); + try testing.expectEqual(T{}, try parse(T, &ts, .{})); +} + test "parse into struct where destination and source lengths mismatch" { const T = struct { a: [2]u8 }; var ts = TokenStream.init("{\"a\": \"bbb\"}"); From b42caff2a20eb34073f6a766f55d27288028165a Mon Sep 17 00:00:00 2001 From: John Schmidt Date: Sat, 4 Feb 2023 18:44:57 +0100 Subject: [PATCH 03/65] std.json: avoid dangling pointers in ValueTree Closes https://github.com/ziglang/zig/issues/5229. --- lib/std/json.zig | 9 +++++++-- lib/std/json/test.zig | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/lib/std/json.zig b/lib/std/json.zig index 41bfb30bc5..1c4cbdf5cb 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -1163,11 +1163,12 @@ const ArrayList = std.ArrayList; const StringArrayHashMap = std.StringArrayHashMap; pub const ValueTree = struct { - arena: ArenaAllocator, + arena: *ArenaAllocator, root: Value, pub fn deinit(self: *ValueTree) void { self.arena.deinit(); + self.arena.child_allocator.destroy(self.arena); } }; @@ -1809,8 +1810,12 @@ pub const Parser = struct { pub fn parse(p: *Parser, input: []const u8) !ValueTree { var s = TokenStream.init(input); - var arena = ArenaAllocator.init(p.allocator); + var arena = try p.allocator.create(ArenaAllocator); + errdefer p.allocator.destroy(arena); + + arena.* = ArenaAllocator.init(p.allocator); errdefer arena.deinit(); + const allocator = arena.allocator(); while (try s.next()) |token| { diff --git a/lib/std/json/test.zig b/lib/std/json/test.zig index 70385acfb5..7f87b2fa52 100644 --- a/lib/std/json/test.zig +++ b/lib/std/json/test.zig @@ -2589,6 +2589,24 @@ test "parsing empty string gives appropriate error" { try testing.expectError(error.UnexpectedEndOfJson, testParse(arena_allocator.allocator(), "")); } +test "parse tree should not contain dangling pointers" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + + var p = json.Parser.init(arena_allocator.allocator(), false); + defer p.deinit(); + + var tree = try p.parse("[]"); + defer tree.deinit(); + + // Allocation should succeed + var i: usize = 0; + while (i < 100) : (i += 1) { + try tree.root.Array.append(std.json.Value{ .Integer = 100 }); + } + try testing.expectEqual(tree.root.Array.items.len, 100); +} + test "integer after float has proper type" { var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena_allocator.deinit(); From b1dd4b17d89633c4d26c39952f69d7ff2efb98ce Mon Sep 17 00:00:00 2001 From: John Schmidt Date: Sat, 4 Feb 2023 14:42:42 +0100 Subject: [PATCH 04/65] std.json: don't free struct default values Closes https://github.com/ziglang/zig/issues/9509. --- lib/std/json.zig | 32 +++++++++++++++++++++++++++++++- lib/std/json/test.zig | 25 +++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/lib/std/json.zig b/lib/std/json.zig index 1c4cbdf5cb..42c4d98d32 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -1745,7 +1745,37 @@ pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void { .Struct => |structInfo| { inline for (structInfo.fields) |field| { if (!field.is_comptime) { - parseFree(field.type, @field(value, field.name), options); + var should_free = true; + if (field.default_value) |default| { + switch (@typeInfo(field.type)) { + // We must not attempt to free pointers to struct default values + .Pointer => |fieldPtrInfo| { + const field_value = @field(value, field.name); + const field_ptr = switch (fieldPtrInfo.size) { + .One => field_value, + .Slice => field_value.ptr, + else => unreachable, // Other pointer types are not parseable + }; + const field_addr = @ptrToInt(field_ptr); + + const casted_default = @ptrCast(*const field.type, @alignCast(@alignOf(field.type), default)).*; + const default_ptr = switch (fieldPtrInfo.size) { + .One => casted_default, + .Slice => casted_default.ptr, + else => unreachable, // Other pointer types are not parseable + }; + const default_addr = @ptrToInt(default_ptr); + + if (field_addr == default_addr) { + should_free = false; + } + }, + else => {}, + } + } + if (should_free) { + parseFree(field.type, @field(value, field.name), options); + } } } }, diff --git a/lib/std/json/test.zig b/lib/std/json/test.zig index 7f87b2fa52..3c9414a59c 100644 --- a/lib/std/json/test.zig +++ b/lib/std/json/test.zig @@ -2246,6 +2246,31 @@ test "parse into struct with default const pointer field" { try testing.expectEqual(T{}, try parse(T, &ts, .{})); } +const test_default_usize: usize = 123; +const test_default_usize_ptr: *align(1) const usize = &test_default_usize; +const test_default_str: []const u8 = "test str"; +const test_default_str_slice: [2][]const u8 = [_][]const u8{ + "test1", + "test2", +}; + +test "freeing parsed structs with pointers to default values" { + const T = struct { + int: *const usize = &test_default_usize, + int_ptr: *allowzero align(1) const usize = test_default_usize_ptr, + str: []const u8 = test_default_str, + str_slice: []const []const u8 = &test_default_str_slice, + }; + + var ts = json.TokenStream.init("{}"); + const options = .{ .allocator = std.heap.page_allocator }; + const parsed = try json.parse(T, &ts, options); + + try testing.expectEqual(T{}, parsed); + + json.parseFree(T, parsed, options); +} + test "parse into struct where destination and source lengths mismatch" { const T = struct { a: [2]u8 }; var ts = TokenStream.init("{\"a\": \"bbb\"}"); From 1876eaec51cab1251a53391f4b4d0fd227e83710 Mon Sep 17 00:00:00 2001 From: Jonathan Marler Date: Sat, 4 Feb 2023 13:07:48 -0700 Subject: [PATCH 05/65] mark deprecated assumeSentinel as pub assumeSentinel was removed and replaced with a compileError, but it's not pub, so the error message indicates it's private rather than providing the compileError message. --- lib/std/meta.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/meta.zig b/lib/std/meta.zig index c7ec4b1702..7ab4c9f25c 100644 --- a/lib/std/meta.zig +++ b/lib/std/meta.zig @@ -332,7 +332,7 @@ pub fn Sentinel(comptime T: type, comptime sentinel_val: Elem(T)) type { @compileError("Unable to derive a sentinel pointer type from " ++ @typeName(T)); } -const assumeSentinel = @compileError("This function has been removed, consider using std.mem.sliceTo() or if needed a @ptrCast()"); +pub const assumeSentinel = @compileError("This function has been removed, consider using std.mem.sliceTo() or if needed a @ptrCast()"); pub fn containerLayout(comptime T: type) Type.ContainerLayout { return switch (@typeInfo(T)) { From 289e8fab7949bebfc39209b86a69317a53e747f8 Mon Sep 17 00:00:00 2001 From: ee7 <45465154+ee7@users.noreply.github.com> Date: Sat, 4 Feb 2023 19:08:01 +0100 Subject: [PATCH 06/65] langref, tools: rename --single-threaded to -fsingle-threaded Commit 40f5e5dfc60e ("CLI: introduce -fsingle-threaded/ -fno-single-threaded", 2021-12-01) removed the `--single-threaded` option, but didn't update all mentions of it. $ zig version 0.11.0-dev.1568+c9b957c93 $ zig build-exe --help | grep single -fsingle-threaded Code assumes there is only one thread -fno-single-threaded Code may not assume there is only one thread $ zig build-exe --single-threaded foo.zig error: unrecognized parameter: '--single-threaded' --- doc/langref.html.in | 2 +- tools/update_cpu_features.zig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 36711e555a..5d087f72a2 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -9604,7 +9604,7 @@ pub fn build(b: *std.Build) void { {#header_close#} {#header_open|Single Threaded Builds#} -

Zig has a compile option --single-threaded which has the following effects:

+

Zig has a compile option -fsingle-threaded which has the following effects: