std.compress.zstandard: improve doc comments

2026-02-20 00:08:56 +00:00 · 2023-02-02 20:49:11 +11:00 · 2023-02-02 20:49:11 +11:00 · 89f9c5cb37
commit 89f9c5cb37
parent 7e2755646f
2 changed files with 137 additions and 67 deletions
--- a/lib/std/compress/zstandard/decode/block.zig
+++ b/lib/std/compress/zstandard/decode/block.zig
@ -23,7 +23,6 @@ pub const Error = error{
    ReservedBlock,
    MalformedRleBlock,
    MalformedCompressedBlock,
-    EndOfStream,
 };

 pub const DecodeState = struct {
@ -92,11 +91,17 @@ pub const DecodeState = struct {
    /// stream and Huffman tree from `literals` and reads the FSE tables from
    /// `source`.
    ///
-    /// Errors:
-    ///   - returns `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
-    ///     first byte does not have any bits set.
-    ///   - returns `error.TreelessLiteralsFirst` `literals` is a treeless literals section
-    ///     and the decode state does not have a Huffman tree from a previous block.
+    /// Errors returned:
+    ///   - `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
+    ///     first byte does not have any bits set
+    ///   - `error.TreelessLiteralsFirst` if `literals` is a treeless literals
+    ///     section and the decode state does not have a Huffman tree from a
+    ///     previous block
+    ///   - `error.RepeatModeFirst` on the first call if one of the sequence FSE
+    ///     tables is set to repeat mode
+    ///   - `error.MalformedAccuracyLog` if an FSE table has an invalid accuracy log
+    ///   - `error.MalformedFseTable` if there are errors decoding an FSE table
+    ///   - `error.EndOfStream` if `source` ends before all FSE tables are read
    pub fn prepare(
        self: *DecodeState,
        source: anytype,
@ -132,8 +137,10 @@ pub const DecodeState = struct {
        }
    }

-    /// Read initial FSE states for sequence decoding. Returns `error.EndOfStream`
-    /// if `bit_reader` does not contain enough bits.
+    /// Read initial FSE states for sequence decoding.
+    ///
+    /// Errors returned:
+    ///   - `error.EndOfStream` if `bit_reader` does not contain enough bits.
    pub fn readInitialFseState(self: *DecodeState, bit_reader: *readers.ReverseBitReader) error{EndOfStream}!void {
        self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log);
        self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log);
@ -308,13 +315,19 @@ pub const DecodeState = struct {
    } || DecodeLiteralsError;

    /// Decode one sequence from `bit_reader` into `dest`, written starting at
-    /// `write_pos` and update FSE states if `last_sequence` is `false`. Returns
-    /// `error.MalformedSequence` error if the decompressed sequence would be longer
-    /// than `sequence_size_limit` or the sequence's offset is too large; returns
-    /// `error.EndOfStream` if `bit_reader` does not contain enough bits; returns
-    /// `error.UnexpectedEndOfLiteralStream` if the decoder state's literal streams
-    /// do not contain enough literals for the sequence (this may mean the literal
-    /// stream or the sequence is malformed).
+    /// `write_pos` and update FSE states if `last_sequence` is `false`.
+    /// `prepare()` must be called for the block before attempting to decode
+    /// sequences.
+    ///
+    /// Errors returned:
+    ///   - `error.MalformedSequence` if the decompressed sequence would be
+    ///     longer than `sequence_size_limit` or the sequence's offset is too
+    ///     large
+    ///   - `error.UnexpectedEndOfLiteralStream` if the decoder state's literal
+    ///     streams do not contain enough literals for the sequence (this may
+    ///     mean the literal stream or the sequence is malformed).
+    ///   - `error.OffsetCodeTooLarge` if an invalid offset code is found
+    ///   - `error.EndOfStream` if `bit_reader` does not contain enough bits
    pub fn decodeSequenceSlice(
        self: *DecodeState,
        dest: []u8,
@ -336,7 +349,8 @@ pub const DecodeState = struct {
        return sequence_length;
    }

-    /// Decode one sequence from `bit_reader` into `dest`; see `decodeSequenceSlice`.
+    /// Decode one sequence from `bit_reader` into `dest`; see
+    /// `decodeSequenceSlice`.
    pub fn decodeSequenceRingBuffer(
        self: *DecodeState,
        dest: *RingBuffer,
@ -364,7 +378,7 @@ pub const DecodeState = struct {
        try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]);
    }

-    pub fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
+    fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
        try self.literal_stream_reader.init(bytes);
    }

@ -393,12 +407,14 @@ pub const DecodeState = struct {
        PrefixNotFound,
    } || LiteralBitsError;

-    /// Decode `len` bytes of literals into `dest`. `literals` should be the
-    /// `LiteralsSection` that was passed to `prepare()`. Returns
-    /// `error.MalformedLiteralsLength` if the number of literal bytes decoded by
-    /// `self` plus `len` is greater than the regenerated size of `literals`.
-    /// Returns `error.UnexpectedEndOfLiteralStream` and `error.PrefixNotFound` if
-    /// there are problems decoding Huffman compressed literals.
+    /// Decode `len` bytes of literals into `dest`.
+    ///
+    /// Errors returned:
+    ///   - `error.MalformedLiteralsLength` if the number of literal bytes
+    ///     decoded by `self` plus `len` is greater than the regenerated size of
+    ///     `literals`
+    ///   - `error.UnexpectedEndOfLiteralStream` and `error.PrefixNotFound` if
+    ///     there are problems decoding Huffman compressed literals
    pub fn decodeLiteralsSlice(
        self: *DecodeState,
        dest: []u8,
@ -561,7 +577,6 @@ pub const DecodeState = struct {
 ///   - `error.MalformedRleBlock` if the block is an RLE block and `src.len < 1`
 ///   - `error.MalformedCompressedBlock` if there are errors decoding a
 ///     compressed block
-///   - `error.EndOfStream` if the sequence bit stream ends unexpectedly
 pub fn decodeBlock(
    dest: []u8,
    src: []const u8,
@ -738,7 +753,8 @@ pub fn decodeBlockRingBuffer(
 /// `error.SequenceBufferTooSmall` are returned (the maximum block size is an
 /// upper bound for the size of both buffers). See `decodeBlock`
 /// and `decodeBlockRingBuffer` for functions that can decode a block without
-/// these extra copies.
+/// these extra copies. `error.EndOfStream` is returned if `source` does not
+/// contain enough bytes.
 pub fn decodeBlockReader(
    dest: *RingBuffer,
    source: anytype,
@ -820,6 +836,10 @@ pub fn decodeBlockHeader(src: *const [3]u8) frame.ZStandard.Block.Header {
    };
 }

+/// Decode the header of a block.
+///
+/// Errors returned:
+///   - `error.EndOfStream` if `src.len < 3`
 pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.ZStandard.Block.Header {
    if (src.len < 3) return error.EndOfStream;
    return decodeBlockHeader(src[0..3]);
@ -828,9 +848,14 @@ pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.ZStandar
 /// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
 /// number of bytes the section uses.
 ///
-/// Errors:
-///   - returns `error.MalformedLiteralsHeader` if the header is invalid
-///   - returns `error.MalformedLiteralsSection` if there are errors decoding
+/// Errors returned:
+///   - `error.MalformedLiteralsHeader` if the header is invalid
+///   - `error.MalformedLiteralsSection` if there are decoding errors
+///   - `error.MalformedAccuracyLog` if compressed literals have invalid
+///     accuracy log
+///   - `error.MalformedFseTable` if compressed literals have invalid FSE table
+///   - `error.MalformedHuffmanTree` if there are errors decoding a Huffman tree
+///   - `error.EndOfStream` if there are not enough bytes in `src`
 pub fn decodeLiteralsSectionSlice(
    src: []const u8,
    consumed_count: *usize,
@ -886,11 +911,7 @@ pub fn decodeLiteralsSectionSlice(
 }

 /// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
-/// number of bytes the section uses.
-///
-/// Errors:
-///   - returns `error.MalformedLiteralsHeader` if the header is invalid
-///   - returns `error.MalformedLiteralsSection` if there are errors decoding
+/// number of bytes the section uses. See `decodeLiteralsSectionSlice()`.
 pub fn decodeLiteralsSection(
    source: anytype,
    buffer: []u8,
@ -961,6 +982,9 @@ fn decodeStreams(size_format: u2, stream_data: []const u8) !LiteralsSection.Stre
 }

 /// Decode a literals section header.
+///
+/// Errors returned:
+///   - `error.EndOfStream` if there are not enough bytes in `source`
 pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header {
    const byte0 = try source.readByte();
    const block_type = @intToEnum(LiteralsSection.BlockType, byte0 & 0b11);
@ -1011,9 +1035,9 @@ pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header {

 /// Decode a sequences section header.
 ///
-/// Errors:
-///   - returns `error.ReservedBitSet` is the reserved bit is set
-///   - returns `error.MalformedSequencesHeader` if the header is invalid
+/// Errors returned:
+///   - `error.ReservedBitSet` if the reserved bit is set
+///   - `error.EndOfStream` if there are not enough bytes in `source`
 pub fn decodeSequencesHeader(
    source: anytype,
 ) !SequencesSection.Header {
--- a/lib/std/compress/zstandard/decompress.zig
+++ b/lib/std/compress/zstandard/decompress.zig
@ -25,11 +25,12 @@ pub fn isSkippableMagic(magic: u32) bool {

 /// Returns the kind of frame at the beginning of `source`.
 ///
-/// Errors:
-///   - returns `error.BadMagic` if `source` begins with bytes not equal to the
+/// Errors returned:
+///   - `error.BadMagic` if `source` begins with bytes not equal to the
 ///     Zstandard frame magic number, or outside the range of magic numbers for
 ///     skippable frames.
-pub fn decodeFrameType(source: anytype) !frame.Kind {
+///   - `error.EndOfStream` if `source` contains fewer than 4 bytes
+pub fn decodeFrameType(source: anytype) error{ BadMagic, EndOfStream }!frame.Kind {
    const magic = try source.readIntLittle(u32);
    return if (magic == frame.ZStandard.magic_number)
        .zstandard
@ -45,12 +46,23 @@ const ReadWriteCount = struct {
 };

 /// Decodes the frame at the start of `src` into `dest`. Returns the number of
-/// bytes read from `src` and written to `dest`.
+/// bytes read from `src` and written to `dest`. This function can only decode
+/// frames that declare the decompressed content size.
 ///
-/// Errors:
-///   - returns `error.UnknownContentSizeUnsupported`
-///   - returns `error.ContentTooLarge`
-///   - returns `error.BadMagic`
+/// Errors returned:
+///   - `error.UnknownContentSizeUnsupported` if the frame does not declare the
+///     uncompressed content size
+///   - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
+///   - `error.BadMagic` if the first 4 bytes of `src` are not a valid magic
+///     number for a Zstandard or Skippable frame
+///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
+///     contains a checksum that does not match the checksum of the decompressed
+///     data
+///   - `error.ReservedBitSet` if the reserved bit of the frame header is set
+///   - `error.UnusedBitSet` if the unused bit of the frame header is set
+///   - `error.EndOfStream` if `src` does not contain a complete frame
+///   - an error in `block.Error` if there are errors decoding a block
 pub fn decodeFrame(
    dest: []u8,
    src: []const u8,
@ -66,6 +78,7 @@ pub fn decodeFrame(
    };
 }

+/// Returns the frame checksum corresponding to the data fed into `hasher`
 pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
    const hash = hasher.final();
    return @intCast(u32, hash & 0xFFFFFFFF);
@ -74,20 +87,31 @@ pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
 const FrameError = error{
    DictionaryIdFlagUnsupported,
    ChecksumFailure,
+    EndOfStream,
 } || InvalidBit || block.Error;

 /// Decode a Zstandard frame from `src` into `dest`, returning the number of
-/// bytes read from `src` and written to `dest`; if the frame does not declare
-/// its decompressed content size `error.UnknownContentSizeUnsupported` is
-/// returned. Returns `error.DictionaryIdFlagUnsupported` if the frame uses a
-/// dictionary, and `error.ChecksumFailure` if `verify_checksum` is `true` and
-/// the frame contains a checksum that does not match the checksum computed from
-/// the decompressed frame.
+/// bytes read from `src` and written to `dest`. The first four bytes of `src`
+/// must be the magic number for a Zstandard frame.
+///
+/// Errors returned:
+///   - `error.UnknownContentSizeUnsupported` if the frame does not declare the
+///     uncompressed content size
+///   - `error.ContentTooLarge` if `dest` is smaller than the uncompressed
+///     data
+///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
+///     contains a checksum that does not match the checksum of the decompressed
+///     data
+///   - `error.ReservedBitSet` if the reserved bit of the frame header is set
+///   - `error.UnusedBitSet` if the unused bit of the frame header is set
+///   - `error.EndOfStream` if `src` does not contain a complete frame
+///   - an error in `block.Error` if there are errors decoding a block
 pub fn decodeZStandardFrame(
    dest: []u8,
    src: []const u8,
    verify_checksum: bool,
-) (error{ UnknownContentSizeUnsupported, ContentTooLarge, EndOfStream } || FrameError)!ReadWriteCount {
+) (error{ UnknownContentSizeUnsupported, ContentTooLarge } || FrameError)!ReadWriteCount {
    assert(readInt(u32, src[0..4]) == frame.ZStandard.magic_number);
    var consumed_count: usize = 4;

@ -127,7 +151,18 @@ pub const FrameContext = struct {
    has_checksum: bool,
    block_size_max: usize,

-    pub fn init(frame_header: frame.ZStandard.Header, window_size_max: usize, verify_checksum: bool) !FrameContext {
+    const Error = error{ DictionaryIdFlagUnsupported, WindowSizeUnknown, WindowTooLarge };
+    /// Validates `frame_header` and returns the associated `FrameContext`.
+    ///
+    /// Errors returned:
+    ///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+    ///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
+    ///   - `error.WindowTooLarge` if the window size is larger than `window_size_max`
+    pub fn init(
+        frame_header: frame.ZStandard.Header,
+        window_size_max: usize,
+        verify_checksum: bool,
+    ) Error!FrameContext {
        if (frame_header.descriptor.dictionary_id_flag != 0) return error.DictionaryIdFlagUnsupported;

        const window_size_raw = frameWindowSize(frame_header) orelse return error.WindowSizeUnknown;
@ -147,19 +182,29 @@ pub const FrameContext = struct {
 };

 /// Decode a Zstandard frame from `src` and return the decompressed bytes; see
-/// `decodeZStandardFrame()`. Returns `error.WindowSizeUnknown` if the frame
-/// does not declare its content size or a window descriptor (this indicates a
-/// malformed frame).
+/// `decodeZStandardFrame()`. `allocator` is used to allocate both the returned
+/// slice and internal buffers used during decoding. The first four bytes of
+/// `src` must be the magic number for a Zstandard frame.
 ///
-/// Errors:
-///   - returns `error.WindowTooLarge`
-///   - returns `error.WindowSizeUnknown`
+/// Errors returned:
+///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
+///   - `error.WindowTooLarge` if the window size is larger than
+///     `window_size_max`
+///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
+///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
+///     contains a checksum that does not match the checksum of the decompressed
+///     data
+///   - `error.ReservedBitSet` if the reserved bit of the frame header is set
+///   - `error.UnusedBitSet` if the unused bit of the frame header is set
+///   - `error.EndOfStream` if `src` does not contain a complete frame
+///   - `error.OutOfMemory` if `allocator` cannot allocate enough memory
+///   - an error in `block.Error` if there are errors decoding a block
 pub fn decodeZStandardFrameAlloc(
    allocator: std.mem.Allocator,
    src: []const u8,
    verify_checksum: bool,
    window_size_max: usize,
-) (error{ WindowSizeUnknown, WindowTooLarge, OutOfMemory, EndOfStream } || FrameError)![]u8 {
+) (error{OutOfMemory} || FrameContext.Error || FrameError)![]u8 {
    var result = std.ArrayList(u8).init(allocator);
    assert(readInt(u32, src[0..4]) == frame.ZStandard.magic_number);
    var consumed_count: usize = 4;
@ -222,7 +267,7 @@ fn decodeFrameBlocks(
    src: []const u8,
    consumed_count: *usize,
    hash: ?*std.hash.XxHash64,
-) block.Error!usize {
+) (error{EndOfStream} || block.Error)!usize {
    // These tables take 7680 bytes
    var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
    var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
@ -252,7 +297,8 @@ fn decodeFrameBlocks(
    return written_count;
 }

-/// Decode the header of a skippable frame.
+/// Decode the header of a skippable frame. The first four bytes of `src` must
+/// be a valid magic number for a Skippable frame.
 pub fn decodeSkippableHeader(src: *const [8]u8) frame.Skippable.Header {
    const magic = readInt(u32, src[0..4]);
    assert(isSkippableMagic(magic));
@ -263,8 +309,8 @@ pub fn decodeSkippableHeader(src: *const [8]u8) frame.Skippable.Header {
    };
 }

-/// Returns the window size required to decompress a frame, or `null` if it cannot be
-/// determined, which indicates a malformed frame header.
+/// Returns the window size required to decompress a frame, or `null` if it
+/// cannot be determined (which indicates a malformed frame header).
 pub fn frameWindowSize(header: frame.ZStandard.Header) ?u64 {
    if (header.window_descriptor) |descriptor| {
        const exponent = (descriptor & 0b11111000) >> 3;
@ -279,10 +325,10 @@ pub fn frameWindowSize(header: frame.ZStandard.Header) ?u64 {
 const InvalidBit = error{ UnusedBitSet, ReservedBitSet };
 /// Decode the header of a Zstandard frame.
 ///
-/// Errors:
-///   - returns `error.UnusedBitSet` if the unused bits of the header are set
-///   - returns `error.ReservedBitSet` if the reserved bits of the header are
-///     set
+/// Errors returned:
+///   - `error.UnusedBitSet` if the unused bits of the header are set
+///   - `error.ReservedBitSet` if the reserved bits of the header are set
+///   - `error.EndOfStream` if `source` does not contain a complete header
 pub fn decodeZStandardHeader(source: anytype) (error{EndOfStream} || InvalidBit)!frame.ZStandard.Header {
    const descriptor = @bitCast(frame.ZStandard.Header.Descriptor, try source.readByte());