std.compress.zstandard: improve doc comments

This commit is contained in:
dweiller 2023-02-02 20:49:11 +11:00
parent 7e2755646f
commit 89f9c5cb37
2 changed files with 137 additions and 67 deletions

View File

@ -23,7 +23,6 @@ pub const Error = error{
ReservedBlock,
MalformedRleBlock,
MalformedCompressedBlock,
EndOfStream,
};
pub const DecodeState = struct {
@ -92,11 +91,17 @@ pub const DecodeState = struct {
/// stream and Huffman tree from `literals` and reads the FSE tables from
/// `source`.
///
/// Errors:
/// - returns `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
/// first byte does not have any bits set.
/// - returns `error.TreelessLiteralsFirst` `literals` is a treeless literals section
/// and the decode state does not have a Huffman tree from a previous block.
/// Errors returned:
/// - `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
/// first byte does not have any bits set
/// - `error.TreelessLiteralsFirst` if `literals` is a treeless literals
/// section and the decode state does not have a Huffman tree from a
/// previous block
/// - `error.RepeatModeFirst` on the first call if one of the sequence FSE
/// tables is set to repeat mode
/// - `error.MalformedAccuracyLog` if an FSE table has an invalid accuracy
/// - `error.MalformedFseTable` if there are errors decoding an FSE table
/// - `error.EndOfStream` if `source` ends before all FSE tables are read
pub fn prepare(
self: *DecodeState,
source: anytype,
@ -132,8 +137,10 @@ pub const DecodeState = struct {
}
}
/// Read initial FSE states for sequence decoding. Returns `error.EndOfStream`
/// if `bit_reader` does not contain enough bits.
/// Read initial FSE states for sequence decoding.
///
/// Errors returned:
/// - `error.EndOfStream` if `bit_reader` does not contain enough bits.
pub fn readInitialFseState(self: *DecodeState, bit_reader: *readers.ReverseBitReader) error{EndOfStream}!void {
self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log);
self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log);
@ -308,13 +315,19 @@ pub const DecodeState = struct {
} || DecodeLiteralsError;
/// Decode one sequence from `bit_reader` into `dest`, written starting at
/// `write_pos` and update FSE states if `last_sequence` is `false`. Returns
/// `error.MalformedSequence` error if the decompressed sequence would be longer
/// than `sequence_size_limit` or the sequence's offset is too large; returns
/// `error.EndOfStream` if `bit_reader` does not contain enough bits; returns
/// `error.UnexpectedEndOfLiteralStream` if the decoder state's literal streams
/// do not contain enough literals for the sequence (this may mean the literal
/// stream or the sequence is malformed).
/// `write_pos` and update FSE states if `last_sequence` is `false`.
/// `prepare()` must be called for the block before attempting to decode
/// sequences.
///
/// Errors returned:
/// - `error.MalformedSequence` if the decompressed sequence would be
/// longer than `sequence_size_limit` or the sequence's offset is too
/// large
/// - `error.UnexpectedEndOfLiteralStream` if the decoder state's literal
/// streams do not contain enough literals for the sequence (this may
/// mean the literal stream or the sequence is malformed).
/// - `error.OffsetCodeTooLarge` if an invalid offset code is found
/// - `error.EndOfStream` if `bit_reader` does not contain enough bits
pub fn decodeSequenceSlice(
self: *DecodeState,
dest: []u8,
@ -336,7 +349,8 @@ pub const DecodeState = struct {
return sequence_length;
}
/// Decode one sequence from `bit_reader` into `dest`; see `decodeSequenceSlice`.
/// Decode one sequence from `bit_reader` into `dest`; see
/// `decodeSequenceSlice`.
pub fn decodeSequenceRingBuffer(
self: *DecodeState,
dest: *RingBuffer,
@ -364,7 +378,7 @@ pub const DecodeState = struct {
try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]);
}
pub fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
try self.literal_stream_reader.init(bytes);
}
@ -393,12 +407,14 @@ pub const DecodeState = struct {
PrefixNotFound,
} || LiteralBitsError;
/// Decode `len` bytes of literals into `dest`. `literals` should be the
/// `LiteralsSection` that was passed to `prepare()`. Returns
/// `error.MalformedLiteralsLength` if the number of literal bytes decoded by
/// `self` plus `len` is greater than the regenerated size of `literals`.
/// Returns `error.UnexpectedEndOfLiteralStream` and `error.PrefixNotFound` if
/// there are problems decoding Huffman compressed literals.
/// Decode `len` bytes of literals into `dest`.
///
/// Errors returned:
/// - `error.MalformedLiteralsLength` if the number of literal bytes
/// decoded by `self` plus `len` is greater than the regenerated size of
/// `literals`
/// - `error.UnexpectedEndOfLiteralStream` and `error.PrefixNotFound` if
/// there are problems decoding Huffman compressed literals
pub fn decodeLiteralsSlice(
self: *DecodeState,
dest: []u8,
@ -561,7 +577,6 @@ pub const DecodeState = struct {
/// - `error.MalformedRleBlock` if the block is an RLE block and `src.len < 1`
/// - `error.MalformedCompressedBlock` if there are errors decoding a
/// compressed block
/// - `error.EndOfStream` if the sequence bit stream ends unexpectedly
pub fn decodeBlock(
dest: []u8,
src: []const u8,
@ -738,7 +753,8 @@ pub fn decodeBlockRingBuffer(
/// `error.SequenceBufferTooSmall` are returned (the maximum block size is an
/// upper bound for the size of both buffers). See `decodeBlock`
/// and `decodeBlockRingBuffer` for functions that can decode a block without
/// these extra copies.
/// these extra copies. `error.EndOfStream` is returned if `source` does not
/// contain enough bytes.
pub fn decodeBlockReader(
dest: *RingBuffer,
source: anytype,
@ -820,6 +836,10 @@ pub fn decodeBlockHeader(src: *const [3]u8) frame.ZStandard.Block.Header {
};
}
/// Decode the header of a block.
///
/// Errors returned:
/// - `error.EndOfStream` if `src.len < 3`
pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.ZStandard.Block.Header {
if (src.len < 3) return error.EndOfStream;
return decodeBlockHeader(src[0..3]);
@ -828,9 +848,14 @@ pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.ZStandar
/// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
/// number of bytes the section uses.
///
/// Errors:
/// - returns `error.MalformedLiteralsHeader` if the header is invalid
/// - returns `error.MalformedLiteralsSection` if there are errors decoding
/// Errors returned:
/// - `error.MalformedLiteralsHeader` if the header is invalid
/// - `error.MalformedLiteralsSection` if there are decoding errors
/// - `error.MalformedAccuracyLog` if compressed literals have invalid
/// accuracy
/// - `error.MalformedFseTable` if compressed literals have invalid FSE table
/// - `error.MalformedHuffmanTree` if there are errors decoding a Huffman tree
/// - `error.EndOfStream` if there are not enough bytes in `src`
pub fn decodeLiteralsSectionSlice(
src: []const u8,
consumed_count: *usize,
@ -886,11 +911,7 @@ pub fn decodeLiteralsSectionSlice(
}
/// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
/// number of bytes the section uses.
///
/// Errors:
/// - returns `error.MalformedLiteralsHeader` if the header is invalid
/// - returns `error.MalformedLiteralsSection` if there are errors decoding
/// number of bytes the section uses. See `decodeLiteralsSectionSlice()`.
pub fn decodeLiteralsSection(
source: anytype,
buffer: []u8,
@ -961,6 +982,9 @@ fn decodeStreams(size_format: u2, stream_data: []const u8) !LiteralsSection.Stre
}
/// Decode a literals section header.
///
/// Errors returned:
/// - `error.EndOfStream` if there are not enough bytes in `source`
pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header {
const byte0 = try source.readByte();
const block_type = @intToEnum(LiteralsSection.BlockType, byte0 & 0b11);
@ -1011,9 +1035,9 @@ pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header {
/// Decode a sequences section header.
///
/// Errors:
/// - returns `error.ReservedBitSet` is the reserved bit is set
/// - returns `error.MalformedSequencesHeader` if the header is invalid
/// Errors returned:
/// - `error.ReservedBitSet` if the reserved bit is set
/// - `error.EndOfStream` if there are not enough bytes in `source`
pub fn decodeSequencesHeader(
source: anytype,
) !SequencesSection.Header {

View File

@ -25,11 +25,12 @@ pub fn isSkippableMagic(magic: u32) bool {
/// Returns the kind of frame at the beginning of `source`.
///
/// Errors:
/// - returns `error.BadMagic` if `source` begins with bytes not equal to the
/// Errors returned:
/// - `error.BadMagic` if `source` begins with bytes not equal to the
/// Zstandard frame magic number, or outside the range of magic numbers for
/// skippable frames.
pub fn decodeFrameType(source: anytype) !frame.Kind {
/// - `error.EndOfStream` if `source` contains fewer than 4 bytes
pub fn decodeFrameType(source: anytype) error{ BadMagic, EndOfStream }!frame.Kind {
const magic = try source.readIntLittle(u32);
return if (magic == frame.ZStandard.magic_number)
.zstandard
@ -45,12 +46,23 @@ const ReadWriteCount = struct {
};
/// Decodes the frame at the start of `src` into `dest`. Returns the number of
/// bytes read from `src` and written to `dest`.
/// bytes read from `src` and written to `dest`. This function can only decode
/// frames that declare the decompressed content size.
///
/// Errors:
/// - returns `error.UnknownContentSizeUnsupported`
/// - returns `error.ContentTooLarge`
/// - returns `error.BadMagic`
/// Errors returned:
/// - `error.UnknownContentSizeUnsupported` if the frame does not declare the
/// uncompressed content size
/// - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
/// - `error.BadMagic` if the first 4 bytes of `src` are not a valid magic
/// number for a Zstandard or Skippable frame
/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
/// contains a checksum that does not match the checksum of the decompressed
/// data
/// - `error.ReservedBitSet` if the reserved bit of the frame header is set
/// - `error.UnusedBitSet` if the unused bit of the frame header is set
/// - `error.EndOfStream` if `src` does not contain a complete frame
/// - an error in `block.Error` if there are errors decoding a block
pub fn decodeFrame(
dest: []u8,
src: []const u8,
@ -66,6 +78,7 @@ pub fn decodeFrame(
};
}
/// Returns the frame checksum corresponding to the data fed into `hasher`
pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
const hash = hasher.final();
return @intCast(u32, hash & 0xFFFFFFFF);
@ -74,20 +87,31 @@ pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
const FrameError = error{
DictionaryIdFlagUnsupported,
ChecksumFailure,
EndOfStream,
} || InvalidBit || block.Error;
/// Decode a Zstandard frame from `src` into `dest`, returning the number of
/// bytes read from `src` and written to `dest`; if the frame does not declare
/// its decompressed content size `error.UnknownContentSizeUnsupported` is
/// returned. Returns `error.DictionaryIdFlagUnsupported` if the frame uses a
/// dictionary, and `error.ChecksumFailure` if `verify_checksum` is `true` and
/// the frame contains a checksum that does not match the checksum computed from
/// the decompressed frame.
/// bytes read from `src` and written to `dest`. The first four bytes of `src`
/// must be the magic number for a Zstandard frame.
///
/// Errors returned:
/// - `error.UnknownContentSizeUnsupported` if the frame does not declare the
/// uncompressed content size
/// - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
/// contains a checksum that does not match the checksum of the decompressed
/// data
/// - `error.ReservedBitSet` if the reserved bit of the frame header is set
/// - `error.UnusedBitSet` if the unused bit of the frame header is set
/// - `error.EndOfStream` if `src` does not contain a complete frame
/// - an error in `block.Error` if there are errors decoding a block
pub fn decodeZStandardFrame(
dest: []u8,
src: []const u8,
verify_checksum: bool,
) (error{ UnknownContentSizeUnsupported, ContentTooLarge, EndOfStream } || FrameError)!ReadWriteCount {
) (error{ UnknownContentSizeUnsupported, ContentTooLarge } || FrameError)!ReadWriteCount {
assert(readInt(u32, src[0..4]) == frame.ZStandard.magic_number);
var consumed_count: usize = 4;
@ -127,7 +151,18 @@ pub const FrameContext = struct {
has_checksum: bool,
block_size_max: usize,
pub fn init(frame_header: frame.ZStandard.Header, window_size_max: usize, verify_checksum: bool) !FrameContext {
const Error = error{ DictionaryIdFlagUnsupported, WindowSizeUnknown, WindowTooLarge };
/// Validates `frame_header` and returns the associated `FrameContext`.
///
/// Errors returned:
/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
/// - `error.WindowSizeUnknown` if the frame does not have a valid window size
/// - `error.WindowTooLarge` if the window size is larger than
/// `window_size_max`
pub fn init(
frame_header: frame.ZStandard.Header,
window_size_max: usize,
verify_checksum: bool,
) Error!FrameContext {
if (frame_header.descriptor.dictionary_id_flag != 0) return error.DictionaryIdFlagUnsupported;
const window_size_raw = frameWindowSize(frame_header) orelse return error.WindowSizeUnknown;
@ -147,19 +182,29 @@ pub const FrameContext = struct {
};
/// Decode a Zstandard frame from `src` and return the decompressed bytes; see
/// `decodeZStandardFrame()`. Returns `error.WindowSizeUnknown` if the frame
/// does not declare its content size or a window descriptor (this indicates a
/// malformed frame).
/// `decodeZStandardFrame()`. `allocator` is used to allocate both the returned
/// slice and internal buffers used during decoding. The first four bytes of
/// `src` must be the magic number for a Zstandard frame.
///
/// Errors:
/// - returns `error.WindowTooLarge`
/// - returns `error.WindowSizeUnknown`
/// Errors returned:
/// - `error.WindowSizeUnknown` if the frame does not have a valid window size
/// - `error.WindowTooLarge` if the window size is larger than
/// `window_size_max`
/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
/// contains a checksum that does not match the checksum of the decompressed
/// data
/// - `error.ReservedBitSet` if the reserved bit of the frame header is set
/// - `error.UnusedBitSet` if the unused bit of the frame header is set
/// - `error.EndOfStream` if `src` does not contain a complete frame
/// - `error.OutOfMemory` if `allocator` cannot allocate enough memory
/// - an error in `block.Error` if there are errors decoding a block
pub fn decodeZStandardFrameAlloc(
allocator: std.mem.Allocator,
src: []const u8,
verify_checksum: bool,
window_size_max: usize,
) (error{ WindowSizeUnknown, WindowTooLarge, OutOfMemory, EndOfStream } || FrameError)![]u8 {
) (error{OutOfMemory} || FrameContext.Error || FrameError)![]u8 {
var result = std.ArrayList(u8).init(allocator);
assert(readInt(u32, src[0..4]) == frame.ZStandard.magic_number);
var consumed_count: usize = 4;
@ -222,7 +267,7 @@ fn decodeFrameBlocks(
src: []const u8,
consumed_count: *usize,
hash: ?*std.hash.XxHash64,
) block.Error!usize {
) (error{EndOfStream} || block.Error)!usize {
// These tables take 7680 bytes
var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
@ -252,7 +297,8 @@ fn decodeFrameBlocks(
return written_count;
}
/// Decode the header of a skippable frame.
/// Decode the header of a skippable frame. The first four bytes of `src` must
/// be a valid magic number for a Skippable frame.
pub fn decodeSkippableHeader(src: *const [8]u8) frame.Skippable.Header {
const magic = readInt(u32, src[0..4]);
assert(isSkippableMagic(magic));
@ -263,8 +309,8 @@ pub fn decodeSkippableHeader(src: *const [8]u8) frame.Skippable.Header {
};
}
/// Returns the window size required to decompress a frame, or `null` if it cannot be
/// determined, which indicates a malformed frame header.
/// Returns the window size required to decompress a frame, or `null` if it
/// cannot be determined (which indicates a malformed frame header).
pub fn frameWindowSize(header: frame.ZStandard.Header) ?u64 {
if (header.window_descriptor) |descriptor| {
const exponent = (descriptor & 0b11111000) >> 3;
@ -279,10 +325,10 @@ pub fn frameWindowSize(header: frame.ZStandard.Header) ?u64 {
const InvalidBit = error{ UnusedBitSet, ReservedBitSet };
/// Decode the header of a Zstandard frame.
///
/// Errors:
/// - returns `error.UnusedBitSet` if the unused bits of the header are set
/// - returns `error.ReservedBitSet` if the reserved bits of the header are
/// set
/// Errors returned:
/// - `error.UnusedBitSet` if the unused bits of the header are set
/// - `error.ReservedBitSet` if the reserved bits of the header are set
/// - `error.EndOfStream` if `source` does not contain a complete header
pub fn decodeZStandardHeader(source: anytype) (error{EndOfStream} || InvalidBit)!frame.ZStandard.Header {
const descriptor = @bitCast(frame.ZStandard.Header.Descriptor, try source.readByte());