std: fix a bunch of compilation errors

Andrew Kelley 2025-05-26 20:31:35 -07:00
parent fd4fb10722
commit da303bdaf1
25 changed files with 1008 additions and 837 deletions

View File

@ -1087,16 +1087,14 @@ pub const Coff = struct {
const pe_pointer_offset = 0x3C;
const pe_magic = "PE\x00\x00";
var stream = std.io.fixedBufferStream(data);
const reader = stream.reader();
try stream.seekTo(pe_pointer_offset);
var reader: std.io.BufferedReader = undefined;
reader.initFixed(data[pe_pointer_offset..]);
const coff_header_offset = try reader.readInt(u32, .little);
try stream.seekTo(coff_header_offset);
var buf: [4]u8 = undefined;
try reader.readNoEof(&buf);
const is_image = mem.eql(u8, pe_magic, &buf);
reader.initFixed(data[coff_header_offset..]);
const magic = try reader.peek(4);
const is_image = mem.eql(u8, pe_magic, magic);
var coff = @This(){
var coff: Coff = .{
.data = data,
.is_image = is_image,
.is_loaded = is_loaded,
@ -1123,16 +1121,16 @@ pub const Coff = struct {
if (@intFromEnum(DirectoryEntry.DEBUG) >= data_dirs.len) return null;
const debug_dir = data_dirs[@intFromEnum(DirectoryEntry.DEBUG)];
var stream = std.io.fixedBufferStream(self.data);
const reader = stream.reader();
var reader: std.io.BufferedReader = undefined;
reader.initFixed(self.data);
if (self.is_loaded) {
try stream.seekTo(debug_dir.virtual_address);
reader.initFixed(self.data[debug_dir.virtual_address..]);
} else {
// Find what section the debug_dir is in, in order to convert the RVA to a file offset
for (self.getSectionHeaders()) |*sect| {
if (debug_dir.virtual_address >= sect.virtual_address and debug_dir.virtual_address < sect.virtual_address + sect.virtual_size) {
try stream.seekTo(sect.pointer_to_raw_data + (debug_dir.virtual_address - sect.virtual_address));
reader.initFixed(self.data[sect.pointer_to_raw_data + (debug_dir.virtual_address - sect.virtual_address) ..]);
break;
}
} else return error.InvalidDebugDirectory;
@ -1143,10 +1141,10 @@ pub const Coff = struct {
const debug_dir_entry_count = debug_dir.size / @sizeOf(DebugDirectoryEntry);
var i: u32 = 0;
while (i < debug_dir_entry_count) : (i += 1) {
const debug_dir_entry = try reader.readStruct(DebugDirectoryEntry);
const debug_dir_entry = try reader.takeStruct(DebugDirectoryEntry);
if (debug_dir_entry.type == .CODEVIEW) {
const dir_offset = if (self.is_loaded) debug_dir_entry.address_of_raw_data else debug_dir_entry.pointer_to_raw_data;
try stream.seekTo(dir_offset);
reader.initFixed(self.data[dir_offset..]);
break;
}
} else return null;
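For context: the DOS stub stores the PE header offset at 0x3C, and the COFF header follows the 4-byte "PE\x00\x00" signature at that offset. A standalone sketch of that layout check (hypothetical helper, not part of this commit):

    const std = @import("std");

    fn isPeImage(data: []const u8) bool {
        const pe_pointer_offset = 0x3C;
        if (data.len < pe_pointer_offset + 4) return false;
        // The PE header offset is stored little-endian at 0x3C.
        const pe_offset = std.mem.readInt(u32, data[pe_pointer_offset..][0..4], .little);
        if (data.len < pe_offset + 4) return false;
        return std.mem.eql(u8, data[pe_offset..][0..4], "PE\x00\x00");
    }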

View File

@ -68,81 +68,23 @@ pub const Container = enum {
//
// CINFO = 7, CM = 8, FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100
.zlib => &[_]u8{ 0x78, 0b10_0_11100 },
.raw => &{},
.raw => &.{},
};
}
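A sanity check on the bytes above: per RFC 1950 the 16-bit value CMF*256 + FLG must be a multiple of 31, which holds for 0x78, 0x9C. A standalone sketch:

    const std = @import("std");

    test "zlib header check bits" {
        const cmf: u16 = 0x78; // CINFO = 7 (32K window), CM = 8 (deflate)
        const flg: u16 = 0b10_0_11100; // FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100
        // 0x78 * 256 + 0x9C = 30876 = 31 * 996
        try std.testing.expectEqual(@as(u16, 0), (cmf * 256 + flg) % 31);
    }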
pub fn parseHeader(comptime wrap: Container, reader: *std.io.BufferedReader) !void {
switch (wrap) {
.gzip => try parseGzipHeader(reader),
.zlib => try parseZlibHeader(reader),
.raw => {},
}
}
fn parseGzipHeader(reader: *std.io.BufferedReader) !void {
const magic1 = try reader.read(u8);
const magic2 = try reader.read(u8);
const method = try reader.read(u8);
const flags = try reader.read(u8);
try reader.skipBytes(6); // mtime(4), xflags, os
if (magic1 != 0x1f or magic2 != 0x8b or method != 0x08)
return error.BadGzipHeader;
// Flags description: https://www.rfc-editor.org/rfc/rfc1952.html#page-5
if (flags != 0) {
if (flags & 0b0000_0100 != 0) { // FEXTRA
const extra_len = try reader.read(u16);
try reader.skipBytes(extra_len);
}
if (flags & 0b0000_1000 != 0) { // FNAME
try reader.skipStringZ();
}
if (flags & 0b0001_0000 != 0) { // FCOMMENT
try reader.skipStringZ();
}
if (flags & 0b0000_0010 != 0) { // FHCRC
try reader.skipBytes(2);
}
}
}
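For reference, per RFC 1952 a minimal gzip member header with no optional fields set is ten bytes:

    // 1f 8b        magic
    // 08           CM: deflate
    // 00           FLG: no FEXTRA/FNAME/FCOMMENT/FHCRC
    // 00 00 00 00  MTIME: not available
    // 00           XFL
    // ff           OS: unknown
    const minimal_gzip_header = [10]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff };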
fn parseZlibHeader(reader: *std.io.BufferedReader) !void {
const cm = try reader.read(u4);
const cinfo = try reader.read(u4);
_ = try reader.read(u8);
if (cm != 8 or cinfo > 7) {
return error.BadZlibHeader;
}
}
pub fn parseFooter(comptime wrap: Container, hasher: *Hasher(wrap), reader: *std.io.BufferedReader) !void {
switch (wrap) {
.gzip => {
try reader.fill(0);
if (try reader.read(u32) != hasher.chksum()) return error.WrongGzipChecksum;
if (try reader.read(u32) != hasher.bytesRead()) return error.WrongGzipSize;
},
.zlib => {
const chksum: u32 = @byteSwap(hasher.chksum());
if (try reader.read(u32) != chksum) return error.WrongZlibChecksum;
},
.raw => {},
}
}
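The two footers differ in both content and endianness: gzip (RFC 1952) appends a little-endian CRC-32 followed by the input size modulo 2^32, while zlib (RFC 1950) appends a big-endian Adler-32 and no length field. A standalone sketch with hypothetical helpers:

    const std = @import("std");

    fn gzipFooter(crc: u32, uncompressed_len: u64) [8]u8 {
        var footer: [8]u8 = undefined;
        std.mem.writeInt(u32, footer[0..4], crc, .little);
        std.mem.writeInt(u32, footer[4..8], @truncate(uncompressed_len), .little);
        return footer;
    }

    fn zlibFooter(adler: u32) [4]u8 {
        var footer: [4]u8 = undefined;
        std.mem.writeInt(u32, footer[0..4], adler, .big);
        return footer;
    }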
pub const Hasher = union(Container) {
raw: void,
gzip: struct {
crc: std.hash.Crc32 = .init(),
count: usize = 0,
},
zlib: std.hash.Adler32,
raw: void,
pub fn init(container: Container) Hasher {
return switch (container) {
.gzip => .{ .gzip = .{} },
.zlib => .{ .zlib = .init() },
.raw => {},
.raw => .raw,
};
}
@ -288,15 +230,18 @@ test "compress/decompress" {
// compress original stream to compressed stream
{
var original: std.io.BufferedReader = undefined;
original.initFixed(data);
original.initFixed(@constCast(data));
var compressed: std.io.BufferedWriter = undefined;
compressed.initFixed(&cmp_buf);
try Compress.pump(container, original.reader(), &compressed, .{ .level = level });
var compress: Compress = .init(&original, .raw);
var compress_br = compress.readable(&.{});
const n = try compress_br.readRemaining(&compressed, .{ .level = level });
if (compressed_size == 0) {
if (container == .gzip)
print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos });
compressed_size = compressed.pos;
}
try testing.expectEqual(compressed_size, n);
try testing.expectEqual(compressed_size, compressed.pos);
}
// decompress compressed stream to decompressed stream
@ -688,9 +633,7 @@ pub const match = struct {
pub const max_distance = 32768;
};
pub const history = struct {
pub const len = match.max_distance;
};
pub const history_len = match.max_distance;
pub const lookup = struct {
pub const bits = 15;
@ -707,7 +650,8 @@ test "zlib should not overshoot" {
0x03, 0x00, 0x8b, 0x61, 0x0f, 0xa4, 0x52, 0x5a, 0x94, 0x12,
};
var stream = std.io.fixedBufferStream(data[0..]);
var stream: std.io.BufferedReader = undefined;
stream.initFixed(&data);
const reader = stream.reader();
var dcp = Decompress.init(reader);

View File

@ -51,9 +51,37 @@ const math = std.math;
const Compress = @This();
const Token = @import("Token.zig");
const BlockWriter = @import("BlockWriter.zig");
const Container = std.compress.flate.Container;
const flate = @import("../flate.zig");
const Container = flate.Container;
const Lookup = @import("Lookup.zig");
const huffman = std.compress.flate.huffman;
const huffman = flate.huffman;
lookup: Lookup = .{},
tokens: Tokens = .{},
/// Asserted to have a buffer capacity of at least `flate.max_window_len`.
input: *std.io.BufferedReader,
block_writer: BlockWriter,
level: LevelArgs,
hasher: Container.Hasher,
// Match and literal at the previous position.
// Used for lazy match finding in processWindow.
prev_match: ?Token = null,
prev_literal: ?u8 = null,
pub fn readable(c: *Compress, buffer: []u8) std.io.BufferedReader {
return .{
.unbuffered_reader = .{
.context = c,
.vtable = .{
.read = read,
.readVec = readVec,
.discard = discard,
},
},
.buffer = buffer,
};
}
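A hypothetical wiring sketch for the new pull-style API (`source`, `buffer`, and `sink` are placeholders; the API is still in flux in this commit):

    var compress: Compress = .init(&source, .{ .container = .gzip, .level = .default });
    var compress_reader = compress.readable(&buffer);
    // Pulling from `compress_reader` drains `source` and yields gzip-framed bytes.
    const n = try compress_reader.readRemaining(&sink);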
pub const Options = struct {
level: Level = .default,
@ -77,10 +105,10 @@ pub const Level = enum(u4) {
best = 0xd,
};
// Number of tokens to accumulate in deflate before starting block encoding.
//
// In zlib this depends on memlevel: 6 + memlevel, where default memlevel is
// 8 and max 9 that gives 14 or 15 bits.
/// Number of tokens to accumulate in deflate before starting block encoding.
///
/// In zlib this depends on memlevel: 6 + memlevel, where the default memlevel
/// is 8 and the max is 9, giving 14 or 15 bits.
pub const n_tokens = 1 << 15;
/// Algorithm knobs for each level.
@ -102,85 +130,60 @@ const LevelArgs = struct {
}
};
lookup: Lookup = .{},
tokens: Tokens = .{},
output: *std.io.BufferedWriter,
block_writer: BlockWriter,
level: LevelArgs,
hasher: Container.Hasher,
// Match and literal at the previous position.
// Used for lazy match finding in processWindow.
prev_match: ?Token = null,
prev_literal: ?u8 = null,
pub fn init(output: *std.io.BufferedWriter, options: Options) std.io.Writer.Error!Compress {
try output.writeAll(options.container.header(output));
pub fn init(input: *std.io.BufferedReader, options: Options) Compress {
return .{
.output = output,
.block_writer = .init(output),
.input = input,
.block_writer = undefined,
.level = .get(options.level),
.hasher = .init(options.container),
.state = .header,
};
}
const FlushOption = enum { none, flush, final };
// Process data in window and create tokens. If token buffer is full
// flush tokens to the token writer. In the case of `flush` or `final`
// option it will process all data from the window. In the `none` case
// it will preserve some data for the next match.
fn tokenize(self: *Compress, flush_opt: FlushOption) !void {
// flush - process all data from window
const should_flush = (flush_opt != .none);
/// Process data in the window and create tokens. If the token buffer is
/// full, flush tokens to the token writer.
///
/// Returns number of bytes consumed from `lh`.
fn tokenizeSlice(c: *Compress, bw: *std.io.BufferedWriter, limit: std.io.Limit, lh: []const u8) !usize {
_ = bw;
_ = limit;
if (true) @panic("TODO");
var step: u16 = 1; // 1 in the case of literal, match length otherwise
const pos: u16 = c.win.pos();
const literal = lh[0]; // literal at current position
const min_len: u16 = if (c.prev_match) |m| m.length() else 0;
// While there is data in active lookahead buffer.
while (self.win.activeLookahead(should_flush)) |lh| {
var step: u16 = 1; // 1 in the case of literal, match length otherwise
const pos: u16 = self.win.pos();
const literal = lh[0]; // literal at current position
const min_len: u16 = if (self.prev_match) |m| m.length() else 0;
// Try to find match at least min_len long.
if (c.findMatch(pos, lh, min_len)) |match| {
// Found better match than previous.
try c.addPrevLiteral();
// Try to find match at least min_len long.
if (self.findMatch(pos, lh, min_len)) |match| {
// Found better match than previous.
try self.addPrevLiteral();
// Is found match length good enough?
if (match.length() >= self.level.lazy) {
// Don't try to lazy find better match, use this.
step = try self.addMatch(match);
} else {
// Store this match.
self.prev_literal = literal;
self.prev_match = match;
}
// Is found match length good enough?
if (match.length() >= c.level.lazy) {
// Don't try to lazy find better match, use this.
step = try c.addMatch(match);
} else {
// There is no better match at the current position than the previous one.
// Write previous match or literal.
if (self.prev_match) |m| {
// Write match from previous position.
step = try self.addMatch(m) - 1; // we already advanced 1 from previous position
} else {
// No match at previous position.
// Write previous literal if any, and remember this literal.
try self.addPrevLiteral();
self.prev_literal = literal;
}
// Store this match.
c.prev_literal = literal;
c.prev_match = match;
}
} else {
// There is no better match at the current position than the previous one.
// Write previous match or literal.
if (c.prev_match) |m| {
// Write match from previous position.
step = try c.addMatch(m) - 1; // we already advanced 1 from previous position
} else {
// No match at previous position.
// Write previous literal if any, and remember this literal.
try c.addPrevLiteral();
c.prev_literal = literal;
}
// Advance window and add hashes.
self.windowAdvance(step, lh, pos);
}
if (should_flush) {
// In the case of flushing, last few lookahead buffers were smaller then min match len.
// So only last literal can be unwritten.
assert(self.prev_match == null);
try self.addPrevLiteral();
self.prev_literal = null;
try self.flushTokens(flush_opt);
}
// Advance window and add hashes.
c.windowAdvance(step, lh, pos);
}
fn windowAdvance(self: *Compress, step: u16, lh: []const u8, pos: u16) void {
@ -226,7 +229,7 @@ fn findMatch(self: *Compress, pos: u16, lh: []const u8, min_len: u16) ?Token {
// Hot path loop!
while (prev_pos > 0 and chain > 0) : (chain -= 1) {
const distance = pos - prev_pos;
if (distance > std.compress.flate.match.max_distance)
if (distance > flate.match.max_distance)
break;
const new_len = self.win.match(prev_pos, pos, len);
@ -272,33 +275,6 @@ fn slide(self: *Compress) void {
self.lookup.slide(n);
}
/// Compresses as much data as possible, stopping when the reader becomes
/// empty. It will introduce some output latency (reading input without
/// producing all output) because some data is still in internal
/// buffers.
///
/// It is up to the caller to call flush (if needed) or finish (required)
/// when it needs to output any pending data or complete the stream.
///
pub fn compress(self: *Compress, reader: anytype) !void {
while (true) {
// Fill window from reader
const buf = self.win.writable();
if (buf.len == 0) {
try self.tokenize(.none);
self.slide();
continue;
}
const n = try reader.readAll(buf);
self.hasher.update(buf[0..n]);
self.win.written(n);
// Process window
try self.tokenize(.none);
// Exit when no more data in reader
if (n < buf.len) break;
}
}
/// Flushes internal buffers to the output writer. Outputs an empty stored
/// block to sync the bit stream to a byte boundary, so that the
/// decompressor can get all input data available so far.
@ -311,8 +287,8 @@ pub fn compress(self: *Compress, reader: anytype) !void {
/// stored block that is three zero bits plus filler bits to the next
/// byte, followed by four bytes (00 00 ff ff).
///
pub fn flush(self: *Compress) !void {
try self.tokenize(.flush);
pub fn flush(c: *Compress) !void {
try c.tokenize(.flush);
}
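Concretely, if the bit writer happens to be byte-aligned when the empty stored block is emitted, the sync marker comes out as five bytes (a worked example, not code from this commit):

    const sync_marker = [_]u8{
        0x00, // BFINAL=0 + BTYPE=00 (stored): three zero bits, then five filler bits
        0x00, 0x00, // LEN = 0x0000
        0xff, 0xff, // NLEN = ~LEN = 0xffff
    };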
/// Completes deflate bit stream by writing any pending data as deflate
@ -320,9 +296,9 @@ pub fn flush(self: *Compress) !void {
/// the compressor as a signal that next block has to have final bit
/// set.
///
pub fn finish(self: *Compress) !void {
try self.tokenize(.final);
try self.hasher.writeFooter(self.output);
pub fn finish(c: *Compress) !void {
_ = c;
@panic("TODO");
}
/// Use another writer while preserving history. Most probably flush
@ -437,24 +413,6 @@ fn SimpleCompressor(
}
self.wp = 0;
}
// Writes all uncompressed data from the input reader.
// It is up to the caller to call flush or finish if there is a need to
// output compressed blocks.
pub fn compress(self: *Self, reader: anytype) !void {
while (true) {
// read from rdr into buffer
const buf = self.buffer[self.wp..];
if (buf.len == 0) {
try self.flushBuffer(false);
continue;
}
const n = try reader.readAll(buf);
self.hasher.update(buf[0..n]);
self.wp += n;
if (n < buf.len) break; // no more data in reader
}
}
};
}
@ -811,6 +769,119 @@ fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool {
return a.freq < b.freq;
}
fn read(
context: ?*anyopaque,
bw: *std.io.BufferedWriter,
limit: std.io.Reader.Limit,
) std.io.Reader.RwError!usize {
const c: *Compress = @ptrCast(@alignCast(context));
switch (c.state) {
.header => |i| {
const header = c.hasher.container().header();
const n = try bw.write(header[i..]);
if (header.len - i - n == 0) {
c.state = .middle;
} else {
c.state.header += n;
}
return n;
},
.middle => {
c.input.fillMore() catch |err| switch (err) {
error.EndOfStream => {
c.state = .final;
return 0;
},
else => |e| return e,
};
const buffer_contents = c.input.bufferContents();
const min_lookahead = flate.match.min_length + flate.match.max_length;
const history_plus_lookahead_len = flate.history_len + min_lookahead;
if (buffer_contents.len < history_plus_lookahead_len) return 0;
const lookahead = buffer_contents[flate.history_len..];
const start = bw.count;
const n = c.tokenizeSlice(bw, limit, lookahead) catch |err| switch (err) {
error.WriteFailed => return error.WriteFailed,
};
c.hasher.update(lookahead[0..n]);
c.input.toss(n);
return bw.count - start;
},
.final => {
const buffer_contents = c.input.bufferContents();
const start = bw.count;
const n = c.tokenizeSlice(bw, limit, buffer_contents) catch |err| switch (err) {
error.WriteFailed => return error.WriteFailed,
};
if (buffer_contents.len - n == 0) {
c.hasher.update(buffer_contents);
c.input.tossAll();
{
// In the case of flushing, last few lookahead buffers were
// smaller than min match len, so only last literal can be
// unwritten.
assert(c.prev_match == null);
try c.addPrevLiteral();
c.prev_literal = null;
try c.flushTokens(.final);
}
switch (c.hasher) {
.gzip => |*gzip| {
// GZIP 8 bytes footer
// - 4 bytes, CRC32 (CRC-32)
// - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
comptime assert(c.footer_buffer.len == 8);
std.mem.writeInt(u32, c.footer_buffer[0..4], gzip.final(), .little);
std.mem.writeInt(u32, c.footer_buffer[4..8], @truncate(gzip.count), .little);
c.state = .{ .footer = 0 };
},
.zlib => |*zlib| {
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
// 4 bytes of ADLER32 (Adler-32 checksum)
// Checksum value of the uncompressed data (excluding any
// dictionary data) computed according to Adler-32
// algorithm.
comptime assert(c.footer_buffer.len == 8);
std.mem.writeInt(u32, c.footer_buffer[4..8], zlib.final, .big);
c.state = .{ .footer = 4 };
},
.raw => {
c.state = .ended;
},
}
}
return bw.count - start;
},
.ended => return error.EndOfStream,
.footer => |i| {
const remaining = c.footer_buffer[i..];
const n = try bw.write(limit.slice(remaining));
c.state = if (n == remaining.len) .ended else .{ .footer = i + n };
return n;
},
}
}
fn readVec(context: ?*anyopaque, data: []const []u8) std.io.Reader.Error!usize {
var bw: std.io.BufferedWriter = undefined;
bw.initVec(data);
return read(context, &bw, .countVec(data)) catch |err| switch (err) {
error.WriteFailed => unreachable, // Prevented by the limit.
else => |e| return e,
};
}
fn discard(context: ?*anyopaque, limit: std.io.Reader.Limit) std.io.Reader.Error!usize {
var trash_buffer: [64]u8 = undefined;
var null_writer: std.io.Writer.Null = undefined;
var bw = null_writer.writer().buffered(&trash_buffer);
return read(context, &bw, limit) catch |err| switch (err) {
error.WriteFailed => unreachable,
else => |e| return e,
};
}
test "generate a Huffman code from an array of frequencies" {
var freqs: [19]u16 = [_]u16{
8, // 0
@ -1099,7 +1170,8 @@ test "file tokenization" {
const data = case.data;
for (levels, 0..) |level, i| { // for each compression level
var original = io.fixedBufferStream(data);
var original: std.io.BufferedReader = undefined;
original.initFixed(data);
// buffer for decompressed data
var al = std.ArrayList(u8).init(testing.allocator);
@ -1173,21 +1245,22 @@ test "store simple compressor" {
//0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21,
};
var fbs = std.io.fixedBufferStream(data);
var fbs: std.io.BufferedReader = undefined;
fbs.initFixed(data);
var al = std.ArrayList(u8).init(testing.allocator);
defer al.deinit();
var cmp = try store.compressor(.raw, al.writer());
try cmp.compress(fbs.reader());
try cmp.compress(&fbs);
try cmp.finish();
try testing.expectEqualSlices(u8, &expected, al.items);
fbs.reset();
fbs.initFixed(data);
try al.resize(0);
// huffman-only compressor will also emit a store block for this small sample
var hc = try huffman.compressor(.raw, al.writer());
try hc.compress(fbs.reader());
try hc.compress(&fbs);
try hc.finish();
try testing.expectEqualSlices(u8, &expected, al.items);
}

View File

@ -22,32 +22,38 @@ const flate = std.compress.flate;
const Container = flate.Container;
const Token = @import("Token.zig");
const testing = std.testing;
const Decompress = @This();
input: *std.io.BufferedReader,
// Hashes, produces checksum, of uncompressed data for gzip/zlib footer.
hasher: Container.Hasher(),
hasher: Container.Hasher,
// dynamic block huffman code decoders
lit_dec: LiteralDecoder,
dst_dec: DistanceDecoder,
// current read state
bfinal: u1,
block_type: u2,
state: ReadState,
final_block: bool,
state: State,
read_err: Error!void,
read_err: ?Error,
const ReadState = enum {
const BlockType = enum(u2) {
stored = 0,
fixed = 1,
dynamic = 2,
};
const State = union(enum) {
protocol_header,
block_header,
block,
stored_block: u16,
fixed_block,
dynamic_block,
protocol_footer,
end,
};
const Decompress = @This();
pub const Error = Container.Error || error{
InvalidCode,
InvalidMatch,
@ -61,71 +67,25 @@ pub const Error = Container.Error || error{
MissingEndOfBlockCode,
};
pub fn init(input: *std.io.BufferedReader) Decompress {
pub fn init(input: *std.io.BufferedReader, container: Container) Decompress {
return .{
.input = input,
.hasher = .{},
.hasher = .init(container),
.lit_dec = .{},
.dst_dec = .{},
.bfinal = 0,
.block_type = 0b11,
.final_block = false,
.state = .protocol_header,
.read_err = {},
.read_err = null,
};
}
fn blockHeader(self: *Decompress) Error!void {
self.bfinal = try self.bits.read(u1);
self.block_type = try self.bits.read(u2);
}
fn storedBlock(self: *Decompress) !bool {
self.bits.alignToByte(); // skip padding until byte boundary
// everything after this is byte aligned in stored block
var len = try self.bits.read(u16);
const nlen = try self.bits.read(u16);
if (len != ~nlen) return error.WrongStoredBlockNlen;
while (len > 0) {
const buf = self.hist.getWritable(len);
try self.bits.readAll(buf);
len -= @intCast(buf.len);
}
return true;
}
fn fixedBlock(self: *Decompress) !bool {
while (!self.hist.full()) {
const code = try self.bits.readFixedCode();
switch (code) {
0...255 => self.hist.write(@intCast(code)),
256 => return true, // end of block
257...285 => try self.fixedDistanceCode(@intCast(code - 257)),
else => return error.InvalidCode,
}
}
return false;
}
// Handles fixed block non literal (length) code.
// Length code is followed by 5 bits of distance code.
fn fixedDistanceCode(self: *Decompress, code: u8) !void {
try self.bits.fill(5 + 5 + 13);
const length = try self.decodeLength(code);
const distance = try self.decodeDistance(try self.bits.readF(u5, .{
.buffered = true,
.reverse = true,
}));
try self.hist.writeMatch(length, distance);
}
fn decodeLength(self: *Decompress, code: u8) !u16 {
if (code > 28) return error.InvalidCode;
const ml = Token.matchLength(code);
return if (ml.extra_bits == 0) // 0 - 5 extra bits
ml.base
else
ml.base + try self.bits.readN(ml.extra_bits, .{ .buffered = true });
ml.base + try self.takeNBitsBuffered(ml.extra_bits);
}
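Worked example from the RFC 1951 tables: length code 266 (9 after the `- 257` rebase used here) has base length 13 and one extra bit, so extra bit 0 decodes to a match length of 13 and extra bit 1 to 14; codes 257-264 have no extra bits at all.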
fn decodeDistance(self: *Decompress, code: u8) !u16 {
@ -134,42 +94,7 @@ fn decodeDistance(self: *Decompress, code: u8) !u16 {
return if (md.extra_bits == 0) // 0 - 13 extra bits
md.base
else
md.base + try self.bits.readN(md.extra_bits, .{ .buffered = true });
}
fn dynamicBlockHeader(self: *Decompress) !void {
const hlit: u16 = @as(u16, try self.bits.read(u5)) + 257; // number of ll code entries present - 257
const hdist: u16 = @as(u16, try self.bits.read(u5)) + 1; // number of distance code entries - 1
const hclen: u8 = @as(u8, try self.bits.read(u4)) + 4; // hclen + 4 code lengths are encoded
if (hlit > 286 or hdist > 30)
return error.InvalidDynamicBlockHeader;
// lengths for code lengths
var cl_lens = [_]u4{0} ** 19;
for (0..hclen) |i| {
cl_lens[flate.huffman.codegen_order[i]] = try self.bits.read(u3);
}
var cl_dec: CodegenDecoder = .{};
try cl_dec.generate(&cl_lens);
// decoded code lengths
var dec_lens = [_]u4{0} ** (286 + 30);
var pos: usize = 0;
while (pos < hlit + hdist) {
const sym = try cl_dec.find(try self.bits.peekF(u7, .{ .reverse = true }));
try self.bits.shift(sym.code_bits);
pos += try self.dynamicCodeLength(sym.symbol, &dec_lens, pos);
}
if (pos > hlit + hdist) {
return error.InvalidDynamicBlockHeader;
}
// literal code lengths to literal decoder
try self.lit_dec.generate(dec_lens[0..hlit]);
// distance code lengths to distance decoder
try self.dst_dec.generate(dec_lens[hlit .. hlit + hdist]);
md.base + try self.takeNBitsBuffered(md.extra_bits);
}
// Decode code length symbol to code length. Writes decoded length into
@ -188,7 +113,7 @@ fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usiz
16 => {
// Copy the previous code length 3 - 6 times.
// The next 2 bits indicate repeat length
const n: u8 = @as(u8, try self.bits.read(u2)) + 3;
const n: u8 = @as(u8, try self.takeBits(u2)) + 3;
if (pos == 0 or pos + n > lens.len)
return error.InvalidDynamicBlockHeader;
for (0..n) |i| {
@ -197,188 +122,258 @@ fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usiz
return n;
},
// Repeat a code length of 0 for 3 - 10 times. (3 bits of length)
17 => return @as(u8, try self.bits.read(u3)) + 3,
17 => return @as(u8, try self.takeBits(u3)) + 3,
// Repeat a code length of 0 for 11 - 138 times (7 bits of length)
18 => return @as(u8, try self.bits.read(u7)) + 11,
18 => return @as(u8, try self.takeBits(u7)) + 11,
else => return error.InvalidDynamicBlockHeader,
}
}
// In larger archives most blocks are usually dynamic, so decompression
// performance depends on this function.
fn dynamicBlock(self: *Decompress) !bool {
// Hot path loop!
while (!self.hist.full()) {
// optimization so other bit reads can be buffered (avoiding one `if` in hot path)
try self.bits.fill(15);
const sym = try self.decodeSymbol(&self.lit_dec);
switch (sym.kind) {
.literal => self.hist.write(sym.symbol),
.match => {
// Decode match backreference <length, distance>
try self.bits.fill(5 + 15 + 13);
const length = try self.decodeLength(sym.symbol);
const dsm = try self.decodeSymbol(&self.dst_dec);
const distance = try self.decodeDistance(dsm.symbol);
try self.hist.writeMatch(length, distance);
},
.end_of_block => return true,
}
}
return false;
}
// Peek 15 bits from the bit reader (maximum code length is 15 bits). Use
// the decoder to find the symbol for that code; this also tells us how
// many bits the code uses. Shift the bit reader past those bits and
// return the symbol.
fn decodeSymbol(self: *Decompress, decoder: anytype) !Symbol {
const sym = try decoder.find(try self.bits.peekF(u15, .{ .buffered = true, .reverse = true }));
try self.bits.shift(sym.code_bits);
const sym = try decoder.find(try self.peekBitsReverseBuffered(u15));
try self.shiftBits(sym.code_bits);
return sym;
}
fn step(self: *Decompress) !void {
switch (self.state) {
.protocol_header => {
try self.hasher.container().parseHeader(&self.bits);
self.state = .block_header;
},
.block_header => {
try self.blockHeader();
self.state = .block;
if (self.block_type == 2) try self.dynamicBlockHeader();
},
.block => {
const done = switch (self.block_type) {
0 => try self.storedBlock(),
1 => try self.fixedBlock(),
2 => try self.dynamicBlock(),
else => return error.InvalidBlockType,
};
if (done) {
self.state = if (self.bfinal == 1) .protocol_footer else .block_header;
}
},
.protocol_footer => {
self.bits.alignToByte();
try self.hasher.container().parseFooter(&self.hasher, &self.bits);
self.state = .end;
},
.end => {},
}
}
/// Replaces the inner reader with new reader.
pub fn setReader(self: *Decompress, new_reader: *std.io.BufferedReader) void {
self.bits.forward_reader = new_reader;
if (self.state == .end or self.state == .protocol_footer) {
self.state = .protocol_header;
}
}
// Reads all compressed data from the internal reader and outputs plain
// (uncompressed) data to the provided writer.
pub fn decompress(self: *Decompress, writer: *std.io.BufferedWriter) !void {
while (try self.next()) |buf| {
try writer.writeAll(buf);
}
}
/// Returns the number of bytes that have been read from the internal
/// reader but not yet consumed by the decompressor.
pub fn unreadBytes(self: Decompress) usize {
// There can be no error here: the denominator is not zero, and
// overflow is not possible since the type is unsigned.
return std.math.divCeil(usize, self.bits.nbits, 8) catch unreachable;
}
// Iterator interface
/// Can be used in an iterator-like loop without a memcpy to another buffer:
/// while (try inflate.next()) |buf| { ... }
pub fn next(self: *Decompress) Error!?[]const u8 {
const out = try self.get(0);
if (out.len == 0) return null;
return out;
}
/// Returns decompressed data from the internal sliding window buffer.
/// The returned buffer can be any length between 0 and `limit` bytes. 0
/// returned bytes means end of stream was reached. With limit=0 it returns
/// as much data as it can. It will never be more than 65536 bytes, which
/// is the size of the internal buffer.
/// TODO merge this logic into readerRead and readerReadVec
pub fn get(self: *Decompress, limit: usize) Error![]const u8 {
while (true) {
const out = self.hist.readAtMost(limit);
if (out.len > 0) {
self.hasher.update(out);
return out;
}
if (self.state == .end) return out;
try self.step();
}
}
fn readerRead(
pub fn read(
context: ?*anyopaque,
bw: *std.io.BufferedWriter,
limit: std.io.Reader.Limit,
) std.io.Reader.RwError!usize {
const self: *Decompress = @alignCast(@ptrCast(context));
const out = try bw.writableSliceGreedy(1);
const in = self.get(limit.minInt(out.len)) catch |err| switch (err) {
const d: *Decompress = @alignCast(@ptrCast(context));
return readInner(d, bw, limit) catch |err| switch (err) {
error.EndOfStream => return error.EndOfStream,
error.ReadFailed => return error.ReadFailed,
error.WriteFailed => return error.WriteFailed,
else => |e| {
self.read_err = e;
return error.ReadFailed;
},
};
if (in.len == 0) return error.EndOfStream;
@memcpy(out[0..in.len], in);
bw.advance(in.len);
return in.len;
}
fn readerReadVec(context: ?*anyopaque, data: []const []u8) std.io.Reader.Error!usize {
const self: *Decompress = @alignCast(@ptrCast(context));
return readVec(self, data) catch |err| switch (err) {
error.EndOfStream => return error.EndOfStream,
error.ReadFailed => return error.ReadFailed,
else => |e| {
self.read_err = e;
// In the event of an error, state is unmodified so that it can be
// better used to diagnose the failure.
d.read_err = e;
return error.ReadFailed;
},
};
}
fn readerDiscard(context: ?*anyopaque, limit: std.io.Reader.Limit) std.io.Reader.Error!usize {
fn readInner(
d: *Decompress,
bw: *std.io.BufferedWriter,
limit: std.io.Reader.Limit,
) (Error || error{ WriteFailed, EndOfStream })!usize {
const in = d.input;
sw: switch (d.state) {
.protocol_header => switch (d.hasher.container()) {
.gzip => {
const Header = extern struct {
magic: u16 align(1),
method: u8,
flags: packed struct(u8) {
text: bool,
hcrc: bool,
extra: bool,
name: bool,
comment: bool,
reserved: u3,
},
mtime: u32 align(1),
xfl: u8,
os: u8,
};
const header = try in.takeStructEndian(Header, .little);
if (header.magic != 0x8b1f or header.method != 0x08)
return error.BadGzipHeader;
if (header.flags.extra) {
const extra_len = try in.takeInt(u16, .little);
try in.discardAll(extra_len);
}
if (header.flags.name) {
try in.discardDelimiterInclusive(0);
}
if (header.flags.comment) {
try in.discardDelimiterInclusive(0);
}
if (header.flags.hcrc) {
try in.discardAll(2);
}
continue :sw .block_header;
},
.zlib => {
const Header = extern struct {
cmf: packed struct(u8) {
cm: u4,
cinfo: u4,
},
flg: u8,
};
const header = try in.takeStruct(Header);
if (header.cmf.cm != 8 or header.cmf.cinfo > 7) return error.BadZlibHeader;
continue :sw .block_header;
},
.raw => continue :sw .block_header,
},
.block_header => {
d.final_block = (try d.takeBits(u1)) != 0;
const block_type = try d.takeBits(BlockType);
switch (block_type) {
.stored => {
d.alignBitsToByte(); // skip padding until byte boundary
// everything after this is byte aligned in stored block
const len = try in.takeInt(u16, .little);
const nlen = try in.takeInt(u16, .little);
if (len != ~nlen) return error.WrongStoredBlockNlen;
continue :sw .{ .stored_block = len };
},
.fixed => continue :sw .fixed_block,
.dynamic => {
const hlit: u16 = @as(u16, try d.takeBits(u5)) + 257; // number of ll code entries present - 257
const hdist: u16 = @as(u16, try d.takeBits(u5)) + 1; // number of distance code entries - 1
const hclen: u8 = @as(u8, try d.takeBits(u4)) + 4; // hclen + 4 code lengths are encoded
if (hlit > 286 or hdist > 30)
return error.InvalidDynamicBlockHeader;
// lengths for code lengths
var cl_lens = [_]u4{0} ** 19;
for (0..hclen) |i| {
cl_lens[flate.huffman.codegen_order[i]] = try d.takeBits(u3);
}
var cl_dec: CodegenDecoder = .{};
try cl_dec.generate(&cl_lens);
// decoded code lengths
var dec_lens = [_]u4{0} ** (286 + 30);
var pos: usize = 0;
while (pos < hlit + hdist) {
const sym = try cl_dec.find(try d.peekBitsReverse(u7));
try d.shiftBits(sym.code_bits);
pos += try d.dynamicCodeLength(sym.symbol, &dec_lens, pos);
}
if (pos > hlit + hdist) {
return error.InvalidDynamicBlockHeader;
}
// literal code lengths to literal decoder
try d.lit_dec.generate(dec_lens[0..hlit]);
// distance code lengths to distance decoder
try d.dst_dec.generate(dec_lens[hlit .. hlit + hdist]);
continue :sw .dynamic_block;
},
}
},
.stored_block => |remaining_len| {
const out = try bw.writableSliceGreedyPreserving(flate.history_len, 1);
const limited_out = limit.min(.limited(remaining_len)).slice(out);
const n = try d.input.readVec(bw, &.{limited_out});
if (remaining_len - n == 0) {
d.state = if (d.final_block) .protocol_footer else .block_header;
} else {
d.state = .{ .stored_block = remaining_len - n };
}
bw.advance(n);
return n;
},
.fixed_block => {
const start = bw.count;
while (@intFromEnum(limit) > bw.count - start) {
const code = try d.readFixedCode();
switch (code) {
0...255 => try bw.writeBytePreserving(flate.history_len, @intCast(code)),
256 => {
d.state = if (d.final_block) .protocol_footer else .block_header;
return bw.count - start;
},
257...285 => {
// Handles fixed block non literal (length) code.
// Length code is followed by 5 bits of distance code.
const rebased_code: u8 = @intCast(code - 257);
const length = try d.decodeLength(rebased_code);
const distance = try d.decodeDistance(try d.takeBitsReverseBuffered(u5));
try writeMatch(bw, length, distance);
},
else => return error.InvalidCode,
}
}
d.state = .fixed_block;
return bw.count - start;
},
.dynamic_block => {
// In larger archives most blocks are usually dynamic, so decompression
// performance depends on this logic.
const start = bw.count;
while (@intFromEnum(limit) > bw.count - start) {
const sym = try d.decodeSymbol(&d.lit_dec);
switch (sym.kind) {
.literal => try bw.writeBytePreserving(flate.history_len, sym.symbol),
.match => {
// Decode match backreference <length, distance>
const length = try d.decodeLength(sym.symbol);
const dsm = try d.decodeSymbol(&d.dst_dec);
const distance = try d.decodeDistance(dsm.symbol);
try writeMatch(bw, length, distance);
},
.end_of_block => {
d.state = if (d.final_block) .protocol_footer else .block_header;
return bw.count - start;
},
}
}
d.state = .dynamic_block;
return bw.count - start;
},
.protocol_footer => {
d.alignBitsToByte();
switch (d.hasher) {
.gzip => |*gzip| {
if (try in.takeInt(u32, .little) != gzip.final()) return error.WrongGzipChecksum;
if (try in.takeInt(u32, .little) != gzip.count) return error.WrongGzipSize;
},
.zlib => |*zlib| {
const chksum: u32 = @byteSwap(zlib.final());
if (try in.takeInt(u32, .little) != chksum) return error.WrongZlibChecksum;
},
.raw => {},
}
d.state = .end;
return 0;
},
.end => return error.EndOfStream,
}
}
fn readVec(context: ?*anyopaque, data: []const []u8) std.io.Reader.Error!usize {
_ = context;
_ = data;
@panic("TODO remove readVec primitive");
}
fn discard(context: ?*anyopaque, limit: std.io.Reader.Limit) std.io.Reader.Error!usize {
_ = context;
_ = limit;
@panic("TODO");
// Problem here is we still need access to the output ring buffer.
@panic("TODO allow discard to be null");
}
pub fn readVec(self: *Decompress, data: []const []u8) Error!usize {
for (data) |out| {
if (out.len == 0) continue;
const in = try self.get(out.len);
@memcpy(out[0..in.len], in);
if (in.len == 0) return error.EndOfStream;
return in.len;
}
return 0;
/// Write a match (back-reference into the same data slice) of `length`
/// bytes, starting `distance` bytes back from the current write position.
fn writeMatch(bw: *std.io.BufferedWriter, length: u16, distance: u16) !void {
_ = bw;
_ = length;
_ = distance;
@panic("TODO");
}
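A minimal sketch of what that back-reference copy has to do, over a hypothetical flat output window (caller guarantees `distance <= write_pos`); not this commit's eventual implementation:

    fn copyMatch(window: []u8, write_pos: usize, length: u16, distance: u16) void {
        // Copy byte-by-byte: when distance < length the source overlaps the
        // destination, and DEFLATE requires repeating the just-written bytes.
        for (0..length) |i| {
            window[write_pos + i] = window[write_pos + i - distance];
        }
    }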
pub fn reader(self: *Decompress) std.io.Reader {
return .{
.context = self,
.vtable = &.{
.read = readerRead,
.readVec = readerReadVec,
.discard = readerDiscard,
.read = read,
.readVec = readVec,
.discard = discard,
},
};
}
@ -387,6 +382,43 @@ pub fn readable(self: *Decompress, buffer: []u8) std.io.BufferedReader {
return reader(self).buffered(buffer);
}
fn takeBits(d: *Decompress, comptime T: type) !T {
_ = d;
@panic("TODO");
}
fn takeNBitsBuffered(d: *Decompress, n: u4) !u16 {
_ = d;
_ = n;
@panic("TODO");
}
fn peekBitsReverse(d: *Decompress, comptime T: type) !T {
_ = d;
@panic("TODO");
}
fn peekBitsReverseBuffered(d: *Decompress, comptime T: type) !T {
_ = d;
@panic("TODO");
}
fn alignBitsToByte(d: *Decompress) void {
_ = d;
@panic("TODO");
}
fn shiftBits(d: *Decompress, n: u6) !void {
_ = d;
_ = n;
@panic("TODO");
}
fn readFixedCode(d: *Decompress) !u16 {
_ = d;
@panic("TODO");
}
pub const Symbol = packed struct {
pub const Kind = enum(u2) {
literal,
@ -712,12 +744,16 @@ test "decompress" {
},
};
for (cases) |c| {
var fb = std.io.fixedBufferStream(c.in);
var al = std.ArrayList(u8).init(testing.allocator);
defer al.deinit();
var fb: std.io.BufferedReader = undefined;
fb.initFixed(@constCast(c.in));
var aw: std.io.AllocatingWriter = undefined;
aw.init(testing.allocator);
defer aw.deinit();
try decompress(.raw, fb.reader(), al.writer());
try testing.expectEqualStrings(c.out, al.items);
var decompress: Decompress = .init(&fb, .raw);
var decompress_br = decompress.readable(&.{});
_ = try decompress_br.readRemaining(&aw.buffered_writer);
try testing.expectEqualStrings(c.out, aw.getWritten());
}
}
@ -769,12 +805,16 @@ test "gzip decompress" {
},
};
for (cases) |c| {
var fb = std.io.fixedBufferStream(c.in);
var al = std.ArrayList(u8).init(testing.allocator);
defer al.deinit();
var fb: std.io.BufferedReader = undefined;
fb.initFixed(@constCast(c.in));
var aw: std.io.AllocatingWriter = undefined;
aw.init(testing.allocator);
defer aw.deinit();
try decompress(.gzip, fb.reader(), al.writer());
try testing.expectEqualStrings(c.out, al.items);
var decompress: Decompress = .init(&fb, .gzip);
var decompress_br = decompress.readable(&.{});
_ = try decompress_br.readRemaining(&aw.buffered_writer);
try testing.expectEqualStrings(c.out, aw.getWritten());
}
}
@ -795,12 +835,16 @@ test "zlib decompress" {
},
};
for (cases) |c| {
var fb = std.io.fixedBufferStream(c.in);
var al = std.ArrayList(u8).init(testing.allocator);
defer al.deinit();
var fb: std.io.BufferedReader = undefined;
fb.initFixed(@constCast(c.in));
var aw: std.io.AllocatingWriter = undefined;
aw.init(testing.allocator);
defer aw.deinit();
try decompress(.zlib, fb.reader(), al.writer());
try testing.expectEqualStrings(c.out, al.items);
var decompress: Decompress = .init(&fb, .zlib);
var decompress_br = decompress.readable(&.{});
_ = try decompress_br.readRemaining(&aw.buffered_writer);
try testing.expectEqualStrings(c.out, aw.getWritten());
}
}
@ -853,16 +897,21 @@ test "fuzzing tests" {
};
inline for (cases, 0..) |c, case_no| {
var in = std.io.fixedBufferStream(@embedFile("testdata/fuzz/" ++ c.input ++ ".input"));
var out = std.ArrayList(u8).init(testing.allocator);
defer out.deinit();
var in: std.io.BufferedReader = undefined;
in.initFixed(@constCast(@embedFile("testdata/fuzz/" ++ c.input ++ ".input")));
var aw: std.io.AllocatingWriter = undefined;
aw.init(testing.allocator);
defer aw.deinit();
errdefer std.debug.print("test case failed {}\n", .{case_no});
var decompress: Decompress = .init(&in, .raw);
var decompress_br = decompress.readable(&.{});
if (c.err) |expected_err| {
try testing.expectError(expected_err, decompress(.raw, in.reader(), out.writer()));
try testing.expectError(error.ReadFailed, decompress_br.readRemaining(&aw.buffered_writer));
try testing.expectError(expected_err, decompress.read_err.?);
} else {
try decompress(.raw, in.reader(), out.writer());
try testing.expectEqualStrings(c.out, out.items);
_ = try decompress_br.readRemaining(&aw.buffered_writer);
try testing.expectEqualStrings(c.out, aw.getWritten());
}
}
}
@ -871,21 +920,28 @@ test "bug 18966" {
const input = @embedFile("testdata/fuzz/bug_18966.input");
const expect = @embedFile("testdata/fuzz/bug_18966.expect");
var in = std.io.fixedBufferStream(input);
var out = std.ArrayList(u8).init(testing.allocator);
defer out.deinit();
var in: std.io.BufferedReader = undefined;
in.initFixed(@constCast(input));
var aw: std.io.AllocatingWriter = undefined;
aw.init(testing.allocator);
defer aw.deinit();
try decompress(.gzip, in.reader(), out.writer());
try testing.expectEqualStrings(expect, out.items);
var decompress: Decompress = .init(&in, .gzip);
var decompress_br = decompress.readable(&.{});
_ = try decompress_br.readRemaining(&aw.buffered_writer);
try testing.expectEqualStrings(expect, aw.getWritten());
}
test "bug 19895" {
test "reading into empty buffer" {
// Inspired by https://github.com/ziglang/zig/issues/19895
const input = &[_]u8{
0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen
'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data
};
var in = std.io.fixedBufferStream(input);
var decomp = Decompress.init(.raw, in.reader());
var in: std.io.BufferedReader = undefined;
in.initFixed(@constCast(input));
var decomp: Decompress = .init(&in, .raw);
var decompress_br = decomp.readable(&.{});
var buf: [0]u8 = undefined;
try testing.expectEqual(0, try decomp.read(&buf));
try testing.expectEqual(0, try decompress_br.readVec(&.{&buf}));
}

View File

@ -10,7 +10,7 @@ const flate = @import("../flate.zig");
const Lookup = @This();
const prime4 = 0x9E3779B1; // 4-byte prime number 2654435761
const chain_len = 2 * flate.history.len;
const chain_len = 2 * flate.history_len;
// Maps hash => first position
head: [flate.lookup.len]u16 = [_]u16{0} ** flate.lookup.len,

View File

@ -3,9 +3,10 @@ const testing = std.testing;
const xz = std.compress.xz;
fn decompress(data: []const u8) ![]u8 {
var in_stream = std.io.fixedBufferStream(data);
var in_stream: std.io.BufferedReader = undefined;
in_stream.initFixed(data);
var xz_stream = try xz.decompress(testing.allocator, in_stream.reader());
var xz_stream = try xz.decompress(testing.allocator, &in_stream);
defer xz_stream.deinit();
return xz_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize));

View File

@ -267,8 +267,8 @@ fn discard(context: ?*anyopaque, limit: Reader.Limit) Reader.Error!usize {
fn readVec(context: ?*anyopaque, data: []const []u8) Reader.Error!usize {
var bw: BufferedWriter = undefined;
bw.initFixed(data[0]);
return read(context, &bw, .limited(data[0].len)) catch |err| switch (err) {
bw.initVec(data);
return read(context, &bw, .countVec(data)) catch |err| switch (err) {
error.WriteFailed => unreachable,
else => |e| return e,
};

View File

@ -154,7 +154,8 @@ pub const Tag = struct {
test Tag {
const buf = [_]u8{0xa3};
var stream = std.io.fixedBufferStream(&buf);
var stream: std.io.BufferedReader = undefined;
stream.initFixed(&buf);
const t = Tag.decode(stream.reader());
try std.testing.expectEqual(Tag.init(@enumFromInt(3), true, .context_specific), t);
}
@ -184,8 +185,8 @@ pub const Element = struct {
/// - Ensures length is within `bytes`
/// - Ensures length is less than `std.math.maxInt(Index)`
pub fn decode(bytes: []const u8, index: Index) DecodeError!Element {
var stream = std.io.fixedBufferStream(bytes[index..]);
var reader = stream.reader();
var reader: std.io.BufferedReader = undefined;
reader.initFixed(bytes[index..]);
const tag = try Tag.decode(reader);
const size_or_len_size = try reader.readByte();

View File

@ -1227,8 +1227,8 @@ fn read(context: ?*anyopaque, bw: *std.io.BufferedWriter, limit: Reader.Limit) R
fn readVec(context: ?*anyopaque, data: []const []u8) Reader.Error!usize {
var bw: std.io.BufferedWriter = undefined;
bw.initFixed(data[0]);
return read(context, &bw, .limited(data[0].len)) catch |err| switch (err) {
bw.initVec(data);
return read(context, &bw, .countVec(data)) catch |err| switch (err) {
error.WriteFailed => unreachable,
else => |e| return e,
};

View File

@ -2241,7 +2241,7 @@ pub const ElfModule = struct {
if (chdr.ch_type != .ZLIB) continue;
const ch_size = chdr.ch_size;
var zlib_stream: std.compress.zlib.Decompressor = .init(&section_reader);
var zlib_stream: std.compress.flate.Decompress = .init(&section_reader, .zlib);
const decompressed_section = zlib_stream.reader().readRemainingAlloc(gpa, .limited(ch_size)) catch continue;
if (decompressed_section.len != ch_size) {

View File

@ -67,7 +67,7 @@ pub fn parseDbiStream(self: *Pdb) !void {
return error.InvalidDebugInfo;
const reader = stream.reader();
const header = try reader.readStruct(std.pdb.DbiStreamHeader);
const header = try reader.takeStruct(std.pdb.DbiStreamHeader);
if (header.version_header != 19990903) // V70, only value observed by LLVM team
return error.UnknownPDBVersion;
// if (header.Age != age)
@ -82,7 +82,7 @@ pub fn parseDbiStream(self: *Pdb) !void {
// Module Info Substream
var mod_info_offset: usize = 0;
while (mod_info_offset != mod_info_size) {
const mod_info = try reader.readStruct(pdb.ModInfo);
const mod_info = try reader.takeStruct(pdb.ModInfo);
var this_record_len: usize = @sizeOf(pdb.ModInfo);
const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024);
@ -131,7 +131,7 @@ pub fn parseDbiStream(self: *Pdb) !void {
}
while (sect_cont_offset != section_contrib_size) {
const entry = try sect_contribs.addOne();
entry.* = try reader.readStruct(pdb.SectionContribEntry);
entry.* = try reader.takeStruct(pdb.SectionContribEntry);
sect_cont_offset += @sizeOf(pdb.SectionContribEntry);
if (sect_cont_offset > section_contrib_size)
@ -175,7 +175,7 @@ pub fn parseInfoStream(self: *Pdb) !void {
return cap * 2 / 3 + 1;
}
};
const hash_tbl_hdr = try reader.readStruct(HashTableHeader);
const hash_tbl_hdr = try reader.takeStruct(HashTableHeader);
if (hash_tbl_hdr.capacity == 0)
return error.InvalidDebugInfo;
@ -397,7 +397,7 @@ const Msf = struct {
fn init(allocator: Allocator, file: File) !Msf {
const in = file.reader();
const superblock = try in.readStruct(pdb.SuperBlock);
const superblock = try in.takeStruct(pdb.SuperBlock);
// Sanity checks
if (!std.mem.eql(u8, &superblock.file_magic, pdb.SuperBlock.expect_magic))

View File

@ -494,30 +494,39 @@ pub const Header = struct {
shnum: u16,
shstrndx: u16,
pub fn program_header_iterator(self: Header, parse_source: anytype) ProgramHeaderIterator(@TypeOf(parse_source)) {
return ProgramHeaderIterator(@TypeOf(parse_source)){
.elf_header = self,
.parse_source = parse_source,
pub fn iterateProgramHeaders(h: Header, file_reader: *std.fs.File.Reader) ProgramHeaderIterator {
return .{
.elf_header = h,
.file_reader = file_reader,
};
}
pub fn section_header_iterator(self: Header, parse_source: anytype) SectionHeaderIterator(@TypeOf(parse_source)) {
return SectionHeaderIterator(@TypeOf(parse_source)){
.elf_header = self,
.parse_source = parse_source,
pub fn iterateSectionHeaders(h: Header, file_reader: *std.fs.File.Reader) SectionHeaderIterator {
return .{
.elf_header = h,
.file_reader = file_reader,
};
}
pub fn read(parse_source: anytype) !Header {
var hdr_buf: [@sizeOf(Elf64_Ehdr)]u8 align(@alignOf(Elf64_Ehdr)) = undefined;
try parse_source.seekableStream().seekTo(0);
try parse_source.reader().readNoEof(&hdr_buf);
return Header.parse(&hdr_buf);
pub const ReadError = std.io.Reader.Error || ParseError;
pub fn read(br: *std.io.BufferedReader) ReadError!Header {
const buf = try br.peek(@sizeOf(Elf64_Ehdr));
const result = try parse(@ptrCast(buf));
br.toss(if (result.is_64) @sizeOf(Elf64_Ehdr) else @sizeOf(Elf32_Ehdr));
return result;
}
pub fn parse(hdr_buf: *align(@alignOf(Elf64_Ehdr)) const [@sizeOf(Elf64_Ehdr)]u8) !Header {
const hdr32 = @as(*const Elf32_Ehdr, @ptrCast(hdr_buf));
const hdr64 = @as(*const Elf64_Ehdr, @ptrCast(hdr_buf));
pub const ParseError = error{
InvalidElfMagic,
InvalidElfVersion,
InvalidElfClass,
InvalidElfEndian,
};
pub fn parse(hdr_buf: *align(@alignOf(Elf64_Ehdr)) const [@sizeOf(Elf64_Ehdr)]u8) ParseError!Header {
const hdr32: *const Elf32_Ehdr = @ptrCast(hdr_buf);
const hdr64: *const Elf64_Ehdr = @ptrCast(hdr_buf);
if (!mem.eql(u8, hdr32.e_ident[0..4], MAGIC)) return error.InvalidElfMagic;
if (hdr32.e_ident[EI_VERSION] != 1) return error.InvalidElfVersion;
@ -541,19 +550,19 @@ pub const Header = struct {
// The meaning of this value depends on `os_abi` so just make it available as `u8`.
const abi_version = hdr32.e_ident[EI_ABIVERSION];
const @"type" = if (need_bswap) blk: {
const @"type": ET = if (need_bswap) blk: {
comptime assert(!@typeInfo(ET).@"enum".is_exhaustive);
const value = @intFromEnum(hdr32.e_type);
break :blk @as(ET, @enumFromInt(@byteSwap(value)));
break :blk @enumFromInt(@byteSwap(value));
} else hdr32.e_type;
const machine = if (need_bswap) blk: {
const machine: EM = if (need_bswap) blk: {
comptime assert(!@typeInfo(EM).@"enum".is_exhaustive);
const value = @intFromEnum(hdr32.e_machine);
break :blk @as(EM, @enumFromInt(@byteSwap(value)));
break :blk @enumFromInt(@byteSwap(value));
} else hdr32.e_machine;
return @as(Header, .{
return .{
.is_64 = is_64,
.endian = endian,
.os_abi = os_abi,
@ -568,111 +577,91 @@ pub const Header = struct {
.shentsize = int(is_64, need_bswap, hdr32.e_shentsize, hdr64.e_shentsize),
.shnum = int(is_64, need_bswap, hdr32.e_shnum, hdr64.e_shnum),
.shstrndx = int(is_64, need_bswap, hdr32.e_shstrndx, hdr64.e_shstrndx),
});
};
}
};
pub fn ProgramHeaderIterator(comptime ParseSource: anytype) type {
return struct {
elf_header: Header,
parse_source: ParseSource,
index: usize = 0,
pub const ProgramHeaderIterator = struct {
elf_header: Header,
file_reader: *std.fs.File.Reader,
index: usize = 0,
pub fn next(self: *@This()) !?Elf64_Phdr {
if (self.index >= self.elf_header.phnum) return null;
defer self.index += 1;
pub fn next(it: *ProgramHeaderIterator) !?Elf64_Phdr {
if (it.index >= it.elf_header.phnum) return null;
defer it.index += 1;
if (self.elf_header.is_64) {
var phdr: Elf64_Phdr = undefined;
const offset = self.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * self.index;
try self.parse_source.seekableStream().seekTo(offset);
try self.parse_source.reader().readNoEof(mem.asBytes(&phdr));
// ELF endianness matches native endianness.
if (self.elf_header.endian == native_endian) return phdr;
// Convert fields to native endianness.
if (it.elf_header.is_64) {
var phdr: Elf64_Phdr = undefined;
const offset = it.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * it.index;
try it.file_reader.seekTo(offset);
var br = it.file_reader.readable(&.{});
try br.readSlice(@ptrCast(&phdr));
if (it.elf_header.endian != native_endian)
mem.byteSwapAllFields(Elf64_Phdr, &phdr);
return phdr;
}
var phdr: Elf32_Phdr = undefined;
const offset = self.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * self.index;
try self.parse_source.seekableStream().seekTo(offset);
try self.parse_source.reader().readNoEof(mem.asBytes(&phdr));
// ELF endianness does NOT match native endianness.
if (self.elf_header.endian != native_endian) {
// Convert fields to native endianness.
mem.byteSwapAllFields(Elf32_Phdr, &phdr);
}
// Convert 32-bit header to 64-bit.
return Elf64_Phdr{
.p_type = phdr.p_type,
.p_offset = phdr.p_offset,
.p_vaddr = phdr.p_vaddr,
.p_paddr = phdr.p_paddr,
.p_filesz = phdr.p_filesz,
.p_memsz = phdr.p_memsz,
.p_flags = phdr.p_flags,
.p_align = phdr.p_align,
};
return phdr;
}
};
}
pub fn SectionHeaderIterator(comptime ParseSource: anytype) type {
return struct {
elf_header: Header,
parse_source: ParseSource,
index: usize = 0,
var phdr: Elf32_Phdr = undefined;
const offset = it.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * it.index;
try it.file_reader.seekTo(offset);
var br = it.file_reader.readable(&.{});
try br.readSlice(@ptrCast(&phdr));
if (it.elf_header.endian != native_endian)
mem.byteSwapAllFields(Elf32_Phdr, &phdr);
return .{
.p_type = phdr.p_type,
.p_offset = phdr.p_offset,
.p_vaddr = phdr.p_vaddr,
.p_paddr = phdr.p_paddr,
.p_filesz = phdr.p_filesz,
.p_memsz = phdr.p_memsz,
.p_flags = phdr.p_flags,
.p_align = phdr.p_align,
};
}
};
pub fn next(self: *@This()) !?Elf64_Shdr {
if (self.index >= self.elf_header.shnum) return null;
defer self.index += 1;
pub const SectionHeaderIterator = struct {
elf_header: Header,
file_reader: *std.fs.File.Reader,
index: usize = 0,
if (self.elf_header.is_64) {
var shdr: Elf64_Shdr = undefined;
const offset = self.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * self.index;
try self.parse_source.seekableStream().seekTo(offset);
try self.parse_source.reader().readNoEof(mem.asBytes(&shdr));
pub fn next(it: *SectionHeaderIterator) !?Elf64_Shdr {
if (it.index >= it.elf_header.shnum) return null;
defer it.index += 1;
// ELF endianness matches native endianness.
if (self.elf_header.endian == native_endian) return shdr;
// Convert fields to native endianness.
if (it.elf_header.is_64) {
var shdr: Elf64_Shdr = undefined;
const offset = it.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * it.index;
try it.file_reader.seekTo(offset);
var br = it.file_reader.readable(&.{});
try br.readSlice(@ptrCast(&shdr));
if (it.elf_header.endian != native_endian)
mem.byteSwapAllFields(Elf64_Shdr, &shdr);
return shdr;
}
var shdr: Elf32_Shdr = undefined;
const offset = self.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * self.index;
try self.parse_source.seekableStream().seekTo(offset);
try self.parse_source.reader().readNoEof(mem.asBytes(&shdr));
// ELF endianness does NOT match native endianness.
if (self.elf_header.endian != native_endian) {
// Convert fields to native endianness.
mem.byteSwapAllFields(Elf32_Shdr, &shdr);
}
// Convert 32-bit header to 64-bit.
return Elf64_Shdr{
.sh_name = shdr.sh_name,
.sh_type = shdr.sh_type,
.sh_flags = shdr.sh_flags,
.sh_addr = shdr.sh_addr,
.sh_offset = shdr.sh_offset,
.sh_size = shdr.sh_size,
.sh_link = shdr.sh_link,
.sh_info = shdr.sh_info,
.sh_addralign = shdr.sh_addralign,
.sh_entsize = shdr.sh_entsize,
};
return shdr;
}
};
}
var shdr: Elf32_Shdr = undefined;
const offset = it.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * it.index;
try it.file_reader.seekTo(offset);
var br = it.file_reader.readable(&.{});
try br.readSlice(@ptrCast(&shdr));
if (it.elf_header.endian != native_endian)
mem.byteSwapAllFields(Elf32_Shdr, &shdr);
return .{
.sh_name = shdr.sh_name,
.sh_type = shdr.sh_type,
.sh_flags = shdr.sh_flags,
.sh_addr = shdr.sh_addr,
.sh_offset = shdr.sh_offset,
.sh_size = shdr.sh_size,
.sh_link = shdr.sh_link,
.sh_info = shdr.sh_info,
.sh_addralign = shdr.sh_addralign,
.sh_entsize = shdr.sh_entsize,
};
}
};
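A hypothetical usage sketch of the reworked iterators (`elf_file` and `buffer` are placeholders, and std.fs.File.Reader is itself being reworked in this branch, so the exact setup may differ):

    var file_reader = elf_file.reader();
    var br = file_reader.readable(&buffer);
    const header = try std.elf.Header.read(&br);
    var it = header.iterateSectionHeaders(&file_reader);
    while (try it.next()) |shdr| {
        // Both 32-bit and 64-bit files yield an Elf64_Shdr here, byte-swapped
        // to native endianness when necessary.
        _ = shdr;
    }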
fn int(is_64: bool, need_bswap: bool, int_32: anytype, int_64: anytype) @TypeOf(int_64) {
if (is_64) {

View File

@ -828,8 +828,7 @@ pub const BufPrintError = error{
NoSpaceLeft,
};
/// Print a Formatter string into `buf`. Actually just a thin wrapper around `format` and `fixedBufferStream`.
/// Returns a slice of the bytes printed to.
/// Print a Formatter string into `buf`. Returns a slice of the bytes printed.
pub fn bufPrint(buf: []u8, comptime fmt: []const u8, args: anytype) BufPrintError![]u8 {
var bw: std.io.BufferedWriter = undefined;
bw.initFixed(buf);
@ -1015,18 +1014,18 @@ test "int.padded" {
test "buffer" {
{
var buf1: [32]u8 = undefined;
var fbs = std.io.fixedBufferStream(&buf1);
var bw = fbs.writer();
var bw: std.io.BufferedWriter = undefined;
bw.initFixed(&buf1);
try bw.printValue("", .{}, 1234, std.options.fmt_max_depth);
try std.testing.expectEqualStrings("1234", fbs.getWritten());
try std.testing.expectEqualStrings("1234", bw.getWritten());
fbs.reset();
bw.initFixed(&buf1);
try bw.printValue("c", .{}, 'a', std.options.fmt_max_depth);
try std.testing.expectEqualStrings("a", fbs.getWritten());
try std.testing.expectEqualStrings("a", bw.getWritten());
fbs.reset();
bw.initFixed(&buf1);
try bw.printValue("b", .{}, 0b1100, std.options.fmt_max_depth);
try std.testing.expectEqualStrings("1100", fbs.getWritten());
try std.testing.expectEqualStrings("1100", bw.getWritten());
}
}

View File

@ -9,6 +9,66 @@ const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const Alignment = std.mem.Alignment;
pub const Limit = enum(usize) {
nothing = 0,
unlimited = std.math.maxInt(usize),
_,
/// `std.math.maxInt(usize)` is interpreted to mean `.unlimited`.
pub fn limited(n: usize) Limit {
return @enumFromInt(n);
}
pub fn countVec(data: []const []const u8) Limit {
var total: usize = 0;
for (data) |d| total += d.len;
return .limited(total);
}
pub fn min(a: Limit, b: Limit) Limit {
return @enumFromInt(@min(@intFromEnum(a), @intFromEnum(b)));
}
pub fn minInt(l: Limit, n: usize) usize {
return @min(n, @intFromEnum(l));
}
pub fn slice(l: Limit, s: []u8) []u8 {
return s[0..l.minInt(s.len)];
}
pub fn sliceConst(l: Limit, s: []const u8) []const u8 {
return s[0..l.minInt(s.len)];
}
pub fn toInt(l: Limit) ?usize {
return switch (l) {
else => @intFromEnum(l),
.unlimited => null,
};
}
/// Reduces a slice to account for the limit, leaving room for one extra
/// byte above the limit, allowing for the use case of differentiating
/// between end-of-stream and reaching the limit.
pub fn slice1(l: Limit, non_empty_buffer: []u8) []u8 {
assert(non_empty_buffer.len >= 1);
return non_empty_buffer[0..@min(@intFromEnum(l) +| 1, non_empty_buffer.len)];
}
pub fn nonzero(l: Limit) bool {
return @intFromEnum(l) > 0;
}
/// Returns a new limit reduced by `amount`, or `null` if the limit
/// would be exceeded.
pub fn subtract(l: Limit, amount: usize) ?Limit {
if (l == .unlimited) return .unlimited;
if (amount > @intFromEnum(l)) return null;
return @enumFromInt(@intFromEnum(l) - amount);
}
};
pub const Reader = @import("io/Reader.zig");
pub const Writer = @import("io/Writer.zig");
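To make the `Limit` semantics concrete, a small test-style example; it assumes only the methods defined in the hunk above:

test "Limit basics" {
    const Limit = std.io.Limit;
    var buf: [8]u8 = undefined;
    const l: Limit = .limited(4);
    // `slice` clamps to the limit; `slice1` leaves one spare byte so a
    // caller can tell "hit the limit" apart from "end of stream".
    try std.testing.expectEqual(@as(usize, 4), l.slice(&buf).len);
    try std.testing.expectEqual(@as(usize, 5), l.slice1(&buf).len);
    // `subtract` returns null when the limit would be exceeded.
    try std.testing.expectEqual(Limit.limited(1), l.subtract(3).?);
    try std.testing.expectEqual(@as(?Limit, null), l.subtract(5));
    // `.unlimited` absorbs any subtraction.
    try std.testing.expect(Limit.unlimited.subtract(999).? == .unlimited);
}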

View File

@ -130,7 +130,7 @@ pub fn clearRetainingCapacity(aw: *AllocatingWriter) void {
}
fn writeSplat(context: ?*anyopaque, data: []const []const u8, splat: usize) std.io.Writer.Error!usize {
if (data.len == 0 and splat == 0) return 0;
assert(data.len != 0);
const aw: *AllocatingWriter = @alignCast(@ptrCast(context));
const start_len = aw.written.len;
const bw = &aw.buffered_writer;

View File

@ -253,6 +253,12 @@ pub fn toss(br: *BufferedReader, n: usize) void {
assert(br.seek <= br.end);
}
/// Equivalent to `toss(br.bufferedLen())`.
pub fn tossAll(br: *BufferedReader) void {
br.seek = 0;
br.end = 0;
}
/// Equivalent to `peek` followed by `toss`.
///
/// The data returned is invalidated by the next call to `take`, `peek`,
@ -791,8 +797,9 @@ pub fn fill(br: *BufferedReader, n: usize) Reader.Error!void {
}
}
/// Fills the buffer with at least one more byte of data, without advancing the
/// seek position, doing exactly one underlying read.
/// Without advancing the seek position, does exactly one underlying read, filling the buffer as
/// much as possible. This may result in zero bytes added to the buffer, which is not an end of
/// stream condition. End of stream is communicated via returning `error.EndOfStream`.
///
/// Asserts buffer capacity is at least 1.
pub fn fillMore(br: *BufferedReader) Reader.Error!void {

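A hedged sketch of a consumer loop built on these semantics; `br.buffer[br.seek..br.end]` uses the fields shown elsewhere in this commit, and `process` is a hypothetical sink:

fn drain(br: *std.io.BufferedReader) !void {
    while (true) {
        br.fillMore() catch |err| switch (err) {
            // End of stream arrives as an error, never as a zero-length fill.
            error.EndOfStream => break,
            else => |e| return e,
        };
        process(br.buffer[br.seek..br.end]); // hypothetical sink
        br.tossAll();
    }
}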
View File

@ -14,14 +14,17 @@ vtable: *const VTable,
pub const VTable = struct {
/// Writes bytes from the internally tracked stream position to `bw`.
///
/// Returns the number of bytes written, which will be at minimum `0` and at
/// most `limit`. The number of bytes read, including zero, does not
/// indicate end of stream.
/// Returns the number of bytes written, which will be at minimum `0` and
/// at most `limit`. The number returned, including zero, does not indicate
/// end of stream. `limit` is guaranteed to be at least as large as the
/// buffer capacity of `bw`.
///
/// If the reader has an internal seek position, it moves forward in
/// accordance with the number of bytes return from this function.
/// The reader's internal logical seek position moves forward in accordance
/// with the number of bytes returned from this function.
///
/// The implementation should do a maximum of one underlying read call.
/// Implementations are encouraged to utilize mandatory minimum buffer
/// sizes combined with short reads (returning a value less than `limit`)
/// in order to minimize complexity.
read: *const fn (context: ?*anyopaque, bw: *BufferedWriter, limit: Limit) RwError!usize,
/// Writes bytes from the internally tracked stream position to `data`.
@ -30,10 +33,12 @@ pub const VTable = struct {
/// at most the sum of each data slice length. The number of bytes read,
/// including zero, does not indicate end of stream.
///
/// If the reader has an internal seek position, it moves forward in
/// accordance with the number of bytes return from this function.
/// The reader's internal logical seek position moves forward in accordance
/// with the number of bytes returned from this function.
///
/// The implementation should do a maximum of one underlying read call.
/// Implementations are encouraged to utilize mandatory minimum buffer
/// sizes combined with short reads (returning a value less than the total
/// buffer capacity inside `data`) in order to minimize complexity.
readVec: *const fn (context: ?*anyopaque, data: []const []u8) Error!usize,
/// Consumes bytes from the internally tracked stream position without
@ -43,10 +48,12 @@ pub const VTable = struct {
/// at most `limit`. The number of bytes returned, including zero, does not
/// indicate end of stream.
///
/// If the reader has an internal seek position, it moves forward in
/// accordance with the number of bytes return from this function.
/// The reader's internal logical seek position moves forward in accordance
/// with the number of bytes returned from this function.
///
/// The implementation should do a maximum of one underlying read call.
/// Implementations are encouraged to utilize mandatory minimum buffer
/// sizes combined with short reads (returning a value less than `limit`)
/// in order to minimize complexity.
discard: *const fn (context: ?*anyopaque, limit: Limit) Error!usize,
};
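As an illustration of this contract, a hedged sketch of the smallest possible implementation: an always-empty reader. It assumes `RwError` includes `error.EndOfStream`, which the call sites later in this commit suggest:

fn emptyRead(context: ?*anyopaque, bw: *BufferedWriter, limit: Limit) RwError!usize {
    _ = context;
    _ = bw;
    _ = limit;
    // Zero would mean "nothing this time, try again"; an exhausted stream
    // must be reported through the error instead.
    return error.EndOfStream;
}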
@ -78,59 +85,8 @@ pub const ShortError = error{
ReadFailed,
};
pub const Limit = enum(usize) {
nothing = 0,
unlimited = std.math.maxInt(usize),
_,
/// `std.math.maxInt(usize)` is interpreted to mean `.unlimited`.
pub fn limited(n: usize) Limit {
return @enumFromInt(n);
}
pub fn min(a: Limit, b: Limit) Limit {
return @enumFromInt(@min(@intFromEnum(a), @intFromEnum(b)));
}
pub fn minInt(l: Limit, n: usize) usize {
return @min(n, @intFromEnum(l));
}
pub fn slice(l: Limit, s: []u8) []u8 {
return s[0..l.minInt(s.len)];
}
pub fn sliceConst(l: Limit, s: []const u8) []const u8 {
return s[0..l.minInt(s.len)];
}
pub fn toInt(l: Limit) ?usize {
return switch (l) {
else => @intFromEnum(l),
.unlimited => null,
};
}
/// Reduces a slice to account for the limit, leaving room for one extra
/// byte above the limit, allowing for the use case of differentiating
/// between end-of-stream and reaching the limit.
pub fn slice1(l: Limit, non_empty_buffer: []u8) []u8 {
assert(non_empty_buffer.len >= 1);
return non_empty_buffer[0..@min(@intFromEnum(l) +| 1, non_empty_buffer.len)];
}
pub fn nonzero(l: Limit) bool {
return @intFromEnum(l) > 0;
}
/// Return a new limit reduced by `amount` or return `null` indicating
/// limit would be exceeded.
pub fn subtract(l: Limit, amount: usize) ?Limit {
if (l == .unlimited) return .unlimited;
if (amount > @intFromEnum(l)) return null;
return @enumFromInt(@intFromEnum(l) - amount);
}
};
/// TODO: no pub
pub const Limit = std.io.Limit;
pub fn read(r: Reader, bw: *BufferedWriter, limit: Limit) RwError!usize {
const before = bw.count;

View File

@ -22,7 +22,7 @@ fn passthruRead(context: ?*anyopaque, bw: *BufferedWriter, limit: Reader.Limit)
const l: *Limited = @alignCast(@ptrCast(context));
const combined_limit = limit.min(l.remaining);
const n = try l.unlimited_reader.read(bw, combined_limit);
l.remaining.subtract(n);
l.remaining = l.remaining.subtract(n).?;
return n;
}
@ -30,24 +30,24 @@ fn passthruDiscard(context: ?*anyopaque, limit: Reader.Limit) Reader.Error!usize
const l: *Limited = @alignCast(@ptrCast(context));
const combined_limit = limit.min(l.remaining);
const n = try l.unlimited_reader.discard(combined_limit);
l.remaining.subtract(n);
l.remaining = l.remaining.subtract(n).?;
return n;
}
fn passthruReadVec(context: ?*anyopaque, data: []const []u8) Reader.Error!usize {
const l: *Limited = @alignCast(@ptrCast(context));
if (data.len == 0) return 0;
if (data[0].len >= @intFromEnum(l.limit)) {
const n = try l.unlimited_reader.readVec(&.{l.limit.slice(data[0])});
l.remaining.subtract(n);
if (data[0].len >= @intFromEnum(l.remaining)) {
const n = try l.unlimited_reader.readVec(&.{l.remaining.slice(data[0])});
l.remaining = l.remaining.subtract(n).?;
return n;
}
var total: usize = 0;
for (data, 0..) |buf, i| {
total += buf.len;
if (total > @intFromEnum(l.limit)) {
if (total > @intFromEnum(l.remaining)) {
const n = try l.unlimited_reader.readVec(data[0..i]);
l.remaining.subtract(n);
l.remaining = l.remaining.subtract(n).?;
return n;
}
}

View File

@ -68,7 +68,8 @@ pub const FileError = std.fs.File.PReadError || error{
Unimplemented,
};
pub const Limit = std.io.Reader.Limit;
/// TODO: no pub
pub const Limit = std.io.Limit;
pub const Offset = enum(u64) {
zero = 0,

View File

@ -79,30 +79,6 @@ pub const File = extern struct {
VolumeFull,
};
pub const SeekableStream = io.SeekableStream(
*File,
SeekError,
SeekError,
setPosition,
seekBy,
getPosition,
getEndPos,
);
pub const Reader = io.Reader(*File, ReadError, read);
pub const Writer = io.Writer(*File, WriteError, write);
pub fn seekableStream(self: *File) SeekableStream {
return .{ .context = self };
}
pub fn reader(self: *File) Reader {
return .{ .context = self };
}
pub fn writer(self: *File) Writer {
return .{ .context = self };
}
pub fn open(
self: *const File,
file_name: [*:0]const u16,

View File

@ -609,6 +609,11 @@ pub const TmpDir = struct {
self.parent_dir.close();
self.* = undefined;
}
pub fn createFile(td: *TmpDir) std.fs.File {
_ = td;
@panic("TODO");
}
};
pub fn tmpDir(opts: std.fs.Dir.OpenOptions) TmpDir {

View File

@ -54,8 +54,8 @@ pub const Tz = struct {
},
};
pub fn parse(allocator: std.mem.Allocator, reader: anytype) !Tz {
var legacy_header = try reader.readStruct(Header);
pub fn parse(allocator: std.mem.Allocator, reader: *std.io.BufferedReader) !Tz {
var legacy_header = try reader.takeStruct(Header);
if (!std.mem.eql(u8, &legacy_header.magic, "TZif")) return error.BadHeader;
if (legacy_header.version != 0 and legacy_header.version != '2' and legacy_header.version != '3') return error.BadVersion;
@ -70,7 +70,7 @@ pub const Tz = struct {
const skipv = legacy_header.counts.timecnt * 5 + legacy_header.counts.typecnt * 6 + legacy_header.counts.charcnt + legacy_header.counts.leapcnt * 8 + legacy_header.counts.isstdcnt + legacy_header.counts.isutcnt;
try reader.skipBytes(skipv, .{});
var header = try reader.readStruct(Header);
var header = try reader.takeStruct(Header);
if (!std.mem.eql(u8, &header.magic, "TZif")) return error.BadHeader;
if (header.version != '2' and header.version != '3') return error.BadVersion;
if (builtin.target.cpu.arch.endian() != std.builtin.Endian.big) {
@ -215,9 +215,10 @@ pub const Tz = struct {
test "slim" {
const data = @embedFile("tz/asia_tokyo.tzif");
var in_stream = std.io.fixedBufferStream(data);
var in_stream: std.io.BufferedReader = undefined;
in_stream.initFixed(data);
var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader());
var tz = try std.Tz.parse(std.testing.allocator, &in_stream);
defer tz.deinit();
try std.testing.expectEqual(tz.transitions.len, 9);
@ -228,9 +229,10 @@ test "slim" {
test "fat" {
const data = @embedFile("tz/antarctica_davis.tzif");
var in_stream = std.io.fixedBufferStream(data);
var in_stream: std.io.BufferedReader = undefined;
in_stream.initFixed(data);
var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader());
var tz = try std.Tz.parse(std.testing.allocator, &in_stream);
defer tz.deinit();
try std.testing.expectEqual(tz.transitions.len, 8);
@ -241,9 +243,10 @@ test "fat" {
test "legacy" {
// Taken from Slackware 8.0, from 2001
const data = @embedFile("tz/europe_vatican.tzif");
var in_stream = std.io.fixedBufferStream(data);
var in_stream: std.io.BufferedReader = undefined;
in_stream.initFixed(data);
var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader());
var tz = try std.Tz.parse(std.testing.allocator, &in_stream);
defer tz.deinit();
try std.testing.expectEqual(tz.transitions.len, 170);

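The `readStruct` → `takeStruct` rename above is mechanical for callers; a hedged sketch of the new spelling against a fixed buffer:

var br: std.io.BufferedReader = undefined;
br.initFixed(data);
// Reads @sizeOf(Header) bytes, returns them as a Header value, and
// advances the reader past them.
const legacy_header = try br.takeStruct(Header);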
View File

@ -112,7 +112,7 @@ pub const EndRecord = extern struct {
return record;
}
pub const FindFileError = File.GetEndPosError || File.SeekError || error{
pub const FindFileError = File.GetEndPosError || File.SeekError || File.ReadError || error{
ZipNoEndRecord,
EndOfStream,
};
@ -138,6 +138,7 @@ pub const EndRecord = extern struct {
var br = fr.interface().unbuffered();
br.readSlice(read_buf) catch |err| switch (err) {
error.ReadFailed => return fr.err.?,
error.EndOfStream => return error.EndOfStream,
};
loaded_len = new_loaded_len;
}
@ -158,45 +159,83 @@ pub const EndRecord = extern struct {
}
};
/// Decompresses the given data from `reader` into `writer`. Stops early if more
/// than `uncompressed_size` bytes are processed and verifies that exactly that
/// number of bytes are decompressed. Returns the CRC-32 of the uncompressed data.
/// `writer` can be anything with a `writeAll(self: *Self, chunk: []const u8) anyerror!void` method.
pub fn decompress(
method: CompressionMethod,
uncompressed_size: u64,
reader: *std.io.BufferedReader,
writer: *std.io.BufferedWriter,
compressed_remaining: *u64,
) !u32 {
var hash = std.hash.Crc32.init();
var total_uncompressed: u64 = 0;
switch (method) {
.store => {
reader.writeAll(writer, .limited(compressed_remaining.*)) catch |err| switch (err) {
error.EndOfStream => return error.ZipDecompressTruncated,
else => |e| return e,
};
total_uncompressed += compressed_remaining.*;
},
.deflate => {
var decompressor: std.compress.flate.Decompressor = .init(reader);
while (try decompressor.next()) |chunk| {
try writer.writeAll(chunk);
hash.update(chunk);
total_uncompressed += @intCast(chunk.len);
if (total_uncompressed > uncompressed_size)
return error.ZipUncompressSizeTooSmall;
compressed_remaining.* -= chunk.len;
}
},
_ => return error.UnsupportedCompressionMethod,
}
if (total_uncompressed != uncompressed_size)
return error.ZipUncompressSizeMismatch;
return hash.final();
}
pub const Decompress = union {
inflate: std.compress.flate.Decompress,
store: *std.io.BufferedReader,
fn readable(
d: *Decompress,
reader: *std.io.BufferedReader,
method: CompressionMethod,
buffer: []u8,
) std.io.BufferedReader {
switch (method) {
.store => {
d.* = .{ .store = reader };
return .{
.unbuffered_reader = .{
.context = d,
.vtable = &.{
.read = readStore,
.readVec = readVecUnimplemented,
.discard = discardUnimplemented,
},
},
.buffer = buffer,
.end = 0,
.seek = 0,
};
},
.deflate => {
d.* = .{ .inflate = .init(reader, .raw) };
return .{
.unbuffered_reader = .{
.context = d,
.vtable = &.{
.read = readDeflate,
.readVec = readVecUnimplemented,
.discard = discardUnimplemented,
},
},
.buffer = buffer,
.end = 0,
.seek = 0,
};
},
else => unreachable,
}
}
fn readStore(
context: ?*anyopaque,
writer: *std.io.BufferedWriter,
limit: std.io.Limit,
) std.io.Reader.RwError!usize {
const d: *Decompress = @ptrCast(@alignCast(context));
return d.store.read(writer, limit);
}
fn readDeflate(
context: ?*anyopaque,
writer: *std.io.BufferedWriter,
limit: std.io.Limit,
) std.io.Reader.RwError!usize {
const d: *Decompress = @ptrCast(@alignCast(context));
return std.compress.flate.Decompress.read(&d.inflate, writer, limit);
}
fn readVecUnimplemented(context: ?*anyopaque, data: []const []u8) std.io.Reader.Error!usize {
_ = context;
_ = data;
@panic("TODO remove readVec primitive");
}
fn discardUnimplemented(context: ?*anyopaque, limit: std.io.Reader.Limit) std.io.Reader.Error!usize {
_ = context;
_ = limit;
@panic("TODO allow discard to be null");
}
};
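Hedged usage of the adapter defined above, mirroring the call in the extraction code further down; `compressed_br` stands for any source reader and the buffer size is arbitrary:

var decompress: Decompress = undefined;
var buf: [4096]u8 = undefined;
// Only .store and .deflate are supported; callers validate the method first.
var decompress_br = decompress.readable(compressed_br, method, &buf);
// decompress_br is an ordinary std.io.BufferedReader from here on.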
fn isBadFilename(filename: []const u8) bool {
if (filename.len == 0 or filename[0] == '/')
@ -299,8 +338,9 @@ pub const Iterator = struct {
return error.ZipTruncated;
try input.seekTo(stream_len - locator_end_offset);
var br = input.interface().unbuffered();
const locator = br.readStructEndian(EndLocator64, .little) catch |err| switch (err) {
const locator = br.takeStructEndian(EndLocator64, .little) catch |err| switch (err) {
error.ReadFailed => return input.err.?,
error.EndOfStream => return error.EndOfStream,
};
if (!std.mem.eql(u8, &locator.signature, &end_locator64_sig))
return error.ZipBadLocatorSig;
@ -311,8 +351,9 @@ pub const Iterator = struct {
try input.seekTo(locator.record_file_offset);
const record64 = br.readStructEndian(EndRecord64, .little) catch |err| switch (err) {
const record64 = br.takeStructEndian(EndRecord64, .little) catch |err| switch (err) {
error.ReadFailed => return input.err.?,
error.EndOfStream => return error.EndOfStream,
};
if (!std.mem.eql(u8, &record64.signature, &end_record64_sig))
@ -367,8 +408,9 @@ pub const Iterator = struct {
const input = self.input;
try input.seekTo(header_zip_offset);
var br = input.interface().unbuffered();
const header = br.readStructEndian(CentralDirectoryFileHeader, .little) catch |err| switch (err) {
const header = br.takeStructEndian(CentralDirectoryFileHeader, .little) catch |err| switch (err) {
error.ReadFailed => return input.err.?,
error.EndOfStream => return error.EndOfStream,
};
if (!std.mem.eql(u8, &header.signature, &central_file_header_sig))
return error.ZipBadCdOffset;
@ -399,6 +441,7 @@ pub const Iterator = struct {
try input.seekTo(header_zip_offset + @sizeOf(CentralDirectoryFileHeader) + header.filename_len);
br.readSlice(extra) catch |err| switch (err) {
error.ReadFailed => return input.err.?,
error.EndOfStream => return error.EndOfStream,
};
var extra_offset: usize = 0;
@ -454,20 +497,23 @@ pub const Iterator = struct {
) !u32 {
if (filename_buf.len < self.filename_len)
return error.ZipInsufficientBuffer;
switch (self.compression_method) {
.store, .deflate => {},
else => return error.UnsupportedCompressionMethod,
}
const filename = filename_buf[0..self.filename_len];
try stream.seekTo(self.header_zip_offset + @sizeOf(CentralDirectoryFileHeader));
{
const len = try stream.context.reader().readAll(filename);
if (len != filename.len)
return error.ZipBadFileOffset;
try stream.seekTo(self.header_zip_offset + @sizeOf(CentralDirectoryFileHeader));
var stream_br = stream.readable(&.{});
try stream_br.readSlice(filename);
}
const local_data_header_offset: u64 = local_data_header_offset: {
const local_header = blk: {
try stream.seekTo(self.file_offset);
break :blk try stream.context.reader().readStructEndian(LocalFileHeader, .little);
var read_buffer: [@sizeOf(LocalFileHeader)]u8 = undefined;
var stream_br = stream.readable(&read_buffer);
break :blk try stream_br.takeStructEndian(LocalFileHeader, .little);
};
if (!std.mem.eql(u8, &local_header.signature, &local_file_header_sig))
return error.ZipBadFileOffset;
@ -493,9 +539,8 @@ pub const Iterator = struct {
{
try stream.seekTo(self.file_offset + @sizeOf(LocalFileHeader) + local_header.filename_len);
const len = try stream.context.reader().readAll(extra);
if (len != extra.len)
return error.ZipTruncated;
var stream_br = stream.readable(&.{});
try stream_br.readSlice(extra);
}
var extra_offset: usize = 0;
@ -557,21 +602,31 @@ pub const Iterator = struct {
break :blk try dest.createFile(filename, .{ .exclusive = true });
};
defer out_file.close();
var file_writer = out_file.writer();
var file_bw = file_writer.writable(&.{});
const local_data_file_offset: u64 =
@as(u64, self.file_offset) +
@as(u64, @sizeOf(LocalFileHeader)) +
local_data_header_offset;
try stream.seekTo(local_data_file_offset);
var compressed_remaining: u64 = self.compressed_size;
const crc = try decompress(
self.compression_method,
self.uncompressed_size,
stream.context.reader(),
out_file.writer(),
&compressed_remaining,
);
if (compressed_remaining != 0) return error.ZipDecompressTruncated;
return crc;
var limited_file_reader = stream.interface().limited(.limited(self.compressed_size));
var file_read_buffer: [1000]u8 = undefined;
var decompress_read_buffer: [1000]u8 = undefined;
var limited_br = limited_file_reader.reader().buffered(&file_read_buffer);
var decompress: Decompress = undefined;
var decompress_br = decompress.readable(&limited_br, self.compression_method, &decompress_read_buffer);
const start_out = file_bw.count;
var hash_writer = file_bw.hashed(std.hash.Crc32.init());
var hash_bw = hash_writer.writable(&.{});
decompress_br.readAll(&hash_bw, .limited(self.uncompressed_size)) catch |err| switch (err) {
error.ReadFailed => return stream.err.?,
error.WriteFailed => return file_writer.err.?,
error.EndOfStream => return error.ZipDecompressTruncated,
};
if (limited_file_reader.remaining.nonzero()) return error.ZipDecompressTruncated;
const written = file_bw.count - start_out;
if (written != self.uncompressed_size) return error.ZipUncompressSizeMismatch;
return hash_writer.hasher.final();
}
};
};
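The CRC check in the extraction code above routes all output through a hashing adapter; the pattern in isolation, hedged on the `hashed`/`writable` names used in this commit (`bw` is any BufferedWriter):

var hash_writer = bw.hashed(std.hash.Crc32.init());
var hash_bw = hash_writer.writable(&.{});
try hash_bw.writeAll("hello");
// Every byte written through hash_bw also updated the CRC state.
const crc = hash_writer.hasher.final();
_ = crc;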

View File

@ -33,8 +33,10 @@ fn expectFiles(
std.mem.replaceScalar(u8, normalized_sub_path, '\\', '/');
var file = try dir.openFile(normalized_sub_path, .{});
defer file.close();
var file_reader = file.reader();
var file_br = file_reader.readable(&.{});
var content_buf: [4096]u8 = undefined;
const n = try file.reader().readAll(&content_buf);
const n = try file_br.readSliceShort(&content_buf);
try testing.expectEqualStrings(test_file.content, content_buf[0..n]);
}
}
@ -49,24 +51,21 @@ const FileStore = struct {
uncompressed_size: usize,
};
fn makeZip(buf: []u8, files: []const File, options: WriteZipOptions) !std.io.BufferedReader {
fn makeZip(file_writer: *std.fs.File.Writer, files: []const File, options: WriteZipOptions) !void {
const store = try std.testing.allocator.alloc(FileStore, files.len);
defer std.testing.allocator.free(store);
return makeZipWithStore(buf, files, options, store);
return makeZipWithStore(file_writer, files, options, store);
}
fn makeZipWithStore(
buf: []u8,
file_writer: *std.fs.File.Writer,
files: []const File,
options: WriteZipOptions,
store: []FileStore,
) !std.io.BufferedReader {
var out: std.io.BufferedWriter = undefined;
out.initFixed(buf);
try writeZip(&out, files, store, options);
var result: std.io.BufferedReader = undefined;
result.initFixed(buf[0..out.end]);
return result;
) !void {
var buffer: [200]u8 = undefined;
var bw = file_writer.writable(&buffer);
try writeZip(&bw, files, store, options);
}
const WriteZipOptions = struct {
@ -201,9 +200,12 @@ const Zipper = struct {
const offset = writer.count;
var br: std.io.BufferedReader = undefined;
br.initFixed(@constCast(opt.content));
try std.compress.flate.deflate.compress(.raw, &br, writer, .{});
var compress: std.compress.flate.Compress = .init(&br, .{});
var compress_br = compress.readable(&.{});
const n = try compress_br.readRemaining(writer);
assert(br.seek == opt.content.len);
compressed_size = @intCast(writer.count - offset);
try testing.expectEqual(n, writer.count - offset);
compressed_size = @intCast(n);
},
else => unreachable,
}
@ -306,21 +308,27 @@ fn testZipWithStore(
write_opt: WriteZipOptions,
store: []FileStore,
) !void {
var zip_buf: [4096]u8 = undefined;
var fbs = try makeZipWithStore(&zip_buf, test_files, write_opt, store);
var tmp = testing.tmpDir(.{ .no_follow = true });
defer tmp.cleanup();
try zip.extract(tmp.dir, fbs.seekableStream(), options);
var file = tmp.createFile();
defer file.close();
var file_writer = file.writer();
try makeZipWithStore(&file_writer, test_files, write_opt, store);
var file_reader = file_writer.moveToReader();
try zip.extract(tmp.dir, &file_reader, options);
try expectFiles(test_files, tmp.dir, .{});
}
fn testZipError(expected_error: anyerror, file: File, options: zip.ExtractOptions) !void {
var zip_buf: [4096]u8 = undefined;
var store: [1]FileStore = undefined;
var fbs = try makeZipWithStore(&zip_buf, &[_]File{file}, .{}, &store);
var tmp = testing.tmpDir(.{ .no_follow = true });
defer tmp.cleanup();
try testing.expectError(expected_error, zip.extract(tmp.dir, fbs.seekableStream(), options));
const tmp_file = tmp.createFile();
defer tmp_file.close();
var file_writer = tmp_file.writer();
var store: [1]FileStore = undefined;
try makeZipWithStore(&file_writer, &[_]File{file}, .{}, &store);
var file_reader = file_writer.moveToReader();
try testing.expectError(expected_error, zip.extract(tmp.dir, &file_reader, options));
}
test "zip one file" {
@ -416,53 +424,93 @@ test "zip64" {
test "bad zip files" {
var tmp = testing.tmpDir(.{ .no_follow = true });
defer tmp.cleanup();
var zip_buf: [4096]u8 = undefined;
var buffer: [4096]u8 = undefined;
const file_a = [_]File{.{ .name = "a", .content = "", .compression = .store }};
{
var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .sig = [_]u8{ 1, 2, 3, 4 } } });
try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, fbs.seekableStream(), .{}));
const tmp_file = tmp.createFile();
defer tmp_file.close();
var file_writer = tmp_file.writable(&buffer);
try makeZip(&file_writer, &.{}, .{ .end = .{ .sig = [_]u8{ 1, 2, 3, 4 } } });
var file_reader = file_writer.moveToReader();
try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, &file_reader, .{}));
}
{
var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .comment_len = 1 } });
try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, fbs.seekableStream(), .{}));
const tmp_file = tmp.createFile();
defer tmp_file.close();
var file_writer = tmp_file.writable(&buffer);
try makeZip(&file_writer, &.{}, .{ .end = .{ .comment_len = 1 } });
var file_reader = file_writer.moveToReader();
try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, &file_reader, .{}));
}
{
var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .comment = "a", .comment_len = 0 } });
try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, fbs.seekableStream(), .{}));
const tmp_file = tmp.createFile();
defer tmp_file.close();
var file_writer = tmp_file.writable(&buffer);
try makeZip(&file_writer, &.{}, .{ .end = .{ .comment = "a", .comment_len = 0 } });
var file_reader = file_writer.moveToReader();
try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, &file_reader, .{}));
}
{
var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .disk_number = 1 } });
try testing.expectError(error.ZipMultiDiskUnsupported, zip.extract(tmp.dir, fbs.seekableStream(), .{}));
const tmp_file = tmp.createFile();
defer tmp_file.close();
var file_writer = tmp_file.writable(&buffer);
try makeZip(&file_writer, &.{}, .{ .end = .{ .disk_number = 1 } });
var file_reader = file_writer.moveToReader();
try testing.expectError(error.ZipMultiDiskUnsupported, zip.extract(tmp.dir, &file_reader, .{}));
}
{
var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .central_directory_disk_number = 1 } });
try testing.expectError(error.ZipMultiDiskUnsupported, zip.extract(tmp.dir, fbs.seekableStream(), .{}));
const tmp_file = tmp.createFile();
defer tmp_file.close();
var file_writer = tmp_file.writable(&buffer);
try makeZip(&file_writer, &.{}, .{ .end = .{ .central_directory_disk_number = 1 } });
var file_reader = file_writer.moveToReader();
try testing.expectError(error.ZipMultiDiskUnsupported, zip.extract(tmp.dir, &file_reader, .{}));
}
{
var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .record_count_disk = 1 } });
try testing.expectError(error.ZipDiskRecordCountTooLarge, zip.extract(tmp.dir, fbs.seekableStream(), .{}));
const tmp_file = tmp.createFile();
defer tmp_file.close();
var file_writer = tmp_file.writable(&buffer);
try makeZip(&file_writer, &.{}, .{ .end = .{ .record_count_disk = 1 } });
var file_reader = file_writer.moveToReader();
try testing.expectError(error.ZipDiskRecordCountTooLarge, zip.extract(tmp.dir, &file_reader, .{}));
}
{
var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .central_directory_size = 1 } });
try testing.expectError(error.ZipCdOversized, zip.extract(tmp.dir, fbs.seekableStream(), .{}));
const tmp_file = tmp.createFile();
defer tmp_file.close();
var file_writer = tmp_file.writable(&buffer);
try makeZip(&file_writer, &.{}, .{ .end = .{ .central_directory_size = 1 } });
var file_reader = file_writer.moveToReader();
try testing.expectError(error.ZipCdOversized, zip.extract(tmp.dir, &file_reader, .{}));
}
{
var fbs = try makeZip(&zip_buf, &file_a, .{ .end = .{ .central_directory_size = 0 } });
try testing.expectError(error.ZipCdUndersized, zip.extract(tmp.dir, fbs.seekableStream(), .{}));
const tmp_file = tmp.createFile();
defer tmp_file.close();
var file_writer = tmp_file.writable(&buffer);
try makeZip(&file_writer, &file_a, .{ .end = .{ .central_directory_size = 0 } });
var file_reader = file_writer.moveToReader();
try testing.expectError(error.ZipCdUndersized, zip.extract(tmp.dir, &file_reader, .{}));
}
{
var fbs = try makeZip(&zip_buf, &file_a, .{ .end = .{ .central_directory_offset = 0 } });
try testing.expectError(error.ZipBadCdOffset, zip.extract(tmp.dir, fbs.seekableStream(), .{}));
const tmp_file = tmp.createFile();
defer tmp_file.close();
var file_writer = tmp_file.writable(&buffer);
try makeZip(&file_writer, &file_a, .{ .end = .{ .central_directory_offset = 0 } });
var file_reader = file_writer.moveToReader();
try testing.expectError(error.ZipBadCdOffset, zip.extract(tmp.dir, &file_reader, .{}));
}
{
var fbs = try makeZip(&zip_buf, &file_a, .{
const tmp_file = tmp.createFile();
defer tmp_file.close();
var file_writer = tmp_file.writable(&buffer);
try makeZip(&file_writer, &file_a, .{
.end = .{
.zip64 = .{ .locator_sig = [_]u8{ 1, 2, 3, 4 } },
.central_directory_size = std.math.maxInt(u32), // trigger 64
},
});
try testing.expectError(error.ZipBadLocatorSig, zip.extract(tmp.dir, fbs.seekableStream(), .{}));
var file_reader = file_writer.moveToReader();
try testing.expectError(error.ZipBadLocatorSig, zip.extract(tmp.dir, &file_reader, .{}));
}
}

View File

@ -1357,8 +1357,7 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource.Git) anyerror!U
defer pack_dir.close();
var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true });
defer pack_file.close();
var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
try fifo.pump(resource.fetch_stream.reader(), pack_file.writer());
_ = try resource.fetch_stream.reader().readRemaining(pack_file.writer());
try pack_file.sync();
var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true });
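The fifo change above generalizes to any reader/writer pair: `readRemaining` drains the source into the destination and returns the byte count, replacing the old `std.fifo` pump idiom. A hedged helper:

fn copyAll(src: anytype, dst: anytype) !usize {
    // Streams until end of stream; the count of transferred bytes is
    // returned in case the caller wants it.
    return try src.readRemaining(dst);
}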