From e76afef8477ae6f1df7c8132342aa6ddea51d4fb Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 6 May 2025 20:21:01 -0700 Subject: [PATCH] std.zip: operate on fs.File.Reader rather than anytype --- lib/std/io/BufferedReader.zig | 15 - lib/std/io/Writer/Null.zig | 2 +- lib/std/zip.zig | 895 ++++++++++++++-------------------- lib/std/zip/test.zig | 277 +++++++++-- lib/std/zon/parse.zig | 21 +- lib/std/zon/stringify.zig | 75 +-- 6 files changed, 649 insertions(+), 636 deletions(-) diff --git a/lib/std/io/BufferedReader.zig b/lib/std/io/BufferedReader.zig index 8f9c13c193..1369f69c1a 100644 --- a/lib/std/io/BufferedReader.zig +++ b/lib/std/io/BufferedReader.zig @@ -192,21 +192,6 @@ fn passthruReadVec(context: ?*anyopaque, data: []const []u8) Reader.Error!usize return readVecLimit(br, data, .unlimited); } -pub fn seekBy(br: *BufferedReader, seek_by: i64) !void { - if (seek_by < 0) try br.seekBackwardBy(@abs(seek_by)) else try br.seekForwardBy(@abs(seek_by)); -} - -pub fn seekBackwardBy(br: *BufferedReader, seek_by: u64) !void { - if (seek_by > br.end - br.seek) return error.Unseekable; // TODO - br.seek += @abs(seek_by); -} - -pub fn seekForwardBy(br: *BufferedReader, seek_by: u64) !void { - const seek, const need_unbuffered_seek = @subWithOverflow(br.seek, @abs(seek_by)); - if (need_unbuffered_seek > 0) return error.Unseekable; // TODO - br.seek = seek; -} - /// Returns the next `len` bytes from `unbuffered_reader`, filling the buffer as /// necessary. 
/// diff --git a/lib/std/io/Writer/Null.zig b/lib/std/io/Writer/Null.zig index 445484f315..4ae90844d3 100644 --- a/lib/std/io/Writer/Null.zig +++ b/lib/std/io/Writer/Null.zig @@ -35,7 +35,7 @@ fn writeFile( limit: Writer.Limit, headers_and_trailers: []const []const u8, headers_len: usize, -) Writer.Error!usize { +) Writer.FileError!usize { const nw: *NullWriter = @alignCast(@ptrCast(context)); var n: usize = 0; if (offset == .none) { diff --git a/lib/std/zip.zig b/lib/std/zip.zig index 66a7da1021..5226fe6909 100644 --- a/lib/std/zip.zig +++ b/lib/std/zip.zig @@ -5,11 +5,8 @@ const builtin = @import("builtin"); const std = @import("std"); -const testing = std.testing; - -pub const testutil = @import("zip/test.zig"); -const File = testutil.File; -const FileStore = testutil.FileStore; +const File = std.fs.File; +const is_le = builtin.target.cpu.arch.endian() == .little; pub const CompressionMethod = enum(u16) { store = 0, @@ -95,57 +92,71 @@ pub const EndRecord = extern struct { central_directory_size: u32 align(1), central_directory_offset: u32 align(1), comment_len: u16 align(1), + pub fn need_zip64(self: EndRecord) bool { return isMaxInt(self.record_count_disk) or isMaxInt(self.record_count_total) or isMaxInt(self.central_directory_size) or isMaxInt(self.central_directory_offset); } -}; -/// Find and return the end record for the given seekable zip stream. -/// Note that `seekable_stream` must be an instance of `std.io.SeekableStream` and -/// its context must also have a `.reader()` method that returns an instance of -/// `std.io.Reader`. 
-pub fn findEndRecord(seekable_stream: anytype, stream_len: u64) !EndRecord { - var buf: [@sizeOf(EndRecord) + std.math.maxInt(u16)]u8 = undefined; - const record_len_max = @min(stream_len, buf.len); - var loaded_len: u32 = 0; + pub const FindBufferError = error{ ZipNoEndRecord, ZipTruncated }; - var comment_len: u16 = 0; - while (true) { - const record_len: u32 = @as(u32, comment_len) + @sizeOf(EndRecord); - if (record_len > record_len_max) - return error.ZipNoEndRecord; - - if (record_len > loaded_len) { - const new_loaded_len = @min(loaded_len + 300, record_len_max); - const read_len = new_loaded_len - loaded_len; - - try seekable_stream.seekTo(stream_len - @as(u64, new_loaded_len)); - const read_buf: []u8 = buf[buf.len - new_loaded_len ..][0..read_len]; - const len = try seekable_stream.context.reader().readAll(read_buf); - if (len != read_len) - return error.ZipTruncated; - loaded_len = new_loaded_len; - } - - const record_bytes = buf[buf.len - record_len ..][0..@sizeOf(EndRecord)]; - if (std.mem.eql(u8, record_bytes[0..4], &end_record_sig) and - std.mem.readInt(u16, record_bytes[20..22], .little) == comment_len) - { - const record: *align(1) EndRecord = @ptrCast(record_bytes.ptr); - if (builtin.target.cpu.arch.endian() != .little) { - std.mem.byteSwapAllFields(@TypeOf(record.*), record); - } - return record.*; - } - - if (comment_len == std.math.maxInt(u16)) - return error.ZipNoEndRecord; - comment_len += 1; + /// TODO audit this logic + pub fn findBuffer(buffer: []const u8) FindBufferError!EndRecord { + const pos = std.mem.lastIndexOf(u8, buffer, &end_record_sig) orelse return error.ZipNoEndRecord; + if (pos + @sizeOf(EndRecord) > buffer.len) return error.EndOfStream; + const record_ptr: *EndRecord = @ptrCast(buffer[pos..][0..@sizeOf(EndRecord)]); + var record = record_ptr.*; + if (!is_le) std.mem.byteSwapAllFields(EndRecord, &record); + return record; } -} + + pub const FindFileError = File.GetEndPosError || File.SeekError || error{ + ZipNoEndRecord, + 
EndOfStream, + }; + + pub fn findFile(fr: *File.Reader) FindFileError!EndRecord { + const end_pos = try fr.getSize(); + + var buf: [@sizeOf(EndRecord) + std.math.maxInt(u16)]u8 = undefined; + const record_len_max = @min(end_pos, buf.len); + var loaded_len: u32 = 0; + var comment_len: u16 = 0; + while (true) { + const record_len: u32 = @as(u32, comment_len) + @sizeOf(EndRecord); + if (record_len > record_len_max) + return error.ZipNoEndRecord; + + if (record_len > loaded_len) { + const new_loaded_len = @min(loaded_len + 300, record_len_max); + const read_len = new_loaded_len - loaded_len; + + try fr.seekTo(end_pos - @as(u64, new_loaded_len)); + const read_buf: []u8 = buf[buf.len - new_loaded_len ..][0..read_len]; + var br = fr.interface().unbuffered(); + br.readSlice(read_buf) catch |err| switch (err) { + error.ReadFailed => return fr.err.?, + }; + loaded_len = new_loaded_len; + } + + const record_bytes = buf[buf.len - record_len ..][0..@sizeOf(EndRecord)]; + if (std.mem.eql(u8, record_bytes[0..4], &end_record_sig) and + std.mem.readInt(u16, record_bytes[20..22], .little) == comment_len) + { + const record: *align(1) EndRecord = @ptrCast(record_bytes.ptr); + if (!is_le) std.mem.byteSwapAllFields(EndRecord, record); + return record.*; + } + + if (comment_len == std.math.maxInt(u16)) + return error.ZipNoEndRecord; + comment_len += 1; + } + } +}; /// Decompresses the given data from `reader` into `writer`. 
Stops early if more /// than `uncompressed_size` bytes are processed and verifies that exactly that @@ -248,319 +259,322 @@ fn readZip64FileExtents(comptime T: type, header: T, extents: *FileExtents, data } } -pub fn Iterator(comptime SeekableStream: type) type { - return struct { - stream: SeekableStream, +pub const Iterator = struct { + input: *File.Reader, - cd_record_count: u64, - cd_zip_offset: u64, - cd_size: u64, + cd_record_count: u64, + cd_zip_offset: u64, + cd_size: u64, - cd_record_index: u64 = 0, - cd_record_offset: u64 = 0, + cd_record_index: u64 = 0, + cd_record_offset: u64 = 0, - const Self = @This(); + pub fn init(input: *File.Reader) !Iterator { + const end_record = try EndRecord.findFile(input); - pub fn init(stream: SeekableStream) !Self { - const stream_len = try stream.getEndPos(); + if (!isMaxInt(end_record.record_count_disk) and end_record.record_count_disk > end_record.record_count_total) + return error.ZipDiskRecordCountTooLarge; - const end_record = try findEndRecord(stream, stream_len); + if (end_record.disk_number != 0 or end_record.central_directory_disk_number != 0) + return error.ZipMultiDiskUnsupported; - if (!isMaxInt(end_record.record_count_disk) and end_record.record_count_disk > end_record.record_count_total) - return error.ZipDiskRecordCountTooLarge; - - if (end_record.disk_number != 0 or end_record.central_directory_disk_number != 0) + { + const counts_valid = !isMaxInt(end_record.record_count_disk) and !isMaxInt(end_record.record_count_total); + if (counts_valid and end_record.record_count_disk != end_record.record_count_total) return error.ZipMultiDiskUnsupported; - - { - const counts_valid = !isMaxInt(end_record.record_count_disk) and !isMaxInt(end_record.record_count_total); - if (counts_valid and end_record.record_count_disk != end_record.record_count_total) - return error.ZipMultiDiskUnsupported; - } - - var result = Self{ - .stream = stream, - .cd_record_count = end_record.record_count_total, - .cd_zip_offset = 
end_record.central_directory_offset, - .cd_size = end_record.central_directory_size, - }; - if (!end_record.need_zip64()) return result; - - const locator_end_offset: u64 = @as(u64, end_record.comment_len) + @sizeOf(EndRecord) + @sizeOf(EndLocator64); - if (locator_end_offset > stream_len) - return error.ZipTruncated; - try stream.seekTo(stream_len - locator_end_offset); - const locator = try stream.context.reader().readStructEndian(EndLocator64, .little); - if (!std.mem.eql(u8, &locator.signature, &end_locator64_sig)) - return error.ZipBadLocatorSig; - if (locator.zip64_disk_count != 0) - return error.ZipUnsupportedZip64DiskCount; - if (locator.total_disk_count != 1) - return error.ZipMultiDiskUnsupported; - - try stream.seekTo(locator.record_file_offset); - - const record64 = try stream.context.reader().readStructEndian(EndRecord64, .little); - - if (!std.mem.eql(u8, &record64.signature, &end_record64_sig)) - return error.ZipBadEndRecord64Sig; - - if (record64.end_record_size < @sizeOf(EndRecord64) - 12) - return error.ZipEndRecord64SizeTooSmall; - if (record64.end_record_size > @sizeOf(EndRecord64) - 12) - return error.ZipEndRecord64UnhandledExtraData; - - if (record64.version_needed_to_extract > 45) - return error.ZipUnsupportedVersion; - - { - const is_multidisk = record64.disk_number != 0 or - record64.central_directory_disk_number != 0 or - record64.record_count_disk != record64.record_count_total; - if (is_multidisk) - return error.ZipMultiDiskUnsupported; - } - - if (isMaxInt(end_record.record_count_total)) { - result.cd_record_count = record64.record_count_total; - } else if (end_record.record_count_total != record64.record_count_total) - return error.Zip64RecordCountTotalMismatch; - - if (isMaxInt(end_record.central_directory_offset)) { - result.cd_zip_offset = record64.central_directory_offset; - } else if (end_record.central_directory_offset != record64.central_directory_offset) - return error.Zip64CentralDirectoryOffsetMismatch; - - if 
(isMaxInt(end_record.central_directory_size)) { - result.cd_size = record64.central_directory_size; - } else if (end_record.central_directory_size != record64.central_directory_size) - return error.Zip64CentralDirectorySizeMismatch; - - return result; } - pub fn next(self: *Self) !?Entry { - if (self.cd_record_index == self.cd_record_count) { - if (self.cd_record_offset != self.cd_size) - return if (self.cd_size > self.cd_record_offset) - error.ZipCdOversized - else - error.ZipCdUndersized; - - return null; - } - - const header_zip_offset = self.cd_zip_offset + self.cd_record_offset; - try self.stream.seekTo(header_zip_offset); - const header = try self.stream.context.reader().readStructEndian(CentralDirectoryFileHeader, .little); - if (!std.mem.eql(u8, &header.signature, &central_file_header_sig)) - return error.ZipBadCdOffset; - - self.cd_record_index += 1; - self.cd_record_offset += @sizeOf(CentralDirectoryFileHeader) + header.filename_len + header.extra_len + header.comment_len; - - // Note: checking the version_needed_to_extract doesn't seem to be helpful, i.e. the zip file - // at https://github.com/ninja-build/ninja/releases/download/v1.12.0/ninja-linux.zip - // has an undocumented version 788 but extracts just fine. 
- - if (header.flags.encrypted) - return error.ZipEncryptionUnsupported; - // TODO: check/verify more flags - if (header.disk_number != 0) - return error.ZipMultiDiskUnsupported; - - var extents: FileExtents = .{ - .uncompressed_size = header.uncompressed_size, - .compressed_size = header.compressed_size, - .local_file_header_offset = header.local_file_header_offset, - }; - - if (header.extra_len > 0) { - var extra_buf: [std.math.maxInt(u16)]u8 = undefined; - const extra = extra_buf[0..header.extra_len]; - - { - try self.stream.seekTo(header_zip_offset + @sizeOf(CentralDirectoryFileHeader) + header.filename_len); - const len = try self.stream.context.reader().readAll(extra); - if (len != extra.len) - return error.ZipTruncated; - } - - var extra_offset: usize = 0; - while (extra_offset + 4 <= extra.len) { - const header_id = std.mem.readInt(u16, extra[extra_offset..][0..2], .little); - const data_size = std.mem.readInt(u16, extra[extra_offset..][2..4], .little); - const end = extra_offset + 4 + data_size; - if (end > extra.len) - return error.ZipBadExtraFieldSize; - const data = extra[extra_offset + 4 .. 
end]; - switch (@as(ExtraHeader, @enumFromInt(header_id))) { - .zip64_info => try readZip64FileExtents(CentralDirectoryFileHeader, header, &extents, data), - else => {}, // ignore - } - extra_offset = end; - } - } - - return .{ - .version_needed_to_extract = header.version_needed_to_extract, - .flags = header.flags, - .compression_method = header.compression_method, - .last_modification_time = header.last_modification_time, - .last_modification_date = header.last_modification_date, - .header_zip_offset = header_zip_offset, - .crc32 = header.crc32, - .filename_len = header.filename_len, - .compressed_size = extents.compressed_size, - .uncompressed_size = extents.uncompressed_size, - .file_offset = extents.local_file_header_offset, - }; - } - - pub const Entry = struct { - version_needed_to_extract: u16, - flags: GeneralPurposeFlags, - compression_method: CompressionMethod, - last_modification_time: u16, - last_modification_date: u16, - header_zip_offset: u64, - crc32: u32, - filename_len: u32, - compressed_size: u64, - uncompressed_size: u64, - file_offset: u64, - - pub fn extract( - self: Entry, - stream: SeekableStream, - options: ExtractOptions, - filename_buf: []u8, - dest: std.fs.Dir, - ) !u32 { - if (filename_buf.len < self.filename_len) - return error.ZipInsufficientBuffer; - const filename = filename_buf[0..self.filename_len]; - - try stream.seekTo(self.header_zip_offset + @sizeOf(CentralDirectoryFileHeader)); - - { - const len = try stream.context.reader().readAll(filename); - if (len != filename.len) - return error.ZipBadFileOffset; - } - - const local_data_header_offset: u64 = local_data_header_offset: { - const local_header = blk: { - try stream.seekTo(self.file_offset); - break :blk try stream.context.reader().readStructEndian(LocalFileHeader, .little); - }; - if (!std.mem.eql(u8, &local_header.signature, &local_file_header_sig)) - return error.ZipBadFileOffset; - if (local_header.version_needed_to_extract != self.version_needed_to_extract) - return 
error.ZipMismatchVersionNeeded; - if (local_header.last_modification_time != self.last_modification_time) - return error.ZipMismatchModTime; - if (local_header.last_modification_date != self.last_modification_date) - return error.ZipMismatchModDate; - - if (@as(u16, @bitCast(local_header.flags)) != @as(u16, @bitCast(self.flags))) - return error.ZipMismatchFlags; - if (local_header.crc32 != 0 and local_header.crc32 != self.crc32) - return error.ZipMismatchCrc32; - var extents: FileExtents = .{ - .uncompressed_size = local_header.uncompressed_size, - .compressed_size = local_header.compressed_size, - .local_file_header_offset = 0, - }; - if (local_header.extra_len > 0) { - var extra_buf: [std.math.maxInt(u16)]u8 = undefined; - const extra = extra_buf[0..local_header.extra_len]; - - { - try stream.seekTo(self.file_offset + @sizeOf(LocalFileHeader) + local_header.filename_len); - const len = try stream.context.reader().readAll(extra); - if (len != extra.len) - return error.ZipTruncated; - } - - var extra_offset: usize = 0; - while (extra_offset + 4 <= local_header.extra_len) { - const header_id = std.mem.readInt(u16, extra[extra_offset..][0..2], .little); - const data_size = std.mem.readInt(u16, extra[extra_offset..][2..4], .little); - const end = extra_offset + 4 + data_size; - if (end > local_header.extra_len) - return error.ZipBadExtraFieldSize; - const data = extra[extra_offset + 4 .. 
end]; - switch (@as(ExtraHeader, @enumFromInt(header_id))) { - .zip64_info => try readZip64FileExtents(LocalFileHeader, local_header, &extents, data), - else => {}, // ignore - } - extra_offset = end; - } - } - - if (extents.compressed_size != 0 and - extents.compressed_size != self.compressed_size) - return error.ZipMismatchCompLen; - if (extents.uncompressed_size != 0 and - extents.uncompressed_size != self.uncompressed_size) - return error.ZipMismatchUncompLen; - - if (local_header.filename_len != self.filename_len) - return error.ZipMismatchFilenameLen; - - break :local_data_header_offset @as(u64, local_header.filename_len) + - @as(u64, local_header.extra_len); - }; - - if (isBadFilename(filename)) - return error.ZipBadFilename; - - if (options.allow_backslashes) { - std.mem.replaceScalar(u8, filename, '\\', '/'); - } else { - if (std.mem.indexOfScalar(u8, filename, '\\')) |_| - return error.ZipFilenameHasBackslash; - } - - // All entries that end in '/' are directories - if (filename[filename.len - 1] == '/') { - if (self.uncompressed_size != 0) - return error.ZipBadDirectorySize; - try dest.makePath(filename[0 .. 
filename.len - 1]); - return std.hash.Crc32.hash(&.{}); - } - - const out_file = blk: { - if (std.fs.path.dirname(filename)) |dirname| { - var parent_dir = try dest.makeOpenPath(dirname, .{}); - defer parent_dir.close(); - - const basename = std.fs.path.basename(filename); - break :blk try parent_dir.createFile(basename, .{ .exclusive = true }); - } - break :blk try dest.createFile(filename, .{ .exclusive = true }); - }; - defer out_file.close(); - const local_data_file_offset: u64 = - @as(u64, self.file_offset) + - @as(u64, @sizeOf(LocalFileHeader)) + - local_data_header_offset; - try stream.seekTo(local_data_file_offset); - var compressed_remaining: u64 = self.compressed_size; - const crc = try decompress( - self.compression_method, - self.uncompressed_size, - stream.context.reader(), - out_file.writer(), - &compressed_remaining, - ); - if (compressed_remaining != 0) return error.ZipDecompressTruncated; - return crc; - } + var result: Iterator = .{ + .input = input, + .cd_record_count = end_record.record_count_total, + .cd_zip_offset = end_record.central_directory_offset, + .cd_size = end_record.central_directory_size, }; + if (!end_record.need_zip64()) return result; + + const locator_end_offset: u64 = @as(u64, end_record.comment_len) + @sizeOf(EndRecord) + @sizeOf(EndLocator64); + const stream_len = try input.getSize(); + + if (locator_end_offset > stream_len) + return error.ZipTruncated; + try input.seekTo(stream_len - locator_end_offset); + var br = input.interface().unbuffered(); + const locator = br.readStructEndian(EndLocator64, .little) catch |err| switch (err) { + error.ReadFailed => return input.err.?, + }; + if (!std.mem.eql(u8, &locator.signature, &end_locator64_sig)) + return error.ZipBadLocatorSig; + if (locator.zip64_disk_count != 0) + return error.ZipUnsupportedZip64DiskCount; + if (locator.total_disk_count != 1) + return error.ZipMultiDiskUnsupported; + + try input.seekTo(locator.record_file_offset); + + const record64 = 
br.readStructEndian(EndRecord64, .little) catch |err| switch (err) { + error.ReadFailed => return input.err.?, + }; + + if (!std.mem.eql(u8, &record64.signature, &end_record64_sig)) + return error.ZipBadEndRecord64Sig; + + if (record64.end_record_size < @sizeOf(EndRecord64) - 12) + return error.ZipEndRecord64SizeTooSmall; + if (record64.end_record_size > @sizeOf(EndRecord64) - 12) + return error.ZipEndRecord64UnhandledExtraData; + + if (record64.version_needed_to_extract > 45) + return error.ZipUnsupportedVersion; + + { + const is_multidisk = record64.disk_number != 0 or + record64.central_directory_disk_number != 0 or + record64.record_count_disk != record64.record_count_total; + if (is_multidisk) + return error.ZipMultiDiskUnsupported; + } + + if (isMaxInt(end_record.record_count_total)) { + result.cd_record_count = record64.record_count_total; + } else if (end_record.record_count_total != record64.record_count_total) + return error.Zip64RecordCountTotalMismatch; + + if (isMaxInt(end_record.central_directory_offset)) { + result.cd_zip_offset = record64.central_directory_offset; + } else if (end_record.central_directory_offset != record64.central_directory_offset) + return error.Zip64CentralDirectoryOffsetMismatch; + + if (isMaxInt(end_record.central_directory_size)) { + result.cd_size = record64.central_directory_size; + } else if (end_record.central_directory_size != record64.central_directory_size) + return error.Zip64CentralDirectorySizeMismatch; + + return result; + } + + pub fn next(self: *Iterator) !?Entry { + if (self.cd_record_index == self.cd_record_count) { + if (self.cd_record_offset != self.cd_size) + return if (self.cd_size > self.cd_record_offset) + error.ZipCdOversized + else + error.ZipCdUndersized; + + return null; + } + + const header_zip_offset = self.cd_zip_offset + self.cd_record_offset; + const input = self.input; + try input.seekTo(header_zip_offset); + var br = input.interface().unbuffered(); + const header = 
br.readStructEndian(CentralDirectoryFileHeader, .little) catch |err| switch (err) { + error.ReadFailed => return input.err.?, + }; + if (!std.mem.eql(u8, &header.signature, &central_file_header_sig)) + return error.ZipBadCdOffset; + + self.cd_record_index += 1; + self.cd_record_offset += @sizeOf(CentralDirectoryFileHeader) + header.filename_len + header.extra_len + header.comment_len; + + // Note: checking the version_needed_to_extract doesn't seem to be helpful, i.e. the zip file + // at https://github.com/ninja-build/ninja/releases/download/v1.12.0/ninja-linux.zip + // has an undocumented version 788 but extracts just fine. + + if (header.flags.encrypted) + return error.ZipEncryptionUnsupported; + // TODO: check/verify more flags + if (header.disk_number != 0) + return error.ZipMultiDiskUnsupported; + + var extents: FileExtents = .{ + .uncompressed_size = header.uncompressed_size, + .compressed_size = header.compressed_size, + .local_file_header_offset = header.local_file_header_offset, + }; + + if (header.extra_len > 0) { + var extra_buf: [std.math.maxInt(u16)]u8 = undefined; + const extra = extra_buf[0..header.extra_len]; + + try input.seekTo(header_zip_offset + @sizeOf(CentralDirectoryFileHeader) + header.filename_len); + br.readSlice(extra) catch |err| switch (err) { + error.ReadFailed => return input.err.?, + }; + + var extra_offset: usize = 0; + while (extra_offset + 4 <= extra.len) { + const header_id = std.mem.readInt(u16, extra[extra_offset..][0..2], .little); + const data_size = std.mem.readInt(u16, extra[extra_offset..][2..4], .little); + const end = extra_offset + 4 + data_size; + if (end > extra.len) + return error.ZipBadExtraFieldSize; + const data = extra[extra_offset + 4 .. 
end]; + switch (@as(ExtraHeader, @enumFromInt(header_id))) { + .zip64_info => try readZip64FileExtents(CentralDirectoryFileHeader, header, &extents, data), + else => {}, // ignore + } + extra_offset = end; + } + } + + return .{ + .version_needed_to_extract = header.version_needed_to_extract, + .flags = header.flags, + .compression_method = header.compression_method, + .last_modification_time = header.last_modification_time, + .last_modification_date = header.last_modification_date, + .header_zip_offset = header_zip_offset, + .crc32 = header.crc32, + .filename_len = header.filename_len, + .compressed_size = extents.compressed_size, + .uncompressed_size = extents.uncompressed_size, + .file_offset = extents.local_file_header_offset, + }; + } + + pub const Entry = struct { + version_needed_to_extract: u16, + flags: GeneralPurposeFlags, + compression_method: CompressionMethod, + last_modification_time: u16, + last_modification_date: u16, + header_zip_offset: u64, + crc32: u32, + filename_len: u32, + compressed_size: u64, + uncompressed_size: u64, + file_offset: u64, + + pub fn extract( + self: Entry, + stream: *File.Reader, + options: ExtractOptions, + filename_buf: []u8, + dest: std.fs.Dir, + ) !u32 { + if (filename_buf.len < self.filename_len) + return error.ZipInsufficientBuffer; + const filename = filename_buf[0..self.filename_len]; + + try stream.seekTo(self.header_zip_offset + @sizeOf(CentralDirectoryFileHeader)); + + { + const len = try stream.context.reader().readAll(filename); + if (len != filename.len) + return error.ZipBadFileOffset; + } + + const local_data_header_offset: u64 = local_data_header_offset: { + const local_header = blk: { + try stream.seekTo(self.file_offset); + break :blk try stream.context.reader().readStructEndian(LocalFileHeader, .little); + }; + if (!std.mem.eql(u8, &local_header.signature, &local_file_header_sig)) + return error.ZipBadFileOffset; + if (local_header.version_needed_to_extract != self.version_needed_to_extract) + return 
error.ZipMismatchVersionNeeded; + if (local_header.last_modification_time != self.last_modification_time) + return error.ZipMismatchModTime; + if (local_header.last_modification_date != self.last_modification_date) + return error.ZipMismatchModDate; + + if (@as(u16, @bitCast(local_header.flags)) != @as(u16, @bitCast(self.flags))) + return error.ZipMismatchFlags; + if (local_header.crc32 != 0 and local_header.crc32 != self.crc32) + return error.ZipMismatchCrc32; + var extents: FileExtents = .{ + .uncompressed_size = local_header.uncompressed_size, + .compressed_size = local_header.compressed_size, + .local_file_header_offset = 0, + }; + if (local_header.extra_len > 0) { + var extra_buf: [std.math.maxInt(u16)]u8 = undefined; + const extra = extra_buf[0..local_header.extra_len]; + + { + try stream.seekTo(self.file_offset + @sizeOf(LocalFileHeader) + local_header.filename_len); + const len = try stream.context.reader().readAll(extra); + if (len != extra.len) + return error.ZipTruncated; + } + + var extra_offset: usize = 0; + while (extra_offset + 4 <= local_header.extra_len) { + const header_id = std.mem.readInt(u16, extra[extra_offset..][0..2], .little); + const data_size = std.mem.readInt(u16, extra[extra_offset..][2..4], .little); + const end = extra_offset + 4 + data_size; + if (end > local_header.extra_len) + return error.ZipBadExtraFieldSize; + const data = extra[extra_offset + 4 .. 
end]; + switch (@as(ExtraHeader, @enumFromInt(header_id))) { + .zip64_info => try readZip64FileExtents(LocalFileHeader, local_header, &extents, data), + else => {}, // ignore + } + extra_offset = end; + } + } + + if (extents.compressed_size != 0 and + extents.compressed_size != self.compressed_size) + return error.ZipMismatchCompLen; + if (extents.uncompressed_size != 0 and + extents.uncompressed_size != self.uncompressed_size) + return error.ZipMismatchUncompLen; + + if (local_header.filename_len != self.filename_len) + return error.ZipMismatchFilenameLen; + + break :local_data_header_offset @as(u64, local_header.filename_len) + + @as(u64, local_header.extra_len); + }; + + if (isBadFilename(filename)) + return error.ZipBadFilename; + + if (options.allow_backslashes) { + std.mem.replaceScalar(u8, filename, '\\', '/'); + } else { + if (std.mem.indexOfScalar(u8, filename, '\\')) |_| + return error.ZipFilenameHasBackslash; + } + + // All entries that end in '/' are directories + if (filename[filename.len - 1] == '/') { + if (self.uncompressed_size != 0) + return error.ZipBadDirectorySize; + try dest.makePath(filename[0 .. 
filename.len - 1]); + return std.hash.Crc32.hash(&.{}); + } + + const out_file = blk: { + if (std.fs.path.dirname(filename)) |dirname| { + var parent_dir = try dest.makeOpenPath(dirname, .{}); + defer parent_dir.close(); + + const basename = std.fs.path.basename(filename); + break :blk try parent_dir.createFile(basename, .{ .exclusive = true }); + } + break :blk try dest.createFile(filename, .{ .exclusive = true }); + }; + defer out_file.close(); + const local_data_file_offset: u64 = + @as(u64, self.file_offset) + + @as(u64, @sizeOf(LocalFileHeader)) + + local_data_header_offset; + try stream.seekTo(local_data_file_offset); + var compressed_remaining: u64 = self.compressed_size; + const crc = try decompress( + self.compression_method, + self.uncompressed_size, + stream.context.reader(), + out_file.writer(), + &compressed_remaining, + ); + if (compressed_remaining != 0) return error.ZipDecompressTruncated; + return crc; + } }; -} +}; // returns true if `filename` starts with `root` followed by a forward slash fn filenameInRoot(filename: []const u8, root: []const u8) bool { @@ -609,17 +623,13 @@ pub const ExtractOptions = struct { diagnostics: ?*Diagnostics = null, }; -/// Extract the zipped files inside `seekable_stream` to the given `dest` directory. -/// Note that `seekable_stream` must be an instance of `std.io.SeekableStream` and -/// its context must also have a `.reader()` method that returns an instance of -/// `std.io.Reader`. -pub fn extract(dest: std.fs.Dir, seekable_stream: anytype, options: ExtractOptions) !void { - const SeekableStream = @TypeOf(seekable_stream); - var iter = try Iterator(SeekableStream).init(seekable_stream); +/// Extract the zipped files to the given `dest` directory. 
+pub fn extract(dest: std.fs.Dir, fr: *File.Reader, options: ExtractOptions) !void { + var iter = try Iterator.init(fr); var filename_buf: [std.fs.max_path_bytes]u8 = undefined; while (try iter.next()) |entry| { - const crc32 = try entry.extract(seekable_stream, options, &filename_buf, dest); + const crc32 = try entry.extract(fr, options, &filename_buf, dest); if (crc32 != entry.crc32) return error.ZipCrcMismatch; if (options.diagnostics) |d| { @@ -628,173 +638,6 @@ pub fn extract(dest: std.fs.Dir, seekable_stream: anytype, options: ExtractOptio } } -fn testZip(options: ExtractOptions, comptime files: []const File, write_opt: testutil.WriteZipOptions) !void { - var store: [files.len]FileStore = undefined; - try testZipWithStore(options, files, write_opt, &store); -} -fn testZipWithStore( - options: ExtractOptions, - test_files: []const File, - write_opt: testutil.WriteZipOptions, - store: []FileStore, -) !void { - var zip_buf: [4096]u8 = undefined; - var fbs = try testutil.makeZipWithStore(&zip_buf, test_files, write_opt, store); - - var tmp = testing.tmpDir(.{ .no_follow = true }); - defer tmp.cleanup(); - try extract(tmp.dir, fbs.seekableStream(), options); - try testutil.expectFiles(test_files, tmp.dir, .{}); -} -fn testZipError(expected_error: anyerror, file: File, options: ExtractOptions) !void { - var zip_buf: [4096]u8 = undefined; - var store: [1]FileStore = undefined; - var fbs = try testutil.makeZipWithStore(&zip_buf, &[_]File{file}, .{}, &store); - var tmp = testing.tmpDir(.{ .no_follow = true }); - defer tmp.cleanup(); - try testing.expectError(expected_error, extract(tmp.dir, fbs.seekableStream(), options)); -} - -test "zip one file" { - try testZip(.{}, &[_]File{ - .{ .name = "onefile.txt", .content = "Just a single file\n", .compression = .store }, - }, .{}); -} -test "zip multiple files" { - try testZip(.{ .allow_backslashes = true }, &[_]File{ - .{ .name = "foo", .content = "a foo file\n", .compression = .store }, - .{ .name = "subdir/bar", .content 
= "bar is this right?\nanother newline\n", .compression = .store }, - .{ .name = "subdir\\whoa", .content = "you can do backslashes", .compression = .store }, - .{ .name = "subdir/another/baz", .content = "bazzy mc bazzerson", .compression = .store }, - }, .{}); -} -test "zip deflated" { - try testZip(.{}, &[_]File{ - .{ .name = "deflateme", .content = "This is a deflated file.\nIt should be smaller in the Zip file1\n", .compression = .deflate }, - // TODO: re-enable this if/when we add support for deflate64 - //.{ .name = "deflateme64", .content = "The 64k version of deflate!\n", .compression = .deflate64 }, - .{ .name = "raw", .content = "Not all files need to be deflated in the same Zip.\n", .compression = .store }, - }, .{}); -} -test "zip verify filenames" { - // no empty filenames - try testZipError(error.ZipBadFilename, .{ .name = "", .content = "", .compression = .store }, .{}); - // no absolute paths - try testZipError(error.ZipBadFilename, .{ .name = "/", .content = "", .compression = .store }, .{}); - try testZipError(error.ZipBadFilename, .{ .name = "/foo", .content = "", .compression = .store }, .{}); - try testZipError(error.ZipBadFilename, .{ .name = "/foo/bar", .content = "", .compression = .store }, .{}); - // no '..' 
components - try testZipError(error.ZipBadFilename, .{ .name = "..", .content = "", .compression = .store }, .{}); - try testZipError(error.ZipBadFilename, .{ .name = "foo/..", .content = "", .compression = .store }, .{}); - try testZipError(error.ZipBadFilename, .{ .name = "foo/bar/..", .content = "", .compression = .store }, .{}); - try testZipError(error.ZipBadFilename, .{ .name = "foo/bar/../", .content = "", .compression = .store }, .{}); - // no backslashes - try testZipError(error.ZipFilenameHasBackslash, .{ .name = "foo\\bar", .content = "", .compression = .store }, .{}); -} - -test "zip64" { - const test_files = [_]File{ - .{ .name = "fram", .content = "fram foo fro fraba", .compression = .store }, - .{ .name = "subdir/barro", .content = "aljdk;jal;jfd;lajkf", .compression = .store }, - }; - - try testZip(.{}, &test_files, .{ - .end = .{ - .zip64 = .{}, - .record_count_disk = std.math.maxInt(u16), // trigger zip64 - }, - }); - try testZip(.{}, &test_files, .{ - .end = .{ - .zip64 = .{}, - .record_count_total = std.math.maxInt(u16), // trigger zip64 - }, - }); - try testZip(.{}, &test_files, .{ - .end = .{ - .zip64 = .{}, - .record_count_disk = std.math.maxInt(u16), // trigger zip64 - .record_count_total = std.math.maxInt(u16), // trigger zip64 - }, - }); - try testZip(.{}, &test_files, .{ - .end = .{ - .zip64 = .{}, - .central_directory_size = std.math.maxInt(u32), // trigger zip64 - }, - }); - try testZip(.{}, &test_files, .{ - .end = .{ - .zip64 = .{}, - .central_directory_offset = std.math.maxInt(u32), // trigger zip64 - }, - }); - try testZip(.{}, &test_files, .{ - .end = .{ - .zip64 = .{}, - .central_directory_offset = std.math.maxInt(u32), // trigger zip64 - }, - .local_header = .{ - .zip64 = .{ // trigger local header zip64 - .data_size = 16, - }, - .compressed_size = std.math.maxInt(u32), - .uncompressed_size = std.math.maxInt(u32), - .extra_len = 20, - }, - }); -} - -test "bad zip files" { - var tmp = testing.tmpDir(.{ .no_follow = true }); - 
defer tmp.cleanup(); - var zip_buf: [4096]u8 = undefined; - - const file_a = [_]File{.{ .name = "a", .content = "", .compression = .store }}; - - { - var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .sig = [_]u8{ 1, 2, 3, 4 } } }); - try testing.expectError(error.ZipNoEndRecord, extract(tmp.dir, fbs.seekableStream(), .{})); - } - { - var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .comment_len = 1 } }); - try testing.expectError(error.ZipNoEndRecord, extract(tmp.dir, fbs.seekableStream(), .{})); - } - { - var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .comment = "a", .comment_len = 0 } }); - try testing.expectError(error.ZipNoEndRecord, extract(tmp.dir, fbs.seekableStream(), .{})); - } - { - var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .disk_number = 1 } }); - try testing.expectError(error.ZipMultiDiskUnsupported, extract(tmp.dir, fbs.seekableStream(), .{})); - } - { - var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .central_directory_disk_number = 1 } }); - try testing.expectError(error.ZipMultiDiskUnsupported, extract(tmp.dir, fbs.seekableStream(), .{})); - } - { - var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .record_count_disk = 1 } }); - try testing.expectError(error.ZipDiskRecordCountTooLarge, extract(tmp.dir, fbs.seekableStream(), .{})); - } - { - var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .central_directory_size = 1 } }); - try testing.expectError(error.ZipCdOversized, extract(tmp.dir, fbs.seekableStream(), .{})); - } - { - var fbs = try testutil.makeZip(&zip_buf, &file_a, .{ .end = .{ .central_directory_size = 0 } }); - try testing.expectError(error.ZipCdUndersized, extract(tmp.dir, fbs.seekableStream(), .{})); - } - { - var fbs = try testutil.makeZip(&zip_buf, &file_a, .{ .end = .{ .central_directory_offset = 0 } }); - try testing.expectError(error.ZipBadCdOffset, extract(tmp.dir, fbs.seekableStream(), .{})); - } - { - var fbs = try testutil.makeZip(&zip_buf, 
&file_a, .{ - .end = .{ - .zip64 = .{ .locator_sig = [_]u8{ 1, 2, 3, 4 } }, - .central_directory_size = std.math.maxInt(u32), // trigger 64 - }, - }); - try testing.expectError(error.ZipBadLocatorSig, extract(tmp.dir, fbs.seekableStream(), .{})); - } +test { + _ = @import("zip/test.zig"); } diff --git a/lib/std/zip/test.zig b/lib/std/zip/test.zig index ce1c6198e7..8db603651d 100644 --- a/lib/std/zip/test.zig +++ b/lib/std/zip/test.zig @@ -2,14 +2,15 @@ const std = @import("std"); const testing = std.testing; const zip = @import("../zip.zig"); const maxInt = std.math.maxInt; +const assert = std.debug.assert; -pub const File = struct { +const File = struct { name: []const u8, content: []const u8, compression: zip.CompressionMethod, }; -pub fn expectFiles( +fn expectFiles( test_files: []const File, dir: std.fs.Dir, opt: struct { @@ -40,7 +41,7 @@ pub fn expectFiles( // Used to store any data from writing a file to the zip archive that's needed // when writing the corresponding central directory record. 
-pub const FileStore = struct { +const FileStore = struct { compression: zip.CompressionMethod, file_offset: u64, crc32: u32, @@ -48,40 +49,40 @@ pub const FileStore = struct { uncompressed_size: usize, }; -pub fn makeZip( - buf: []u8, - comptime files: []const File, - options: WriteZipOptions, -) !std.io.FixedBufferStream([]u8) { - var store: [files.len]FileStore = undefined; - return try makeZipWithStore(buf, files, options, &store); +fn makeZip(buf: []u8, files: []const File, options: WriteZipOptions) !std.io.BufferedReader { + const store = try std.testing.allocator.alloc(FileStore, files.len); + defer std.testing.allocator.free(store); + return makeZipWithStore(buf, files, options, store); } -pub fn makeZipWithStore( +fn makeZipWithStore( buf: []u8, files: []const File, options: WriteZipOptions, store: []FileStore, -) !std.io.FixedBufferStream([]u8) { - var fbs = std.io.fixedBufferStream(buf); - try writeZip(fbs.writer(), files, store, options); - return std.io.fixedBufferStream(buf[0..fbs.pos]); +) !std.io.BufferedReader { + var out: std.io.BufferedWriter = undefined; + out.initFixed(buf); + try writeZip(&out, files, store, options); + var result: std.io.BufferedReader = undefined; + result.initFixed(buf[0..out.end]); + return result; } -pub const WriteZipOptions = struct { +const WriteZipOptions = struct { end: ?EndRecordOptions = null, local_header: ?LocalHeaderOptions = null, }; -pub const LocalHeaderOptions = struct { +const LocalHeaderOptions = struct { zip64: ?LocalHeaderZip64Options = null, compressed_size: ?u32 = null, uncompressed_size: ?u32 = null, extra_len: ?u16 = null, }; -pub const LocalHeaderZip64Options = struct { +const LocalHeaderZip64Options = struct { data_size: ?u16 = null, }; -pub const EndRecordOptions = struct { +const EndRecordOptions = struct { zip64: ?Zip64Options = null, sig: ?[4]u8 = null, disk_number: ?u16 = null, @@ -93,7 +94,7 @@ pub const EndRecordOptions = struct { comment_len: ?u16 = null, comment: ?[]const u8 = null, }; 
-pub const Zip64Options = struct { +const Zip64Options = struct { locator_sig: ?[4]u8 = null, locator_zip64_disk_count: ?u32 = null, locator_record_file_offset: ?u64 = null, @@ -102,7 +103,7 @@ pub const Zip64Options = struct { central_directory_size: ?u64 = null, }; -pub fn writeZip( +fn writeZip( writer: *std.io.BufferedWriter, files: []const File, store: []FileStore, @@ -128,21 +129,19 @@ pub fn writeZip( /// Provides methods to format and write the contents of a zip archive /// to the underlying Writer. -pub const Zipper = struct { +const Zipper = struct { writer: *std.io.BufferedWriter, - bytes_written: u64, + init_count: u64, central_count: u64 = 0, first_central_offset: ?u64 = null, last_central_limit: ?u64 = null, - const Self = @This(); - - pub fn init(writer: *std.io.BufferedWriter) Zipper { - return .{ .writer = writer, .bytes_written = 0 }; + fn init(writer: *std.io.BufferedWriter) Zipper { + return .{ .writer = writer, .init_count = writer.count }; } - pub fn writeFile( - self: *Self, + fn writeFile( + self: *Zipper, opt: struct { name: []const u8, content: []const u8, @@ -152,7 +151,7 @@ pub const Zipper = struct { ) !FileStore { const writer = self.writer; - const file_offset: u64 = @intCast(self.bytes_written); + const file_offset: u64 = writer.count - self.init_count; const crc32 = std.hash.Crc32.hash(opt.content); const header_options = opt.write_options.local_header; @@ -178,32 +177,33 @@ pub const Zipper = struct { .filename_len = @intCast(opt.name.len), .extra_len = extra_len, }; - self.bytes_written += try writer.writeStructEndian(hdr, .little); + try writer.writeStructEndian(hdr, .little); } - self.bytes_written += try writer.writeAll(opt.name); + try writer.writeAll(opt.name); if (header_options) |hdr| { if (hdr.zip64) |options| { - self.bytes_written += try writer.writeInt(u16, 0x0001, .little); + try writer.writeInt(u16, 0x0001, .little); const data_size = if (options.data_size) |size| size else 8; - self.bytes_written += try 
writer.writeInt(u16, data_size, .little); - self.bytes_written += try writer.writeInt(u64, 0, .little); - self.bytes_written += try writer.writeInt(u64, @intCast(opt.content.len), .little); + try writer.writeInt(u16, data_size, .little); + try writer.writeInt(u64, 0, .little); + try writer.writeInt(u64, @intCast(opt.content.len), .little); } } var compressed_size: u32 = undefined; switch (opt.compression) { .store => { - self.bytes_written += try writer.writeAll(opt.content); + try writer.writeAll(opt.content); compressed_size = @intCast(opt.content.len); }, .deflate => { - const offset = self.bytes_written; - var fbs = std.io.fixedBufferStream(opt.content); - self.bytes_written += try std.compress.flate.deflate.compress(.raw, fbs.reader(), writer, .{}); - std.debug.assert(fbs.pos == opt.content.len); - compressed_size = @intCast(self.bytes_written - offset); + const offset = writer.count; + var br: std.io.BufferedReader = undefined; + br.initFixed(@constCast(opt.content)); + try std.compress.flate.deflate.compress(.raw, &br, writer, .{}); + assert(br.seek == opt.content.len); + compressed_size = @intCast(writer.count - offset); }, else => unreachable, } @@ -216,8 +216,8 @@ pub const Zipper = struct { }; } - pub fn writeCentralRecord( - self: *Self, + fn writeCentralRecord( + self: *Zipper, store: FileStore, opt: struct { name: []const u8, @@ -225,7 +225,7 @@ pub const Zipper = struct { }, ) !void { if (self.first_central_offset == null) { - self.first_central_offset = self.bytes_written; + self.first_central_offset = self.writer.count - self.init_count; } self.central_count += 1; @@ -248,12 +248,12 @@ pub const Zipper = struct { .external_file_attributes = 0, .local_file_header_offset = @intCast(store.file_offset), }; - self.bytes_written += try self.writer.writeStructEndian(hdr, .little); - self.bytes_written += try self.writer.writeAll(opt.name); - self.last_central_limit = self.bytes_written; + try self.writer.writeStructEndian(hdr, .little); + try 
self.writer.writeAll(opt.name); + self.last_central_limit = self.writer.count - self.init_count; } - pub fn writeEndRecord(self: *Self, opt: EndRecordOptions) !void { + fn writeEndRecord(self: *Zipper, opt: EndRecordOptions) !void { const cd_offset = self.first_central_offset orelse 0; const cd_end = self.last_central_limit orelse 0; @@ -271,14 +271,14 @@ pub const Zipper = struct { .central_directory_size = @intCast(cd_end - cd_offset), .central_directory_offset = @intCast(cd_offset), }; - self.bytes_written += try self.writer.writeStructEndian(fixed, .little); + try self.writer.writeStructEndian(fixed, .little); const locator: zip.EndLocator64 = .{ .signature = if (zip64.locator_sig) |s| s else zip.end_locator64_sig, .zip64_disk_count = if (zip64.locator_zip64_disk_count) |c| c else 0, .record_file_offset = if (zip64.locator_record_file_offset) |o| o else @intCast(end64_off), .total_disk_count = if (zip64.locator_total_disk_count) |c| c else 1, }; - self.bytes_written += try self.writer.writeStructEndian(locator, .little); + try self.writer.writeStructEndian(locator, .little); } const hdr: zip.EndRecord = .{ .signature = if (opt.sig) |s| s else zip.end_record_sig, @@ -290,8 +290,179 @@ pub const Zipper = struct { .central_directory_offset = if (opt.central_directory_offset) |o| o else @intCast(cd_offset), .comment_len = if (opt.comment_len) |l| l else (if (opt.comment) |c| @as(u16, @intCast(c.len)) else 0), }; - self.bytes_written += try self.writer.writeStructEndian(hdr, .little); + try self.writer.writeStructEndian(hdr, .little); if (opt.comment) |c| - self.bytes_written += try self.writer.writeAll(c); + try self.writer.writeAll(c); } }; + +fn testZip(options: zip.ExtractOptions, comptime files: []const File, write_opt: WriteZipOptions) !void { + var store: [files.len]FileStore = undefined; + try testZipWithStore(options, files, write_opt, &store); +} +fn testZipWithStore( + options: zip.ExtractOptions, + test_files: []const File, + write_opt: WriteZipOptions, 
+ store: []FileStore, +) !void { + var zip_buf: [4096]u8 = undefined; + var fbs = try makeZipWithStore(&zip_buf, test_files, write_opt, store); + + var tmp = testing.tmpDir(.{ .no_follow = true }); + defer tmp.cleanup(); + try zip.extract(tmp.dir, fbs.seekableStream(), options); + try expectFiles(test_files, tmp.dir, .{}); +} +fn testZipError(expected_error: anyerror, file: File, options: zip.ExtractOptions) !void { + var zip_buf: [4096]u8 = undefined; + var store: [1]FileStore = undefined; + var fbs = try makeZipWithStore(&zip_buf, &[_]File{file}, .{}, &store); + var tmp = testing.tmpDir(.{ .no_follow = true }); + defer tmp.cleanup(); + try testing.expectError(expected_error, zip.extract(tmp.dir, fbs.seekableStream(), options)); +} + +test "zip one file" { + try testZip(.{}, &[_]File{ + .{ .name = "onefile.txt", .content = "Just a single file\n", .compression = .store }, + }, .{}); +} +test "zip multiple files" { + try testZip(.{ .allow_backslashes = true }, &[_]File{ + .{ .name = "foo", .content = "a foo file\n", .compression = .store }, + .{ .name = "subdir/bar", .content = "bar is this right?\nanother newline\n", .compression = .store }, + .{ .name = "subdir\\whoa", .content = "you can do backslashes", .compression = .store }, + .{ .name = "subdir/another/baz", .content = "bazzy mc bazzerson", .compression = .store }, + }, .{}); +} +test "zip deflated" { + try testZip(.{}, &[_]File{ + .{ .name = "deflateme", .content = "This is a deflated file.\nIt should be smaller in the Zip file1\n", .compression = .deflate }, + // TODO: re-enable this if/when we add support for deflate64 + //.{ .name = "deflateme64", .content = "The 64k version of deflate!\n", .compression = .deflate64 }, + .{ .name = "raw", .content = "Not all files need to be deflated in the same Zip.\n", .compression = .store }, + }, .{}); +} +test "zip verify filenames" { + // no empty filenames + try testZipError(error.ZipBadFilename, .{ .name = "", .content = "", .compression = .store }, .{}); + // no 
absolute paths + try testZipError(error.ZipBadFilename, .{ .name = "/", .content = "", .compression = .store }, .{}); + try testZipError(error.ZipBadFilename, .{ .name = "/foo", .content = "", .compression = .store }, .{}); + try testZipError(error.ZipBadFilename, .{ .name = "/foo/bar", .content = "", .compression = .store }, .{}); + // no '..' components + try testZipError(error.ZipBadFilename, .{ .name = "..", .content = "", .compression = .store }, .{}); + try testZipError(error.ZipBadFilename, .{ .name = "foo/..", .content = "", .compression = .store }, .{}); + try testZipError(error.ZipBadFilename, .{ .name = "foo/bar/..", .content = "", .compression = .store }, .{}); + try testZipError(error.ZipBadFilename, .{ .name = "foo/bar/../", .content = "", .compression = .store }, .{}); + // no backslashes + try testZipError(error.ZipFilenameHasBackslash, .{ .name = "foo\\bar", .content = "", .compression = .store }, .{}); +} + +test "zip64" { + const test_files = [_]File{ + .{ .name = "fram", .content = "fram foo fro fraba", .compression = .store }, + .{ .name = "subdir/barro", .content = "aljdk;jal;jfd;lajkf", .compression = .store }, + }; + + try testZip(.{}, &test_files, .{ + .end = .{ + .zip64 = .{}, + .record_count_disk = std.math.maxInt(u16), // trigger zip64 + }, + }); + try testZip(.{}, &test_files, .{ + .end = .{ + .zip64 = .{}, + .record_count_total = std.math.maxInt(u16), // trigger zip64 + }, + }); + try testZip(.{}, &test_files, .{ + .end = .{ + .zip64 = .{}, + .record_count_disk = std.math.maxInt(u16), // trigger zip64 + .record_count_total = std.math.maxInt(u16), // trigger zip64 + }, + }); + try testZip(.{}, &test_files, .{ + .end = .{ + .zip64 = .{}, + .central_directory_size = std.math.maxInt(u32), // trigger zip64 + }, + }); + try testZip(.{}, &test_files, .{ + .end = .{ + .zip64 = .{}, + .central_directory_offset = std.math.maxInt(u32), // trigger zip64 + }, + }); + try testZip(.{}, &test_files, .{ + .end = .{ + .zip64 = .{}, + 
.central_directory_offset = std.math.maxInt(u32), // trigger zip64 + }, + .local_header = .{ + .zip64 = .{ // trigger local header zip64 + .data_size = 16, + }, + .compressed_size = std.math.maxInt(u32), + .uncompressed_size = std.math.maxInt(u32), + .extra_len = 20, + }, + }); +} + +test "bad zip files" { + var tmp = testing.tmpDir(.{ .no_follow = true }); + defer tmp.cleanup(); + var zip_buf: [4096]u8 = undefined; + + const file_a = [_]File{.{ .name = "a", .content = "", .compression = .store }}; + + { + var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .sig = [_]u8{ 1, 2, 3, 4 } } }); + try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + } + { + var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .comment_len = 1 } }); + try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + } + { + var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .comment = "a", .comment_len = 0 } }); + try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + } + { + var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .disk_number = 1 } }); + try testing.expectError(error.ZipMultiDiskUnsupported, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + } + { + var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .central_directory_disk_number = 1 } }); + try testing.expectError(error.ZipMultiDiskUnsupported, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + } + { + var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .record_count_disk = 1 } }); + try testing.expectError(error.ZipDiskRecordCountTooLarge, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + } + { + var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .central_directory_size = 1 } }); + try testing.expectError(error.ZipCdOversized, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + } + { + var fbs = try makeZip(&zip_buf, &file_a, .{ .end = .{ .central_directory_size = 0 } }); + try 
testing.expectError(error.ZipCdUndersized, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + } + { + var fbs = try makeZip(&zip_buf, &file_a, .{ .end = .{ .central_directory_offset = 0 } }); + try testing.expectError(error.ZipBadCdOffset, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + } + { + var fbs = try makeZip(&zip_buf, &file_a, .{ + .end = .{ + .zip64 = .{ .locator_sig = [_]u8{ 1, 2, 3, 4 } }, + .central_directory_size = std.math.maxInt(u32), // trigger 64 + }, + }); + try testing.expectError(error.ZipBadLocatorSig, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + } +} diff --git a/lib/std/zon/parse.zig b/lib/std/zon/parse.zig index 9a07726b2d..70c756bb9b 100644 --- a/lib/std/zon/parse.zig +++ b/lib/std/zon/parse.zig @@ -440,11 +440,9 @@ const Parser = struct { }; } - fn parseExprInner( - self: *@This(), - T: type, - node: Zoir.Node.Index, - ) error{ ParseZon, OutOfMemory, WrongType }!T { + const InnerError = error{ ParseZon, OutOfMemory, WrongType }; + + fn parseExprInner(self: *@This(), T: type, node: Zoir.Node.Index) InnerError!T { if (T == Zoir.Node.Index) { return node; } @@ -624,7 +622,7 @@ const Parser = struct { } } - fn parseSlicePointer(self: *@This(), T: type, node: Zoir.Node.Index) !T { + fn parseSlicePointer(self: *@This(), T: type, node: Zoir.Node.Index) InnerError!T { switch (node.get(self.zoir)) { .string_literal => return self.parseString(T, node), .array_literal => |nodes| return self.parseSlice(T, nodes), @@ -633,7 +631,7 @@ const Parser = struct { } } - fn parseString(self: *@This(), T: type, node: Zoir.Node.Index) !T { + fn parseString(self: *@This(), T: type, node: Zoir.Node.Index) InnerError!T { const ast_node = node.getAstNode(self.zoir); const pointer = @typeInfo(T).pointer; var size_hint = ZonGen.strLitSizeHint(self.ast, ast_node); @@ -643,7 +641,10 @@ const Parser = struct { var aw: std.io.AllocatingWriter = undefined; try aw.initCapacity(gpa, size_hint); defer aw.deinit(); - switch (try ZonGen.parseStrLit(self.ast, 
ast_node, &aw.buffered_writer)) { + const parsed = ZonGen.parseStrLit(self.ast, ast_node, &aw.buffered_writer) catch |err| switch (err) { + error.WriteFailed => return error.OutOfMemory, + }; + switch (parsed) { .success => {}, .failure => |err| { const token = self.ast.nodeMainToken(ast_node); @@ -662,9 +663,9 @@ const Parser = struct { } if (pointer.sentinel() != null) { - return aw.toOwnedSliceSentinel(gpa, 0); + return aw.toOwnedSliceSentinel(0); } else { - return aw.toOwnedSlice(gpa); + return aw.toOwnedSlice(); } } diff --git a/lib/std/zon/stringify.zig b/lib/std/zon/stringify.zig index 08e57bc56d..434a33eee7 100644 --- a/lib/std/zon/stringify.zig +++ b/lib/std/zon/stringify.zig @@ -22,6 +22,7 @@ const std = @import("std"); const assert = std.debug.assert; +const BufferedWriter = std.io.BufferedWriter; /// Options for `serialize`. pub const SerializeOptions = struct { @@ -40,7 +41,7 @@ pub const SerializeOptions = struct { /// Serialize the given value as ZON. /// /// It is asserted at comptime that `@TypeOf(val)` is not a recursive type. 
-pub fn serialize(val: anytype, options: SerializeOptions, writer: *std.io.BufferedWriter) std.io.Writer.Error!void { +pub fn serialize(val: anytype, options: SerializeOptions, writer: *BufferedWriter) std.io.Writer.Error!void { var s: Serializer = .{ .writer = writer, .options = .{ .whitespace = options.whitespace }, @@ -59,9 +60,9 @@ pub fn serialize(val: anytype, options: SerializeOptions, writer: *std.io.Buffer pub fn serializeMaxDepth( val: anytype, options: SerializeOptions, - writer: *std.io.BufferedWriter, + writer: *BufferedWriter, depth: usize, -) std.io.Writer.Error!void { +) Serializer.DepthError!void { var s: Serializer = .{ .writer = writer, .options = .{ .whitespace = options.whitespace }, @@ -79,8 +80,8 @@ pub fn serializeMaxDepth( pub fn serializeArbitraryDepth( val: anytype, options: SerializeOptions, - writer: *std.io.BufferedWriter, -) std.io.Writer.Error!void { + writer: *BufferedWriter, +) Serializer.Error!void { var s: Serializer = .{ .writer = writer, .options = .{ .whitespace = options.whitespace }, @@ -436,9 +437,10 @@ pub const SerializeContainerOptions = struct { pub const Serializer = struct { options: Options = .{}, indent_level: u8 = 0, - writer: *std.io.BufferedWriter, + writer: *BufferedWriter, pub const Error = std.io.Writer.Error; + pub const DepthError = Error || error{ExceededMaxDepth}; pub const Options = struct { /// If false, only syntactically necessary whitespace is emitted. @@ -453,7 +455,7 @@ pub const Serializer = struct { /// Serialize a value, similar to `serializeMaxDepth`. /// Can return `error.ExceededMaxDepth`. 
- pub fn valueMaxDepth(self: *Serializer, val: anytype, options: ValueOptions, depth: usize) Error!void { + pub fn valueMaxDepth(self: *Serializer, val: anytype, options: ValueOptions, depth: usize) DepthError!void { try checkValueDepth(val, depth); return self.valueArbitraryDepth(val, options); } @@ -618,13 +620,12 @@ pub const Serializer = struct { try self.writer.print(".{fp_}", .{std.zig.fmtId(name)}); } + pub const CodePointError = Error || error{InvalidCodepoint}; + /// Serialize `val` as a Unicode codepoint. /// /// Returns `error.InvalidCodepoint` if `val` is not a valid Unicode codepoint. - pub fn codePoint( - self: *Serializer, - val: u21, - ) Error!void { + pub fn codePoint(self: *Serializer, val: u21) CodePointError!void { var buf: [8]u8 = undefined; const len = std.unicode.utf8Encode(val, &buf) catch return error.InvalidCodepoint; const str = buf[0..len]; @@ -647,7 +648,7 @@ pub const Serializer = struct { val: anytype, options: ValueOptions, depth: usize, - ) Error!void { + ) DepthError!void { try checkValueDepth(val, depth); try self.tupleArbitraryDepth(val, options); } @@ -697,6 +698,8 @@ pub const Serializer = struct { top_level: bool = false, }; + pub const MultilineStringError = Error || error{InnerCarriageReturn}; + /// Like `value`, but always serializes to a multiline string literal. 
/// /// Returns `error.InnerCarriageReturn` if `val` contains a CR not followed by a newline, @@ -705,7 +708,7 @@ pub const Serializer = struct { self: *Serializer, val: []const u8, options: MultilineStringOptions, - ) Error!void { + ) MultilineStringError!void { // Make sure the string does not contain any carriage returns not followed by a newline var i: usize = 0; while (i < val.len) : (i += 1) { @@ -818,7 +821,7 @@ pub const Serializer = struct { val: anytype, options: ValueOptions, depth: usize, - ) Error!void { + ) DepthError!void { try self.container.fieldMaxDepth(null, val, options, depth); } @@ -893,7 +896,7 @@ pub const Serializer = struct { val: anytype, options: ValueOptions, depth: usize, - ) Error!void { + ) DepthError!void { try self.container.fieldMaxDepth(name, val, options, depth); } @@ -1012,7 +1015,7 @@ pub const Serializer = struct { val: anytype, options: ValueOptions, depth: usize, - ) Error!void { + ) DepthError!void { try checkValueDepth(val, depth); try self.fieldArbitraryDepth(name, val, options); } @@ -1037,13 +1040,13 @@ pub const Serializer = struct { }; test Serializer { - var s: Serializer = .{ - .writer = std.io.null_writer, - }; + var null_writer: std.io.Writer.Null = undefined; + var bw = null_writer.writer().unbuffered(); + var s: Serializer = .{ .writer = &bw }; var vec2 = try s.beginStruct(.{}); try vec2.field("x", 1.5, .{}); - try vec2.fieldPrefix(); - try s.value(2.5); + try vec2.fieldPrefix("prefix"); + try s.value(2.5, .{}); try vec2.end(); } @@ -1053,7 +1056,8 @@ fn expectSerializeEqual( options: SerializeOptions, ) !void { var aw: std.io.AllocatingWriter = undefined; - const bw = aw.init(std.testing.allocator); + aw.init(std.testing.allocator); + const bw = &aw.buffered_writer; defer aw.deinit(); try serialize(value, options, bw); @@ -1155,7 +1159,8 @@ test "std.zon stringify whitespace, high level API" { test "std.zon stringify whitespace, low level API" { var aw: std.io.AllocatingWriter = undefined; - var s: Serializer 
= .{ .writer = aw.init(std.testing.allocator) }; + aw.init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.buffered_writer }; defer aw.deinit(); for ([2]bool{ true, false }) |whitespace| { @@ -1512,7 +1517,8 @@ test "std.zon stringify whitespace, low level API" { test "std.zon stringify utf8 codepoints" { var aw: std.io.AllocatingWriter = undefined; - var s: Serializer = .{ .writer = aw.init(std.testing.allocator) }; + aw.init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.buffered_writer }; defer aw.deinit(); // Printable ASCII @@ -1622,7 +1628,8 @@ test "std.zon stringify utf8 codepoints" { test "std.zon stringify strings" { var aw: std.io.AllocatingWriter = undefined; - var s: Serializer = .{ .writer = aw.init(std.testing.allocator) }; + aw.init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.buffered_writer }; defer aw.deinit(); // Minimal case @@ -1692,7 +1699,8 @@ test "std.zon stringify strings" { test "std.zon stringify multiline strings" { var aw: std.io.AllocatingWriter = undefined; - var s: Serializer = .{ .writer = aw.init(std.testing.allocator) }; + aw.init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.buffered_writer }; defer aw.deinit(); inline for (.{ true, false }) |whitespace| { @@ -1912,7 +1920,8 @@ test "std.zon stringify skip default fields" { test "std.zon depth limits" { var aw: std.io.AllocatingWriter = undefined; - const bw = aw.init(std.testing.allocator); + aw.init(std.testing.allocator); + const bw = &aw.buffered_writer; defer aw.deinit(); const Recurse = struct { r: []const @This() }; @@ -2173,7 +2182,8 @@ test "std.zon stringify primitives" { test "std.zon stringify ident" { var aw: std.io.AllocatingWriter = undefined; - var s: Serializer = .{ .writer = aw.init(std.testing.allocator) }; + aw.init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.buffered_writer }; defer aw.deinit(); try expectSerializeEqual(".{ .a = 0 }", .{ .a = 0 }, .{}); @@ -2220,7 +2230,8 
@@ test "std.zon stringify ident" { test "std.zon stringify as tuple" { var aw: std.io.AllocatingWriter = undefined; - var s: Serializer = .{ .writer = aw.init(std.testing.allocator) }; + aw.init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.buffered_writer }; defer aw.deinit(); // Tuples @@ -2241,7 +2252,8 @@ test "std.zon stringify as tuple" { test "std.zon stringify as float" { var aw: std.io.AllocatingWriter = undefined; - var s: Serializer = .{ .writer = aw.init(std.testing.allocator) }; + aw.init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.buffered_writer }; defer aw.deinit(); // Comptime float @@ -2345,7 +2357,8 @@ test "std.zon pointers" { test "std.zon tuple/struct field" { var aw: std.io.AllocatingWriter = undefined; - var s: Serializer = .{ .writer = aw.init(std.testing.allocator) }; + aw.init(std.testing.allocator); + var s: Serializer = .{ .writer = &aw.buffered_writer }; defer aw.deinit(); // Test on structs