std.zip: Add ZIP64 support for local file header extra field

This PR adds support for the ZIP64 format in local file headers. This applies
when a zip file contains entries whose compressed or uncompressed size fields
are set to 0xFFFFFFFF and whose extra field contains the ZIP64 extended
information tag (0x0001).

The code now:

- Reads the actual sizes from the ZIP64 extra field data.
- Validates these sizes against the entry's compressed and uncompressed sizes.

ZIP file format specification: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT

This change allows proper extraction of ZIP files that use ZIP64 format in their
local file headers.

Fixes: #22329
This commit is contained in:
axel escalada 2025-01-17 18:55:55 -03:00 committed by GitHub
parent c748eb2416
commit f9a43770c8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 108 additions and 24 deletions

View File

@ -215,7 +215,7 @@ const FileExtents = struct {
local_file_header_offset: u64,
};
fn readZip64FileExtents(header: CentralDirectoryFileHeader, extents: *FileExtents, data: []u8) !void {
fn readZip64FileExtents(comptime T: type, header: T, extents: *FileExtents, data: []u8) !void {
var data_offset: usize = 0;
if (isMaxInt(header.uncompressed_size)) {
if (data_offset + 8 > data.len)
@ -229,22 +229,28 @@ fn readZip64FileExtents(header: CentralDirectoryFileHeader, extents: *FileExtent
extents.compressed_size = std.mem.readInt(u64, data[data_offset..][0..8], .little);
data_offset += 8;
}
if (isMaxInt(header.local_file_header_offset)) {
if (data_offset + 8 > data.len)
return error.ZipBadCd64Size;
extents.local_file_header_offset = std.mem.readInt(u64, data[data_offset..][0..8], .little);
data_offset += 8;
switch (T) {
CentralDirectoryFileHeader => {
if (isMaxInt(header.local_file_header_offset)) {
if (data_offset + 8 > data.len)
return error.ZipBadCd64Size;
extents.local_file_header_offset = std.mem.readInt(u64, data[data_offset..][0..8], .little);
data_offset += 8;
}
if (isMaxInt(header.disk_number)) {
if (data_offset + 4 > data.len)
return error.ZipInvalid;
const disk_number = std.mem.readInt(u32, data[data_offset..][0..4], .little);
if (disk_number != 0)
return error.ZipMultiDiskUnsupported;
data_offset += 4;
}
if (data_offset > data.len)
return error.ZipBadCd64Size;
},
else => {},
}
if (isMaxInt(header.disk_number)) {
if (data_offset + 4 > data.len)
return error.ZipInvalid;
const disk_number = std.mem.readInt(u32, data[data_offset..][0..4], .little);
if (disk_number != 0)
return error.ZipMultiDiskUnsupported;
data_offset += 4;
}
if (data_offset > data.len)
return error.ZipBadCd64Size;
}
pub fn Iterator(comptime SeekableStream: type) type {
@ -394,7 +400,7 @@ pub fn Iterator(comptime SeekableStream: type) type {
return error.ZipBadExtraFieldSize;
const data = extra[extra_offset + 4 .. end];
switch (@as(ExtraHeader, @enumFromInt(header_id))) {
.zip64_info => try readZip64FileExtents(header, &extents, data),
.zip64_info => try readZip64FileExtents(CentralDirectoryFileHeader, header, &extents, data),
else => {}, // ignore
}
extra_offset = end;
@ -466,12 +472,45 @@ pub fn Iterator(comptime SeekableStream: type) type {
return error.ZipMismatchFlags;
if (local_header.crc32 != 0 and local_header.crc32 != self.crc32)
return error.ZipMismatchCrc32;
if (local_header.compressed_size != 0 and
local_header.compressed_size != self.compressed_size)
var extents: FileExtents = .{
.uncompressed_size = local_header.uncompressed_size,
.compressed_size = local_header.compressed_size,
.local_file_header_offset = 0,
};
if (local_header.extra_len > 0) {
var extra_buf: [std.math.maxInt(u16)]u8 = undefined;
const extra = extra_buf[0..local_header.extra_len];
{
try stream.seekTo(self.file_offset + @sizeOf(LocalFileHeader) + local_header.filename_len);
const len = try stream.context.reader().readAll(extra);
if (len != extra.len)
return error.ZipTruncated;
}
var extra_offset: usize = 0;
while (extra_offset + 4 <= local_header.extra_len) {
const header_id = std.mem.readInt(u16, extra[extra_offset..][0..2], .little);
const data_size = std.mem.readInt(u16, extra[extra_offset..][2..4], .little);
const end = extra_offset + 4 + data_size;
if (end > local_header.extra_len)
return error.ZipBadExtraFieldSize;
const data = extra[extra_offset + 4 .. end];
switch (@as(ExtraHeader, @enumFromInt(header_id))) {
.zip64_info => try readZip64FileExtents(LocalFileHeader, local_header, &extents, data),
else => {}, // ignore
}
extra_offset = end;
}
}
if (extents.compressed_size != 0 and
extents.compressed_size != self.compressed_size)
return error.ZipMismatchCompLen;
if (local_header.uncompressed_size != 0 and
local_header.uncompressed_size != self.uncompressed_size)
if (extents.uncompressed_size != 0 and
extents.uncompressed_size != self.uncompressed_size)
return error.ZipMismatchUncompLen;
if (local_header.filename_len != self.filename_len)
return error.ZipMismatchFilenameLen;
@ -695,6 +734,20 @@ test "zip64" {
.central_directory_offset = std.math.maxInt(u32), // trigger zip64
},
});
try testZip(.{}, &test_files, .{
.end = .{
.zip64 = .{},
.central_directory_offset = std.math.maxInt(u32), // trigger zip64
},
.local_header = .{
.zip64 = .{ // trigger local header zip64
.data_size = 16,
},
.compressed_size = std.math.maxInt(u32),
.uncompressed_size = std.math.maxInt(u32),
.extra_len = 20,
},
});
}
test "bad zip files" {

View File

@ -70,6 +70,16 @@ pub fn makeZipWithStore(
/// Options accepted by `writeZip`. Every field is optional; a `null` field
/// leaves the corresponding part of the archive at the writer's default.
pub const WriteZipOptions = struct {
/// Optional settings for the end record; see `EndRecordOptions`.
end: ?EndRecordOptions = null,
/// Optional overrides forwarded to the local file header writer; see
/// `LocalHeaderOptions`.
local_header: ?LocalHeaderOptions = null,
};
/// Overrides for fields of the local file header written for a file.
/// Each field left `null` falls back to the default noted on that field.
pub const LocalHeaderOptions = struct {
/// When set, a ZIP64 extra field (header id 0x0001) is written right after
/// the file name; see `LocalHeaderZip64Options`.
zip64: ?LocalHeaderZip64Options = null,
/// Value stored in the header's `compressed_size` field; 0 when null.
compressed_size: ?u32 = null,
/// Value stored in the header's `uncompressed_size` field; the content
/// length when null.
uncompressed_size: ?u32 = null,
/// Value stored in the header's `extra_len` field; 0 when null. It is not
/// derived from `zip64`, so the caller must keep the two consistent.
extra_len: ?u16 = null,
};
/// Settings for the ZIP64 extra field written into a local file header.
pub const LocalHeaderZip64Options = struct {
/// Value written as the extra field's data-size word; 8 when null.
/// The writer emits two u64 values (16 bytes) after it regardless of this
/// setting.
data_size: ?u16 = null,
};
pub const EndRecordOptions = struct {
zip64: ?Zip64Options = null,
@ -105,6 +115,7 @@ pub fn writeZip(
.name = file.name,
.content = file.content,
.compression = file.compression,
.write_options = options,
});
}
for (files, 0..) |file, i| {
@ -136,6 +147,7 @@ pub fn Zipper(comptime Writer: type) type {
name: []const u8,
content: []const u8,
compression: zip.CompressionMethod,
write_options: WriteZipOptions,
},
) !FileStore {
const writer = self.counting_writer.writer();
@ -143,7 +155,16 @@ pub fn Zipper(comptime Writer: type) type {
const file_offset: u64 = @intCast(self.counting_writer.bytes_written);
const crc32 = std.hash.Crc32.hash(opt.content);
const header_options = opt.write_options.local_header;
{
var compressed_size: u32 = 0;
var uncompressed_size: u32 = 0;
var extra_len: u16 = 0;
if (header_options) |hdr_options| {
compressed_size = if (hdr_options.compressed_size) |size| size else 0;
uncompressed_size = if (hdr_options.uncompressed_size) |size| size else @intCast(opt.content.len);
extra_len = if (hdr_options.extra_len) |len| len else 0;
}
const hdr: zip.LocalFileHeader = .{
.signature = zip.local_file_header_sig,
.version_needed_to_extract = 10,
@ -152,15 +173,25 @@ pub fn Zipper(comptime Writer: type) type {
.last_modification_time = 0,
.last_modification_date = 0,
.crc32 = crc32,
.compressed_size = 0,
.uncompressed_size = @intCast(opt.content.len),
.compressed_size = compressed_size,
.uncompressed_size = uncompressed_size,
.filename_len = @intCast(opt.name.len),
.extra_len = 0,
.extra_len = extra_len,
};
try writer.writeStructEndian(hdr, .little);
}
try writer.writeAll(opt.name);
if (header_options) |hdr| {
if (hdr.zip64) |options| {
try writer.writeInt(u16, 0x0001, .little);
const data_size = if (options.data_size) |size| size else 8;
try writer.writeInt(u16, data_size, .little);
try writer.writeInt(u64, 0, .little);
try writer.writeInt(u64, @intCast(opt.content.len), .little);
}
}
var compressed_size: u32 = undefined;
switch (opt.compression) {
.store => {