get autodocs wasm compiling again

it's triggering an error.EndOfStream error in the browser though
Andrew Kelley 2025-05-05 20:19:36 -07:00
parent af4bb996f0
commit 576702ca40
11 changed files with 598 additions and 586 deletions

View File

@ -334,7 +334,7 @@ fn buildWasmBinary(
var result_error_bundle = std.zig.ErrorBundle.empty;
while (true) {
receiveWasmMessage(arena, context, poller.reader(.stdout), &result, &result_error_bundle) catch |err| switch (err) {
receiveWasmMessage(gpa, arena, context, poller.reader(.stdout), &result, &result_error_bundle) catch |err| switch (err) {
error.EndOfStream => break,
error.ReadFailed => if (!(try poller.poll())) break,
else => |e| return e,
@ -387,6 +387,7 @@ fn buildWasmBinary(
}
fn receiveWasmMessage(
gpa: Allocator,
arena: Allocator,
context: *Context,
br: *std.io.BufferedReader,
@ -394,8 +395,9 @@ fn receiveWasmMessage(
result_error_bundle: *std.zig.ErrorBundle,
) !void {
// Ensure that we will be able to read the entire message without blocking.
try br.fillAlloc(gpa, @sizeOf(std.zig.Server.Message.Header));
const header = try br.peekStructEndian(std.zig.Server.Message.Header, .little);
try br.fill(@sizeOf(std.zig.Server.Message.Header) + header.bytes_len);
try br.fillAlloc(gpa, @sizeOf(std.zig.Server.Message.Header) + header.bytes_len);
br.toss(@sizeOf(std.zig.Server.Message.Header));
switch (header.tag) {
.zig_version => {

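For reference, the receive path above relies on a fill-then-peek framing pattern: grow the reader's buffer, decode the fixed-size header without consuming it, buffer the full payload, then toss only the header. A minimal sketch, assuming `Allocator` is `std.mem.Allocator` and using only the `fillAlloc`/`peekStructEndian`/`toss` calls shown in the hunk:

fn peekMessage(gpa: Allocator, br: *std.io.BufferedReader) !std.zig.Server.Message.Header {
    // Grow the buffer if needed and buffer the fixed-size header bytes.
    try br.fillAlloc(gpa, @sizeOf(std.zig.Server.Message.Header));
    // Decode the header without advancing the seek position.
    const header = try br.peekStructEndian(std.zig.Server.Message.Header, .little);
    // Buffer header + payload so the body can be read without blocking.
    try br.fillAlloc(gpa, @sizeOf(std.zig.Server.Message.Header) + header.bytes_len);
    // Consume the header; the payload stays buffered for the caller.
    br.toss(@sizeOf(std.zig.Server.Message.Header));
    return header;
}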
View File

@ -443,7 +443,13 @@ fn parse(file_name: []const u8, source: []u8) Oom!Ast {
const err_offset = token_offsets[err.token] + ast.errorOffset(err);
const err_loc = std.zig.findLineColumn(ast.source, err_offset);
rendered_err.clearRetainingCapacity();
try ast.renderError(err, rendered_err.writer(gpa));
{
var aw: std.io.AllocatingWriter = undefined;
defer rendered_err = aw.toArrayList();
ast.renderError(err, aw.fromArrayList(gpa, &rendered_err)) catch |e| switch (e) {
error.WriteFailed => return error.OutOfMemory,
};
}
log.err("{s}:{}:{}: {s}", .{ file_name, err_loc.line + 1, err_loc.column + 1, rendered_err.items });
}
return Ast.parse(gpa, "", .zig);
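The block above is an instance of the new ArrayList/writer adapter pattern: `fromArrayList` hands the list's memory to an `AllocatingWriter`, and `toArrayList` hands it back, so the only possible failure is allocation. A minimal sketch with a hypothetical helper, assuming the `AllocatingWriter` API used in the hunk:

fn renderErrorToList(
    gpa: Allocator,
    list: *std.ArrayListUnmanaged(u8),
    ast: std.zig.Ast,
    parse_error: std.zig.Ast.Error,
) error{OutOfMemory}!void {
    var aw: std.io.AllocatingWriter = undefined;
    // Return ownership of the written bytes to the list on scope exit.
    defer list.* = aw.toArrayList();
    ast.renderError(parse_error, aw.fromArrayList(gpa, list)) catch |e| switch (e) {
        // An allocating writer can only fail by running out of memory.
        error.WriteFailed => return error.OutOfMemory,
    };
}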

View File

@ -88,7 +88,7 @@ pub fn fileSourceHtml(
if (next_annotate_index >= options.source_location_annotations.len) break;
const next_annotation = options.source_location_annotations[next_annotate_index];
if (cursor <= next_annotation.file_byte_offset) break;
try out.writer(gpa).print("<span id=\"{s}{d}\"></span>", .{
try out.print(gpa, "<span id=\"{s}{d}\"></span>", .{
options.annotation_prefix, next_annotation.dom_id,
});
next_annotate_index += 1;

View File

@ -694,43 +694,49 @@ fn render_docs(
var link_buffer: std.ArrayListUnmanaged(u8) = .empty;
};
const Writer = std.ArrayListUnmanaged(u8).Writer;
const Renderer = markdown.Renderer(Writer, Decl.Index);
const renderer: Renderer = .{
.context = decl_index,
var render: markdown.Render = .{
.context = &decl_index,
.renderFn = struct {
const fmtHtml = markdown.Render.fmtHtml;
fn render(
r: Renderer,
r: markdown.Render,
doc: markdown.Document,
node: markdown.Document.Node.Index,
writer: Writer,
writer: *std.io.BufferedWriter,
) !void {
const decl_index_ptr: *const Decl.Index = @alignCast(@ptrCast(r.context));
const data = doc.nodes.items(.data)[@intFromEnum(node)];
switch (doc.nodes.items(.tag)[@intFromEnum(node)]) {
.code_span => {
try writer.writeAll("<code>");
const content = doc.string(data.text.content);
if (resolve_decl_path(r.context, content)) |resolved_decl_index| {
if (resolve_decl_path(decl_index_ptr.*, content)) |resolved_decl_index| {
g.link_buffer.clearRetainingCapacity();
try resolveDeclLink(resolved_decl_index, &g.link_buffer);
resolveDeclLink(resolved_decl_index, &g.link_buffer) catch |err| switch (err) {
error.OutOfMemory => return error.WriteFailed,
};
try writer.writeAll("<a href=\"#");
_ = missing_feature_url_escape;
try writer.writeAll(g.link_buffer.items);
try writer.print("\">{}</a>", .{markdown.fmtHtml(content)});
try writer.print("\">{f}</a>", .{fmtHtml(content)});
} else {
try writer.print("{}", .{markdown.fmtHtml(content)});
try writer.print("{f}", .{fmtHtml(content)});
}
try writer.writeAll("</code>");
},
else => try Renderer.renderDefault(r, doc, node, writer),
else => try markdown.Render.renderDefault(r, doc, node, writer),
}
}
}.render,
};
try renderer.render(parsed_doc, out.writer(gpa));
var aw: std.io.AllocatingWriter = undefined;
defer out.* = aw.toArrayList();
render.render(parsed_doc, aw.fromArrayList(gpa, out)) catch |err| switch (err) {
error.WriteFailed => return error.OutOfMemory,
};
}
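Since `markdown.Render` stores its context as `?*const anyopaque`, a custom `renderFn` recovers the typed pointer with `@ptrCast`/`@alignCast`, as the closure above does for `Decl.Index`. A minimal sketch of the round trip, with a hypothetical `Ctx` type:

const Ctx = struct { prefix: []const u8 };
const ctx: Ctx = .{ .prefix = "doc" };
var custom: markdown.Render = .{
    .context = &ctx, // type-erased to ?*const anyopaque
    .renderFn = struct {
        fn render(
            r: markdown.Render,
            doc: markdown.Document,
            node: markdown.Document.Node.Index,
            writer: *std.io.BufferedWriter,
        ) !void {
            // Recover the typed pointer; the callback must know the real type.
            const c: *const Ctx = @alignCast(@ptrCast(r.context));
            _ = c.prefix;
            // Delegate everything to the stock rendering.
            try markdown.Render.renderDefault(r, doc, node, writer);
        }
    }.render,
};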
fn resolve_decl_path(decl_index: Decl.Index, path: []const u8) ?Decl.Index {
@ -772,10 +778,11 @@ export fn decl_type_html(decl_index: Decl.Index) String {
const Oom = error{OutOfMemory};
fn unpackInner(tar_bytes: []u8) !void {
var fbs = std.io.fixedBufferStream(tar_bytes);
var br: std.io.BufferedReader = undefined;
br.initFixed(tar_bytes);
var file_name_buffer: [1024]u8 = undefined;
var link_name_buffer: [1024]u8 = undefined;
var it = std.tar.iterator(fbs.reader(), .{
var it = std.tar.Iterator.init(&br, .{
.file_name_buffer = &file_name_buffer,
.link_name_buffer = &link_name_buffer,
});
@ -796,7 +803,7 @@ fn unpackInner(tar_bytes: []u8) !void {
{
gop.value_ptr.* = file;
}
const file_bytes = tar_bytes[fbs.pos..][0..@intCast(tar_file.size)];
const file_bytes = tar_bytes[br.seek..][0..@intCast(tar_file.size)];
assert(file == try Walk.add_file(file_name, file_bytes));
}
} else {

View File

@ -129,9 +129,7 @@ const testing = std.testing;
pub const Document = @import("markdown/Document.zig");
pub const Parser = @import("markdown/Parser.zig");
pub const Renderer = @import("markdown/renderer.zig").Renderer;
pub const renderInlineNodeText = @import("markdown/renderer.zig").renderInlineNodeText;
pub const fmtHtml = @import("markdown/renderer.zig").fmtHtml;
pub const Render = @import("markdown/Render.zig");
// Avoid exposing main to other files merely importing this one.
pub const main = if (@import("root") == @This())

View File

@ -4,7 +4,7 @@ const std = @import("std");
const builtin = @import("builtin");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const Renderer = @import("renderer.zig").Renderer;
const Render = @import("Render.zig");
nodes: Node.List.Slice,
extra: []u32,
@ -160,8 +160,8 @@ pub fn deinit(doc: *Document, allocator: Allocator) void {
}
/// Renders a document directly to a writer using the default renderer.
pub fn render(doc: Document, writer: anytype) @TypeOf(writer).Error!void {
const renderer: Renderer(@TypeOf(writer), void) = .{ .context = {} };
pub fn render(doc: Document, writer: *std.io.BufferedWriter) std.io.Writer.Error!void {
const renderer: Render = .{ .context = null };
try renderer.render(doc, writer);
}

View File

@ -0,0 +1,243 @@
//! A Markdown document renderer.
//!
//! `Render` provides a `renderDefault` function, with the intention that
//! custom `renderFn` implementations can call `renderDefault` for node
//! types for which they require no special rendering.
const std = @import("std");
const Document = @import("Document.zig");
const Node = Document.Node;
const Render = @This();
context: ?*const anyopaque,
renderFn: *const fn (
r: Render,
doc: Document,
node: Node.Index,
writer: *std.io.BufferedWriter,
) std.io.Writer.Error!void = renderDefault,
pub fn render(r: Render, doc: Document, writer: *std.io.BufferedWriter) std.io.Writer.Error!void {
try r.renderFn(r, doc, .root, writer);
}
pub fn renderDefault(
r: Render,
doc: Document,
node: Node.Index,
writer: *std.io.BufferedWriter,
) std.io.Writer.Error!void {
const data = doc.nodes.items(.data)[@intFromEnum(node)];
switch (doc.nodes.items(.tag)[@intFromEnum(node)]) {
.root => {
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
},
.list => {
if (data.list.start.asNumber()) |start| {
if (start == 1) {
try writer.writeAll("<ol>\n");
} else {
try writer.print("<ol start=\"{}\">\n", .{start});
}
} else {
try writer.writeAll("<ul>\n");
}
for (doc.extraChildren(data.list.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
if (data.list.start.asNumber() != null) {
try writer.writeAll("</ol>\n");
} else {
try writer.writeAll("</ul>\n");
}
},
.list_item => {
try writer.writeAll("<li>");
for (doc.extraChildren(data.list_item.children)) |child| {
if (data.list_item.tight and doc.nodes.items(.tag)[@intFromEnum(child)] == .paragraph) {
const para_data = doc.nodes.items(.data)[@intFromEnum(child)];
for (doc.extraChildren(para_data.container.children)) |para_child| {
try r.renderFn(r, doc, para_child, writer);
}
} else {
try r.renderFn(r, doc, child, writer);
}
}
try writer.writeAll("</li>\n");
},
.table => {
try writer.writeAll("<table>\n");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</table>\n");
},
.table_row => {
try writer.writeAll("<tr>\n");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</tr>\n");
},
.table_cell => {
if (data.table_cell.info.header) {
try writer.writeAll("<th");
} else {
try writer.writeAll("<td");
}
switch (data.table_cell.info.alignment) {
.unset => try writer.writeAll(">"),
else => |a| try writer.print(" style=\"text-align: {s}\">", .{@tagName(a)}),
}
for (doc.extraChildren(data.table_cell.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
if (data.table_cell.info.header) {
try writer.writeAll("</th>\n");
} else {
try writer.writeAll("</td>\n");
}
},
.heading => {
try writer.print("<h{}>", .{data.heading.level});
for (doc.extraChildren(data.heading.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.print("</h{}>\n", .{data.heading.level});
},
.code_block => {
const content = doc.string(data.code_block.content);
try writer.print("<pre><code>{f}</code></pre>\n", .{fmtHtml(content)});
},
.blockquote => {
try writer.writeAll("<blockquote>\n");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</blockquote>\n");
},
.paragraph => {
try writer.writeAll("<p>");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</p>\n");
},
.thematic_break => {
try writer.writeAll("<hr />\n");
},
.link => {
const target = doc.string(data.link.target);
try writer.print("<a href=\"{f}\">", .{fmtHtml(target)});
for (doc.extraChildren(data.link.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</a>");
},
.autolink => {
const target = doc.string(data.text.content);
try writer.print("<a href=\"{0f}\">{0f}</a>", .{fmtHtml(target)});
},
.image => {
const target = doc.string(data.link.target);
try writer.print("<img src=\"{f}\" alt=\"", .{fmtHtml(target)});
for (doc.extraChildren(data.link.children)) |child| {
try renderInlineNodeText(doc, child, writer);
}
try writer.writeAll("\" />");
},
.strong => {
try writer.writeAll("<strong>");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</strong>");
},
.emphasis => {
try writer.writeAll("<em>");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</em>");
},
.code_span => {
const content = doc.string(data.text.content);
try writer.print("<code>{f}</code>", .{fmtHtml(content)});
},
.text => {
const content = doc.string(data.text.content);
try writer.print("{f}", .{fmtHtml(content)});
},
.line_break => {
try writer.writeAll("<br />\n");
},
}
}
/// Renders an inline node as plain text. Asserts that the node is an inline and
/// has no non-inline children.
pub fn renderInlineNodeText(
doc: Document,
node: Node.Index,
writer: *std.io.BufferedWriter,
) std.io.Writer.Error!void {
const data = doc.nodes.items(.data)[@intFromEnum(node)];
switch (doc.nodes.items(.tag)[@intFromEnum(node)]) {
.root,
.list,
.list_item,
.table,
.table_row,
.table_cell,
.heading,
.code_block,
.blockquote,
.paragraph,
.thematic_break,
=> unreachable, // Blocks
.link, .image => {
for (doc.extraChildren(data.link.children)) |child| {
try renderInlineNodeText(doc, child, writer);
}
},
.strong => {
for (doc.extraChildren(data.container.children)) |child| {
try renderInlineNodeText(doc, child, writer);
}
},
.emphasis => {
for (doc.extraChildren(data.container.children)) |child| {
try renderInlineNodeText(doc, child, writer);
}
},
.autolink, .code_span, .text => {
const content = doc.string(data.text.content);
try writer.print("{f}", .{fmtHtml(content)});
},
.line_break => {
try writer.writeAll("\n");
},
}
}
pub fn fmtHtml(bytes: []const u8) std.fmt.Formatter(formatHtml) {
return .{ .data = bytes };
}
fn formatHtml(bytes: []const u8, writer: *std.io.BufferedWriter, comptime fmt: []const u8) !void {
_ = fmt;
for (bytes) |b| {
switch (b) {
'<' => try writer.writeAll("&lt;"),
'>' => try writer.writeAll("&gt;"),
'&' => try writer.writeAll("&amp;"),
'"' => try writer.writeAll("&quot;"),
else => try writer.writeByte(b),
}
}
}
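Usage sketch: because `fmtHtml` returns a `std.fmt.Formatter`, call sites escape on the fly with the `{f}` specifier instead of building an escaped copy first:

// Emits: <code>a &lt; b &amp; c</code>
try writer.print("<code>{f}</code>", .{fmtHtml("a < b & c")});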

View File

@ -1,253 +0,0 @@
const std = @import("std");
const Document = @import("Document.zig");
const Node = Document.Node;
/// A Markdown document renderer.
///
/// Each concrete `Renderer` type has a `renderDefault` function, with the
/// intention that custom `renderFn` implementations can call `renderDefault`
/// for node types for which they require no special rendering.
pub fn Renderer(comptime Writer: type, comptime Context: type) type {
return struct {
renderFn: *const fn (
r: Self,
doc: Document,
node: Node.Index,
writer: Writer,
) Writer.Error!void = renderDefault,
context: Context,
const Self = @This();
pub fn render(r: Self, doc: Document, writer: Writer) Writer.Error!void {
try r.renderFn(r, doc, .root, writer);
}
pub fn renderDefault(
r: Self,
doc: Document,
node: Node.Index,
writer: Writer,
) Writer.Error!void {
const data = doc.nodes.items(.data)[@intFromEnum(node)];
switch (doc.nodes.items(.tag)[@intFromEnum(node)]) {
.root => {
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
},
.list => {
if (data.list.start.asNumber()) |start| {
if (start == 1) {
try writer.writeAll("<ol>\n");
} else {
try writer.print("<ol start=\"{}\">\n", .{start});
}
} else {
try writer.writeAll("<ul>\n");
}
for (doc.extraChildren(data.list.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
if (data.list.start.asNumber() != null) {
try writer.writeAll("</ol>\n");
} else {
try writer.writeAll("</ul>\n");
}
},
.list_item => {
try writer.writeAll("<li>");
for (doc.extraChildren(data.list_item.children)) |child| {
if (data.list_item.tight and doc.nodes.items(.tag)[@intFromEnum(child)] == .paragraph) {
const para_data = doc.nodes.items(.data)[@intFromEnum(child)];
for (doc.extraChildren(para_data.container.children)) |para_child| {
try r.renderFn(r, doc, para_child, writer);
}
} else {
try r.renderFn(r, doc, child, writer);
}
}
try writer.writeAll("</li>\n");
},
.table => {
try writer.writeAll("<table>\n");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</table>\n");
},
.table_row => {
try writer.writeAll("<tr>\n");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</tr>\n");
},
.table_cell => {
if (data.table_cell.info.header) {
try writer.writeAll("<th");
} else {
try writer.writeAll("<td");
}
switch (data.table_cell.info.alignment) {
.unset => try writer.writeAll(">"),
else => |a| try writer.print(" style=\"text-align: {s}\">", .{@tagName(a)}),
}
for (doc.extraChildren(data.table_cell.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
if (data.table_cell.info.header) {
try writer.writeAll("</th>\n");
} else {
try writer.writeAll("</td>\n");
}
},
.heading => {
try writer.print("<h{}>", .{data.heading.level});
for (doc.extraChildren(data.heading.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.print("</h{}>\n", .{data.heading.level});
},
.code_block => {
const content = doc.string(data.code_block.content);
try writer.print("<pre><code>{}</code></pre>\n", .{fmtHtml(content)});
},
.blockquote => {
try writer.writeAll("<blockquote>\n");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</blockquote>\n");
},
.paragraph => {
try writer.writeAll("<p>");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</p>\n");
},
.thematic_break => {
try writer.writeAll("<hr />\n");
},
.link => {
const target = doc.string(data.link.target);
try writer.print("<a href=\"{}\">", .{fmtHtml(target)});
for (doc.extraChildren(data.link.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</a>");
},
.autolink => {
const target = doc.string(data.text.content);
try writer.print("<a href=\"{0}\">{0}</a>", .{fmtHtml(target)});
},
.image => {
const target = doc.string(data.link.target);
try writer.print("<img src=\"{}\" alt=\"", .{fmtHtml(target)});
for (doc.extraChildren(data.link.children)) |child| {
try renderInlineNodeText(doc, child, writer);
}
try writer.writeAll("\" />");
},
.strong => {
try writer.writeAll("<strong>");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</strong>");
},
.emphasis => {
try writer.writeAll("<em>");
for (doc.extraChildren(data.container.children)) |child| {
try r.renderFn(r, doc, child, writer);
}
try writer.writeAll("</em>");
},
.code_span => {
const content = doc.string(data.text.content);
try writer.print("<code>{}</code>", .{fmtHtml(content)});
},
.text => {
const content = doc.string(data.text.content);
try writer.print("{}", .{fmtHtml(content)});
},
.line_break => {
try writer.writeAll("<br />\n");
},
}
}
};
}
/// Renders an inline node as plain text. Asserts that the node is an inline and
/// has no non-inline children.
pub fn renderInlineNodeText(
doc: Document,
node: Node.Index,
writer: anytype,
) @TypeOf(writer).Error!void {
const data = doc.nodes.items(.data)[@intFromEnum(node)];
switch (doc.nodes.items(.tag)[@intFromEnum(node)]) {
.root,
.list,
.list_item,
.table,
.table_row,
.table_cell,
.heading,
.code_block,
.blockquote,
.paragraph,
.thematic_break,
=> unreachable, // Blocks
.link, .image => {
for (doc.extraChildren(data.link.children)) |child| {
try renderInlineNodeText(doc, child, writer);
}
},
.strong => {
for (doc.extraChildren(data.container.children)) |child| {
try renderInlineNodeText(doc, child, writer);
}
},
.emphasis => {
for (doc.extraChildren(data.container.children)) |child| {
try renderInlineNodeText(doc, child, writer);
}
},
.autolink, .code_span, .text => {
const content = doc.string(data.text.content);
try writer.print("{}", .{fmtHtml(content)});
},
.line_break => {
try writer.writeAll("\n");
},
}
}
pub fn fmtHtml(bytes: []const u8) std.fmt.Formatter(formatHtml) {
return .{ .data = bytes };
}
fn formatHtml(
bytes: []const u8,
comptime fmt: []const u8,
options: std.fmt.FormatOptions,
writer: anytype,
) !void {
_ = fmt;
_ = options;
for (bytes) |b| {
switch (b) {
'<' => try writer.writeAll("&lt;"),
'>' => try writer.writeAll("&gt;"),
'&' => try writer.writeAll("&amp;"),
'"' => try writer.writeAll("&quot;"),
else => try writer.writeByte(b),
}
}
}

View File

@ -166,6 +166,7 @@ fn writeFile(
headers_and_trailers_full: []const []const u8,
headers_len_full: usize,
) std.io.Writer.FileError!usize {
if (std.fs.File.Handle == void) unreachable;
const aw: *AllocatingWriter = @alignCast(@ptrCast(context));
const gpa = aw.allocator;
var list = aw.toArrayList();

View File

@ -316,6 +316,15 @@ pub fn discardAll(br: *BufferedReader, n: usize) Reader.Error!void {
if ((try br.discardShort(n)) != n) return error.EndOfStream;
}
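/// Like `discardAll`, except `n` may exceed `maxInt(usize)`. Useful on
/// 32-bit targets such as wasm32, where tar file sizes are `u64` but
/// buffer operations take `usize`.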
pub fn discardAll64(br: *BufferedReader, n: u64) Reader.Error!void {
var remaining: u64 = n;
while (remaining > 0) {
const limited = std.math.cast(usize, remaining) orelse std.math.maxInt(usize);
try discardAll(br, limited);
remaining -= limited;
}
}
/// Skips the next `n` bytes from the stream, advancing the seek position.
///
/// Unlike `toss` which is infallible, in this function `n` can be any amount.
@ -896,28 +905,35 @@ pub fn takeLeb128(br: *BufferedReader, comptime Result: type) TakeLeb128Error!Re
} }))) orelse error.Overflow;
}
pub fn expandTotalCapacity(br: *BufferedReader, allocator: Allocator, n: usize) Allocator.Error!void {
if (n <= br.buffer.len) return;
if (br.seek > 0) rebase(br);
var list: ArrayList(u8) = .{
.items = br.buffer[0..br.end],
.capacity = br.buffer.len,
};
defer br.buffer = list.allocatedSlice();
try list.ensureTotalCapacity(allocator, n);
}
pub const FillAllocError = Reader.Error || Allocator.Error;
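/// Ensures at least `n` bytes are buffered, first growing `buffer` via
/// `allocator` when it is too small to hold them.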
pub fn fillAlloc(br: *BufferedReader, allocator: Allocator, n: usize) FillAllocError!void {
try expandTotalCapacity(br, allocator, n);
return fill(br, n);
}
/// Returns a slice into the unused capacity of `buffer` with at least
/// `min_len` bytes, extending `buffer` by resizing it with `gpa` as necessary.
///
/// After calling this function, typically the caller will follow up with a
/// call to `advanceBufferEnd` to report the actual number of bytes buffered.
pub fn writableSliceGreedyAlloc(
br: *BufferedReader,
allocator: Allocator,
min_len: usize,
) error{OutOfMemory}![]u8 {
pub fn writableSliceGreedyAlloc(br: *BufferedReader, allocator: Allocator, min_len: usize) Allocator.Error![]u8 {
{
const unused = br.buffer[br.end..];
if (unused.len >= min_len) return unused;
}
const seek = br.seek;
if (seek > 0) {
const buffer = br.buffer[0..br.end];
const remainder = buffer[seek..];
std.mem.copyForwards(u8, buffer[0..remainder.len], remainder);
br.end = remainder.len;
br.seek = 0;
}
if (br.seek > 0) rebase(br);
{
var list: ArrayList(u8) = .{
.items = br.buffer[0..br.end],

View File

@ -293,28 +293,6 @@ fn nullStr(str: []const u8) []const u8 {
return str;
}
/// Options for iterator.
/// Buffers should be provided by the caller.
pub const IteratorOptions = struct {
/// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
file_name_buffer: []u8,
/// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
link_name_buffer: []u8,
/// Collects error messages during unpacking
diagnostics: ?*Diagnostics = null,
};
/// Iterates over files in the tar archive.
/// `next` returns each file in the tar archive.
pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) {
return .{
.reader = reader,
.diagnostics = options.diagnostics,
.file_name_buffer = options.file_name_buffer,
.link_name_buffer = options.link_name_buffer,
};
}
/// Type of the file returned by the iterator's `next` method.
pub const FileKind = enum {
directory,
@ -323,206 +301,230 @@ pub const FileKind = enum {
};
/// Iterator over entries in the tar file represented by reader.
pub fn Iterator(comptime ReaderType: type) type {
return struct {
reader: ReaderType,
diagnostics: ?*Diagnostics = null,
pub const Iterator = struct {
reader: *std.io.BufferedReader,
diagnostics: ?*Diagnostics = null,
// buffers for header and file attributes
header_buffer: [Header.SIZE]u8 = undefined,
// buffers for header and file attributes
header_buffer: [Header.SIZE]u8 = undefined,
file_name_buffer: []u8,
link_name_buffer: []u8,
// bytes of padding to the end of the block
padding: usize = 0,
// bytes of the file not consumed by the caller in the previous `next` call
unread_file_bytes: u64 = 0,
/// Options for iterator.
/// Buffers should be provided by the caller.
pub const Options = struct {
/// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
file_name_buffer: []u8,
/// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
link_name_buffer: []u8,
/// Collects error messages during unpacking
diagnostics: ?*Diagnostics = null,
};
// bytes of padding to the end of the block
padding: usize = 0,
// bytes of the file not consumed by the caller in the previous `next` call
unread_file_bytes: u64 = 0,
pub const File = struct {
name: []const u8, // name of file, symlink or directory
link_name: []const u8, // target name of symlink
size: u64 = 0, // size of the file in bytes
mode: u32 = 0,
kind: FileKind = .file,
unread_bytes: *u64,
parent_reader: ReaderType,
pub const Reader = std.io.Reader(File, ReaderType.Error, File.read);
pub fn reader(self: File) Reader {
return .{ .context = self };
}
pub fn read(self: File, dest: []u8) ReaderType.Error!usize {
const buf = dest[0..@min(dest.len, self.unread_bytes.*)];
const n = try self.parent_reader.read(buf);
self.unread_bytes.* -= n;
return n;
}
// Writes file content to writer.
pub fn writeAll(self: File, out_writer: anytype) !void {
var buffer: [4096]u8 = undefined;
while (self.unread_bytes.* > 0) {
const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)];
try self.parent_reader.readNoEof(buf);
try out_writer.writeAll(buf);
self.unread_bytes.* -= buf.len;
}
}
/// Iterates over files in the tar archive.
/// `next` returns each file in the tar archive.
pub fn init(reader: *std.io.BufferedReader, options: Options) Iterator {
return .{
.reader = reader,
.diagnostics = options.diagnostics,
.file_name_buffer = options.file_name_buffer,
.link_name_buffer = options.link_name_buffer,
};
}
const Self = @This();
pub const File = struct {
name: []const u8, // name of file, symlink or directory
link_name: []const u8, // target name of symlink
size: u64 = 0, // size of the file in bytes
mode: u32 = 0,
kind: FileKind = .file,
fn readHeader(self: *Self) !?Header {
if (self.padding > 0) {
try self.reader.skipBytes(self.padding, .{});
}
const n = try self.reader.readAll(&self.header_buffer);
if (n == 0) return null;
if (n < Header.SIZE) return error.UnexpectedEndOfStream;
const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
if (try header.checkChksum() == 0) return null;
return header;
}
unread_bytes: *u64,
parent_reader: *std.io.BufferedReader,
fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
if (size > buffer.len) return error.TarInsufficientBuffer;
const buf = buffer[0..size];
try self.reader.readNoEof(buf);
return nullStr(buf);
}
fn newFile(self: *Self) File {
pub fn reader(self: *File) std.io.Reader {
return .{
.name = self.file_name_buffer[0..0],
.link_name = self.link_name_buffer[0..0],
.parent_reader = self.reader,
.unread_bytes = &self.unread_file_bytes,
.context = self,
.vtable = &.{
.read = read,
.readVec = readVec,
.discard = discard,
},
};
}
// Number of padding bytes in the last file block.
fn blockPadding(size: u64) usize {
const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to the block boundary
return @intCast(block_rounded - size);
fn read(context: ?*anyopaque, bw: *std.io.BufferedWriter, limit: std.io.Reader.Limit) std.io.Reader.RwError!usize {
const file: *File = @ptrCast(@alignCast(context));
if (file.unread_bytes.* == 0) return error.EndOfStream;
const n = try file.parent_reader.read(bw, limit.min(.limited(file.unread_bytes.*)));
file.unread_bytes.* -= n;
return n;
}
/// Iterates through the tar archive as if it is a series of files.
/// Internally, the tar format often uses entries (header with optional
/// content) to add metadata that describes the next file. These
/// entries should not normally be visible to the outside. As such, this
/// loop iterates through one or more entries until it collects all
/// file attributes.
pub fn next(self: *Self) !?File {
if (self.unread_file_bytes > 0) {
// If file content was not consumed by caller
try self.reader.skipBytes(self.unread_file_bytes, .{});
self.unread_file_bytes = 0;
}
var file: File = self.newFile();
while (try self.readHeader()) |header| {
const kind = header.kind();
const size: u64 = try header.size();
self.padding = blockPadding(size);
switch (kind) {
// File types to return upstream
.directory, .normal, .symbolic_link => {
file.kind = switch (kind) {
.directory => .directory,
.normal => .file,
.symbolic_link => .sym_link,
else => unreachable,
};
file.mode = try header.mode();
// set file attributes if not already set by prefix/extended headers
if (file.size == 0) {
file.size = size;
}
if (file.link_name.len == 0) {
file.link_name = try header.linkName(self.link_name_buffer);
}
if (file.name.len == 0) {
file.name = try header.fullName(self.file_name_buffer);
}
self.padding = blockPadding(file.size);
self.unread_file_bytes = file.size;
return file;
},
// Prefix header types
.gnu_long_name => {
file.name = try self.readString(@intCast(size), self.file_name_buffer);
},
.gnu_long_link => {
file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
},
.extended_header => {
// Use just attributes from last extended header.
file = self.newFile();
var rdr = paxIterator(self.reader, @intCast(size));
while (try rdr.next()) |attr| {
switch (attr.kind) {
.path => {
file.name = try attr.value(self.file_name_buffer);
},
.linkpath => {
file.link_name = try attr.value(self.link_name_buffer);
},
.size => {
var buf: [pax_max_size_attr_len]u8 = undefined;
file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
},
}
}
},
// Ignored header type
.global_extended_header => {
self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
},
// All other are unsupported header types
else => {
const d = self.diagnostics orelse return error.TarUnsupportedHeader;
try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
.file_name = try d.allocator.dupe(u8, header.name()),
.file_type = kind,
} });
if (kind == .gnu_sparse) {
try self.skipGnuSparseExtendedHeaders(header);
}
self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
},
}
}
return null;
fn readVec(context: ?*anyopaque, data: []const []u8) std.io.Reader.Error!usize {
const file: *File = @ptrCast(@alignCast(context));
if (file.unread_bytes.* == 0) return error.EndOfStream;
const n = try file.parent_reader.readVecLimit(data, .limited(file.unread_bytes.*));
file.unread_bytes.* -= n;
return n;
}
fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void {
var is_extended = header.bytes[482] > 0;
while (is_extended) {
var buf: [Header.SIZE]u8 = undefined;
const n = try self.reader.readAll(&buf);
if (n < Header.SIZE) return error.UnexpectedEndOfStream;
is_extended = buf[504] > 0;
}
fn discard(context: ?*anyopaque, limit: std.io.Reader.Limit) std.io.Reader.Error!usize {
const file: *File = @ptrCast(@alignCast(context));
const n = limit.minInt(file.unread_bytes.*);
file.unread_bytes.* -= n;
return n;
}
pub fn readRemaining(file: *File, out: *std.io.BufferedWriter) std.io.Reader.RwRemainingError!void {
return file.reader().readRemaining(out);
}
};
}
/// Pax attributes iterator.
/// Size is length of pax extended header in reader.
fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) {
return PaxIterator(@TypeOf(reader)){
.reader = reader,
.size = size,
};
}
fn readHeader(self: *Iterator) !?Header {
if (self.padding > 0) {
try self.reader.discardAll(self.padding);
}
const n = try self.reader.readSliceShort(&self.header_buffer);
if (n == 0) return null;
if (n < Header.SIZE) return error.UnexpectedEndOfStream;
const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
if (try header.checkChksum() == 0) return null;
return header;
}
fn readString(self: *Iterator, size: usize, buffer: []u8) ![]const u8 {
if (size > buffer.len) return error.TarInsufficientBuffer;
const buf = buffer[0..size];
try self.reader.readSlice(buf);
return nullStr(buf);
}
fn newFile(self: *Iterator) File {
return .{
.name = self.file_name_buffer[0..0],
.link_name = self.link_name_buffer[0..0],
.parent_reader = self.reader,
.unread_bytes = &self.unread_file_bytes,
};
}
// Number of padding bytes in the last file block.
fn blockPadding(size: u64) usize {
const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to the block boundary
return @intCast(block_rounded - size);
}
/// Iterates through the tar archive as if it is a series of files.
/// Internally, the tar format often uses entries (header with optional
/// content) to add metadata that describes the next file. These
/// entries should not normally be visible to the outside. As such, this
/// loop iterates through one or more entries until it collects all
/// file attributes.
pub fn next(self: *Iterator) !?File {
if (self.unread_file_bytes > 0) {
// If file content was not consumed by caller
try self.reader.discardAll64(self.unread_file_bytes);
self.unread_file_bytes = 0;
}
var file: File = self.newFile();
while (try self.readHeader()) |header| {
const kind = header.kind();
const size: u64 = try header.size();
self.padding = blockPadding(size);
switch (kind) {
// File types to return upstream
.directory, .normal, .symbolic_link => {
file.kind = switch (kind) {
.directory => .directory,
.normal => .file,
.symbolic_link => .sym_link,
else => unreachable,
};
file.mode = try header.mode();
// set file attributes if not already set by prefix/extended headers
if (file.size == 0) {
file.size = size;
}
if (file.link_name.len == 0) {
file.link_name = try header.linkName(self.link_name_buffer);
}
if (file.name.len == 0) {
file.name = try header.fullName(self.file_name_buffer);
}
self.padding = blockPadding(file.size);
self.unread_file_bytes = file.size;
return file;
},
// Prefix header types
.gnu_long_name => {
file.name = try self.readString(@intCast(size), self.file_name_buffer);
},
.gnu_long_link => {
file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
},
.extended_header => {
// Use just attributes from last extended header.
file = self.newFile();
var rdr: PaxIterator = .{
.reader = self.reader,
.size = @intCast(size),
};
while (try rdr.next()) |attr| {
switch (attr.kind) {
.path => {
file.name = try attr.value(self.file_name_buffer);
},
.linkpath => {
file.link_name = try attr.value(self.link_name_buffer);
},
.size => {
var buf: [pax_max_size_attr_len]u8 = undefined;
file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
},
}
}
},
// Ignored header type
.global_extended_header => {
self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
},
// All other are unsupported header types
else => {
const d = self.diagnostics orelse return error.TarUnsupportedHeader;
try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
.file_name = try d.allocator.dupe(u8, header.name()),
.file_type = kind,
} });
if (kind == .gnu_sparse) {
try self.skipGnuSparseExtendedHeaders(header);
}
self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
},
}
}
return null;
}
fn skipGnuSparseExtendedHeaders(self: *Iterator, header: Header) !void {
var is_extended = header.bytes[482] > 0;
while (is_extended) {
var buf: [Header.SIZE]u8 = undefined;
try self.reader.readSlice(&buf);
is_extended = buf[504] > 0;
}
}
};
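A usage sketch of the reworked iterator over a fixed in-memory archive, mirroring the autodocs `unpackInner` change above (`tar_bytes` is a hypothetical byte slice holding the archive):

var br: std.io.BufferedReader = undefined;
br.initFixed(tar_bytes);
var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var it: Iterator = .init(&br, .{
    .file_name_buffer = &file_name_buffer,
    .link_name_buffer = &link_name_buffer,
});
while (try it.next()) |tar_file| {
    // With a fixed reader, br.seek indexes into tar_bytes, so file
    // contents can be sliced without copying.
    const contents = tar_bytes[br.seek..][0..@intCast(tar_file.size)];
    _ = contents;
}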
const PaxAttributeKind = enum {
path,
@ -533,103 +535,92 @@ const PaxAttributeKind = enum {
// maxInt(u64) has 20 chars in base 10; in practice we have seen 24 chars
const pax_max_size_attr_len = 64;
fn PaxIterator(comptime ReaderType: type) type {
return struct {
size: usize, // cumulative size of all pax attributes
reader: ReaderType,
// scratch buffer used for reading attribute length and keyword
scratch: [128]u8 = undefined,
pub const PaxIterator = struct {
size: usize, // cumulative size of all pax attributes
reader: *std.io.BufferedReader,
const Self = @This();
const Self = @This();
const Attribute = struct {
kind: PaxAttributeKind,
len: usize, // length of the attribute value
reader: ReaderType, // reader positioned at value start
const Attribute = struct {
kind: PaxAttributeKind,
len: usize, // length of the attribute value
reader: *std.io.BufferedReader, // reader positioned at value start
// Copies pax attribute value into destination buffer.
// Must be called with destination buffer of size at least Attribute.len.
pub fn value(self: Attribute, dst: []u8) ![]const u8 {
if (self.len > dst.len) return error.TarInsufficientBuffer;
// assert(self.len <= dst.len);
const buf = dst[0..self.len];
const n = try self.reader.readAll(buf);
if (n < self.len) return error.UnexpectedEndOfStream;
try validateAttributeEnding(self.reader);
if (hasNull(buf)) return error.PaxNullInValue;
return buf;
}
};
// Iterates over pax attributes. Returns only known attributes.
// Caller must call `value` on the returned Attribute to advance the reader across the value.
pub fn next(self: *Self) !?Attribute {
// Pax extended header consists of one or more attributes, each constructed as follows:
// "%d %s=%s\n", <length>, <keyword>, <value>
while (self.size > 0) {
const length_buf = try self.readUntil(' ');
const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
const keyword = try self.readUntil('=');
if (hasNull(keyword)) return error.PaxNullInKeyword;
// calculate value_len
const value_start = length_buf.len + keyword.len + 2; // 2 separators
if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
const value_len = length - value_start - 1; // \n separator at end
self.size -= length;
const kind: PaxAttributeKind = if (eql(keyword, "path"))
.path
else if (eql(keyword, "linkpath"))
.linkpath
else if (eql(keyword, "size"))
.size
else {
try self.reader.skipBytes(value_len, .{});
try validateAttributeEnding(self.reader);
continue;
};
if (kind == .size and value_len > pax_max_size_attr_len) {
return error.PaxSizeAttrOverflow;
}
return Attribute{
.kind = kind,
.len = value_len,
.reader = self.reader,
};
}
return null;
}
fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
var fbs: std.io.BufferedWriter = undefined;
fbs.initFixed(&self.scratch);
try self.reader.streamUntilDelimiter(&fbs, delimiter, null);
return fbs.getWritten();
}
fn eql(a: []const u8, b: []const u8) bool {
return std.mem.eql(u8, a, b);
}
fn hasNull(str: []const u8) bool {
return (std.mem.indexOfScalar(u8, str, 0)) != null;
}
// Checks that each record ends with a newline.
fn validateAttributeEnding(reader: ReaderType) !void {
if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
// Copies pax attribute value into destination buffer.
// Must be called with destination buffer of size at least Attribute.len.
pub fn value(self: Attribute, dst: []u8) ![]const u8 {
if (self.len > dst.len) return error.TarInsufficientBuffer;
// assert(self.len <= dst.len);
const buf = dst[0..self.len];
const n = try self.reader.readSliceShort(buf);
if (n < self.len) return error.UnexpectedEndOfStream;
try validateAttributeEnding(self.reader);
if (hasNull(buf)) return error.PaxNullInValue;
return buf;
}
};
}
// Iterates over pax attributes. Returns only known attributes.
// Caller must call `value` on the returned Attribute to advance the reader across the value.
pub fn next(self: *Self) !?Attribute {
// Pax extended header consists of one or more attributes, each constructed as follows:
// "%d %s=%s\n", <length>, <keyword>, <value>
while (self.size > 0) {
const length_buf = try self.reader.takeSentinel(' ');
const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
const keyword = try self.reader.takeSentinel('=');
if (hasNull(keyword)) return error.PaxNullInKeyword;
// calculate value_len
const value_start = length_buf.len + keyword.len + 2; // 2 separators
if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
const value_len = length - value_start - 1; // \n separator at end
self.size -= length;
const kind: PaxAttributeKind = if (eql(keyword, "path"))
.path
else if (eql(keyword, "linkpath"))
.linkpath
else if (eql(keyword, "size"))
.size
else {
try self.reader.discardAll(value_len);
try validateAttributeEnding(self.reader);
continue;
};
if (kind == .size and value_len > pax_max_size_attr_len) {
return error.PaxSizeAttrOverflow;
}
return .{
.kind = kind,
.len = value_len,
.reader = self.reader,
};
}
return null;
}
fn eql(a: []const u8, b: []const u8) bool {
return std.mem.eql(u8, a, b);
}
fn hasNull(str: []const u8) bool {
return (std.mem.indexOfScalar(u8, str, 0)) != null;
}
// Checks that each record ends with a newline.
fn validateAttributeEnding(reader: *std.io.BufferedReader) !void {
if (try reader.takeByte() != '\n') return error.PaxInvalidAttributeEnd;
}
};
/// Saves tar file content to the file system.
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void {
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: *std.io.BufferedReader, options: PipeOptions) !void {
var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var iter = iterator(reader, .{
var iter: Iterator = .init(reader, .{
.file_name_buffer = &file_name_buffer,
.link_name_buffer = &link_name_buffer,
.diagnostics = options.diagnostics,
@ -827,8 +818,9 @@ test PaxIterator {
var buffer: [1024]u8 = undefined;
outer: for (cases) |case| {
var stream = std.io.fixedBufferStream(case.data);
var iter = paxIterator(stream.reader(), case.data.len);
var br: std.io.BufferedReader = undefined;
br.initFixed(case.data);
var iter: PaxIterator = .{ .reader = &br, .size = case.data.len };
var i: usize = 0;
while (iter.next() catch |err| {
@ -942,7 +934,7 @@ test "create file and symlink" {
file.close();
}
test iterator {
test Iterator {
// Example tar file is created from this tree structure:
// $ tree example
// example
@ -969,7 +961,7 @@ test iterator {
var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
// Create iterator
var iter = iterator(fbs.reader(), .{
var iter: Iterator = .init(fbs.reader(), .{
.file_name_buffer = &file_name_buffer,
.link_name_buffer = &link_name_buffer,
});