From 576702ca40cef65964709ad70595ad913f1944ab Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 5 May 2025 20:19:36 -0700 Subject: [PATCH] get autodocs wasm compiling again it's triggering an error.EndOfStream error in the browser tho --- lib/compiler/std-docs.zig | 6 +- lib/docs/wasm/Walk.zig | 8 +- lib/docs/wasm/html_render.zig | 2 +- lib/docs/wasm/main.zig | 37 +- lib/docs/wasm/markdown.zig | 4 +- lib/docs/wasm/markdown/Document.zig | 6 +- lib/docs/wasm/markdown/Render.zig | 243 ++++++++++++ lib/docs/wasm/markdown/renderer.zig | 253 ------------ lib/std/io/AllocatingWriter.zig | 1 + lib/std/io/BufferedReader.zig | 42 +- lib/std/tar.zig | 582 ++++++++++++++-------------- 11 files changed, 598 insertions(+), 586 deletions(-) create mode 100644 lib/docs/wasm/markdown/Render.zig delete mode 100644 lib/docs/wasm/markdown/renderer.zig diff --git a/lib/compiler/std-docs.zig b/lib/compiler/std-docs.zig index 0645120212..5d1ffd68f1 100644 --- a/lib/compiler/std-docs.zig +++ b/lib/compiler/std-docs.zig @@ -334,7 +334,7 @@ fn buildWasmBinary( var result_error_bundle = std.zig.ErrorBundle.empty; while (true) { - receiveWasmMessage(arena, context, poller.reader(.stdout), &result, &result_error_bundle) catch |err| switch (err) { + receiveWasmMessage(gpa, arena, context, poller.reader(.stdout), &result, &result_error_bundle) catch |err| switch (err) { error.EndOfStream => break, error.ReadFailed => if (!(try poller.poll())) break, else => |e| return e, @@ -387,6 +387,7 @@ fn buildWasmBinary( } fn receiveWasmMessage( + gpa: Allocator, arena: Allocator, context: *Context, br: *std.io.BufferedReader, @@ -394,8 +395,9 @@ fn receiveWasmMessage( result_error_bundle: *std.zig.ErrorBundle, ) !void { // Ensure that we will be able to read the entire message without blocking. 
+ try br.fillAlloc(gpa, @sizeOf(std.zig.Server.Message.Header)); const header = try br.peekStructEndian(std.zig.Server.Message.Header, .little); - try br.fill(@sizeOf(std.zig.Server.Message.Header) + header.bytes_len); + try br.fillAlloc(gpa, @sizeOf(std.zig.Server.Message.Header) + header.bytes_len); br.toss(@sizeOf(std.zig.Server.Message.Header)); switch (header.tag) { .zig_version => { diff --git a/lib/docs/wasm/Walk.zig b/lib/docs/wasm/Walk.zig index e3884f6271..b92f4bb51c 100644 --- a/lib/docs/wasm/Walk.zig +++ b/lib/docs/wasm/Walk.zig @@ -443,7 +443,13 @@ fn parse(file_name: []const u8, source: []u8) Oom!Ast { const err_offset = token_offsets[err.token] + ast.errorOffset(err); const err_loc = std.zig.findLineColumn(ast.source, err_offset); rendered_err.clearRetainingCapacity(); - try ast.renderError(err, rendered_err.writer(gpa)); + { + var aw: std.io.AllocatingWriter = undefined; + defer rendered_err = aw.toArrayList(); + ast.renderError(err, aw.fromArrayList(gpa, &rendered_err)) catch |e| switch (e) { + error.WriteFailed => return error.OutOfMemory, + }; + } log.err("{s}:{}:{}: {s}", .{ file_name, err_loc.line + 1, err_loc.column + 1, rendered_err.items }); } return Ast.parse(gpa, "", .zig); diff --git a/lib/docs/wasm/html_render.zig b/lib/docs/wasm/html_render.zig index b7e79e5732..b15ab127c7 100644 --- a/lib/docs/wasm/html_render.zig +++ b/lib/docs/wasm/html_render.zig @@ -88,7 +88,7 @@ pub fn fileSourceHtml( if (next_annotate_index >= options.source_location_annotations.len) break; const next_annotation = options.source_location_annotations[next_annotate_index]; if (cursor <= next_annotation.file_byte_offset) break; - try out.writer(gpa).print("", .{ + try out.print(gpa, "", .{ options.annotation_prefix, next_annotation.dom_id, }); next_annotate_index += 1; diff --git a/lib/docs/wasm/main.zig b/lib/docs/wasm/main.zig index d886f8037c..fc45d77022 100644 --- a/lib/docs/wasm/main.zig +++ b/lib/docs/wasm/main.zig @@ -694,43 +694,49 @@ fn render_docs( var link_buffer: std.ArrayListUnmanaged(u8) = .empty; }; - const Writer = std.ArrayListUnmanaged(u8).Writer; - const Renderer = markdown.Renderer(Writer, Decl.Index); - const renderer: Renderer = .{ - .context = decl_index, + var render: markdown.Render = .{ + .context = &decl_index, .renderFn = struct { + const fmtHtml = markdown.Render.fmtHtml; fn render( - r: Renderer, + r: markdown.Render, doc: markdown.Document, node: markdown.Document.Node.Index, - writer: Writer, + writer: *std.io.BufferedWriter, ) !void { + const decl_index_ptr: *const Decl.Index = @alignCast(@ptrCast(r.context)); const data = doc.nodes.items(.data)[@intFromEnum(node)]; switch (doc.nodes.items(.tag)[@intFromEnum(node)]) { .code_span => { try writer.writeAll(""); const content = doc.string(data.text.content); - if (resolve_decl_path(r.context, content)) |resolved_decl_index| { + if (resolve_decl_path(decl_index_ptr.*, content)) |resolved_decl_index| { g.link_buffer.clearRetainingCapacity(); - try resolveDeclLink(resolved_decl_index, &g.link_buffer); + resolveDeclLink(resolved_decl_index, &g.link_buffer) catch |err| switch (err) { + error.OutOfMemory => return error.WriteFailed, + }; try writer.writeAll("{}", .{markdown.fmtHtml(content)}); + try writer.print("\">{f}", .{fmtHtml(content)}); } else { - try writer.print("{}", .{markdown.fmtHtml(content)}); + try writer.print("{f}", .{fmtHtml(content)}); } try writer.writeAll(""); }, - else => try Renderer.renderDefault(r, doc, node, writer), + else => try markdown.Render.renderDefault(r, doc, node, writer), } } 
}.render,
     };
-    try renderer.render(parsed_doc, out.writer(gpa));
+    var aw: std.io.AllocatingWriter = undefined;
+    defer out.* = aw.toArrayList();
+    render.render(parsed_doc, aw.fromArrayList(gpa, out)) catch |err| switch (err) {
+        error.WriteFailed => return error.OutOfMemory,
+    };
 }
 
 fn resolve_decl_path(decl_index: Decl.Index, path: []const u8) ?Decl.Index {
@@ -772,10 +778,11 @@ export fn decl_type_html(decl_index: Decl.Index) String {
 
 const Oom = error{OutOfMemory};
 
 fn unpackInner(tar_bytes: []u8) !void {
-    var fbs = std.io.fixedBufferStream(tar_bytes);
+    var br: std.io.BufferedReader = undefined;
+    br.initFixed(tar_bytes);
     var file_name_buffer: [1024]u8 = undefined;
     var link_name_buffer: [1024]u8 = undefined;
-    var it = std.tar.iterator(fbs.reader(), .{
+    var it = std.tar.Iterator.init(&br, .{
         .file_name_buffer = &file_name_buffer,
         .link_name_buffer = &link_name_buffer,
     });
@@ -796,7 +803,7 @@ fn unpackInner(tar_bytes: []u8) !void {
             {
                 gop.value_ptr.* = file;
             }
-            const file_bytes = tar_bytes[fbs.pos..][0..@intCast(tar_file.size)];
+            const file_bytes = tar_bytes[br.seek..][0..@intCast(tar_file.size)];
             assert(file == try Walk.add_file(file_name, file_bytes));
         }
     } else {
diff --git a/lib/docs/wasm/markdown.zig b/lib/docs/wasm/markdown.zig
index b18fe05445..09b1f816c3 100644
--- a/lib/docs/wasm/markdown.zig
+++ b/lib/docs/wasm/markdown.zig
@@ -129,9 +129,7 @@ const testing = std.testing;
 
 pub const Document = @import("markdown/Document.zig");
 pub const Parser = @import("markdown/Parser.zig");
-pub const Renderer = @import("markdown/renderer.zig").Renderer;
-pub const renderNodeInlineText = @import("markdown/renderer.zig").renderNodeInlineText;
-pub const fmtHtml = @import("markdown/renderer.zig").fmtHtml;
+pub const Render = @import("markdown/Render.zig");
 
 // Avoid exposing main to other files merely importing this one.
 pub const main = if (@import("root") == @This())
diff --git a/lib/docs/wasm/markdown/Document.zig b/lib/docs/wasm/markdown/Document.zig
index 59a40135d6..2e678b1a12 100644
--- a/lib/docs/wasm/markdown/Document.zig
+++ b/lib/docs/wasm/markdown/Document.zig
@@ -4,7 +4,7 @@ const std = @import("std");
 const builtin = @import("builtin");
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
-const Renderer = @import("renderer.zig").Renderer;
+const Render = @import("Render.zig");
 
 nodes: Node.List.Slice,
 extra: []u32,
@@ -160,8 +160,8 @@ pub fn deinit(doc: *Document, allocator: Allocator) void {
 }
 
 /// Renders a document directly to a writer using the default renderer.
-pub fn render(doc: Document, writer: anytype) @TypeOf(writer).Error!void {
-    const renderer: Renderer(@TypeOf(writer), void) = .{ .context = {} };
+pub fn render(doc: Document, writer: *std.io.BufferedWriter) std.io.Writer.Error!void {
+    const renderer: Render = .{ .context = null };
     try renderer.render(doc, writer);
 }
 
diff --git a/lib/docs/wasm/markdown/Render.zig b/lib/docs/wasm/markdown/Render.zig
new file mode 100644
index 0000000000..d5ff93e785
--- /dev/null
+++ b/lib/docs/wasm/markdown/Render.zig
@@ -0,0 +1,243 @@
+//! A Markdown document renderer.
+//!
+//! `Render` provides a `renderDefault` function, with the intention that
+//! custom `renderFn` implementations can call `renderDefault` for node types
+//! for which they require no special rendering.
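+//!
+//! A custom renderer supplies its own `renderFn` and falls back to
+//! `renderDefault` for everything else. A minimal sketch (the context and
+//! node handling here are illustrative only):
+//!
+//!     var render: Render = .{
+//!         .context = null,
+//!         .renderFn = struct {
+//!             fn render(
+//!                 r: Render,
+//!                 doc: Document,
+//!                 node: Node.Index,
+//!                 writer: *std.io.BufferedWriter,
+//!             ) std.io.Writer.Error!void {
+//!                 // Special-case the nodes you care about, then delegate.
+//!                 return Render.renderDefault(r, doc, node, writer);
+//!             }
+//!         }.render,
+//!     };
+//!     try render.render(doc, writer);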
+
+const std = @import("std");
+const Document = @import("Document.zig");
+const Node = Document.Node;
+const Render = @This();
+
+context: ?*const anyopaque,
+renderFn: *const fn (
+    r: Render,
+    doc: Document,
+    node: Node.Index,
+    writer: *std.io.BufferedWriter,
+) std.io.Writer.Error!void = renderDefault,
+
+pub fn render(r: Render, doc: Document, writer: *std.io.BufferedWriter) std.io.Writer.Error!void {
+    try r.renderFn(r, doc, .root, writer);
+}
+
+pub fn renderDefault(
+    r: Render,
+    doc: Document,
+    node: Node.Index,
+    writer: *std.io.BufferedWriter,
+) std.io.Writer.Error!void {
+    const data = doc.nodes.items(.data)[@intFromEnum(node)];
+    switch (doc.nodes.items(.tag)[@intFromEnum(node)]) {
+        .root => {
+            for (doc.extraChildren(data.container.children)) |child| {
+                try r.renderFn(r, doc, child, writer);
+            }
+        },
+        .list => {
+            if (data.list.start.asNumber()) |start| {
+                if (start == 1) {
+                    try writer.writeAll("<ol>\n");
+                } else {
+                    try writer.print("<ol start=\"{}\">\n", .{start});
+                }
+            } else {
+                try writer.writeAll("<ul>\n");
+            }
+            for (doc.extraChildren(data.list.children)) |child| {
+                try r.renderFn(r, doc, child, writer);
+            }
+            if (data.list.start.asNumber() != null) {
+                try writer.writeAll("</ol>\n");
+            } else {
+                try writer.writeAll("</ul>\n");
+            }
+        },
+        .list_item => {
+            try writer.writeAll("<li>");
+            for (doc.extraChildren(data.list_item.children)) |child| {
+                if (data.list_item.tight and doc.nodes.items(.tag)[@intFromEnum(child)] == .paragraph) {
+                    const para_data = doc.nodes.items(.data)[@intFromEnum(child)];
+                    for (doc.extraChildren(para_data.container.children)) |para_child| {
+                        try r.renderFn(r, doc, para_child, writer);
+                    }
+                } else {
+                    try r.renderFn(r, doc, child, writer);
+                }
+            }
+            try writer.writeAll("</li>\n");
+        },
+        .table => {
+            try writer.writeAll("<table>\n");
+            for (doc.extraChildren(data.container.children)) |child| {
+                try r.renderFn(r, doc, child, writer);
+            }
+            try writer.writeAll("</table>\n");
+        },
+        .table_row => {
+            try writer.writeAll("<tr>\n");
+            for (doc.extraChildren(data.container.children)) |child| {
+                try r.renderFn(r, doc, child, writer);
+            }
+            try writer.writeAll("</tr>\n");
+        },
+        .table_cell => {
+            if (data.table_cell.info.header) {
+                try writer.writeAll("<th");
+            } else {
+                try writer.writeAll("<td");
+            }
+            switch (data.table_cell.info.alignment) {
+                .unset => try writer.writeAll(">"),
+                else => |a| try writer.print(" style=\"text-align: {s}\">", .{@tagName(a)}),
+            }
+
+            for (doc.extraChildren(data.table_cell.children)) |child| {
+                try r.renderFn(r, doc, child, writer);
+            }
+
+            if (data.table_cell.info.header) {
+                try writer.writeAll("</th>\n");
+            } else {
+                try writer.writeAll("</td>\n");
+            }
+        },
+        .heading => {
+            try writer.print("<h{}>", .{data.heading.level});
+            for (doc.extraChildren(data.heading.children)) |child| {
+                try r.renderFn(r, doc, child, writer);
+            }
+            try writer.print("</h{}>\n", .{data.heading.level});
+        },
+        .code_block => {
+            const content = doc.string(data.code_block.content);
+            try writer.print("<pre><code>{f}</code></pre>\n", .{fmtHtml(content)});
+        },
+        .blockquote => {
+            try writer.writeAll("<blockquote>\n");
+            for (doc.extraChildren(data.container.children)) |child| {
+                try r.renderFn(r, doc, child, writer);
+            }
+            try writer.writeAll("</blockquote>\n");
+        },
+        .paragraph => {
+            try writer.writeAll("<p>");
+            for (doc.extraChildren(data.container.children)) |child| {
+                try r.renderFn(r, doc, child, writer);
+            }
+            try writer.writeAll("</p>\n");
+        },
+        .thematic_break => {
+            try writer.writeAll("<hr />\n");
+        },
+        .link => {
+            const target = doc.string(data.link.target);
+            try writer.print("<a href=\"{f}\">", .{fmtHtml(target)});
+            for (doc.extraChildren(data.link.children)) |child| {
+                try r.renderFn(r, doc, child, writer);
+            }
+            try writer.writeAll("</a>");
+        },
+        .autolink => {
+            const target = doc.string(data.text.content);
+            try writer.print("<a href=\"{0f}\">{0f}</a>", .{fmtHtml(target)});
+        },
+        .image => {
+            const target = doc.string(data.link.target);
+            try writer.print("<img src=\"{f}\" alt=\"", .{fmtHtml(target)});
+            for (doc.extraChildren(data.link.children)) |child| {
+                try renderInlineNodeText(doc, child, writer);
+            }
+            try writer.writeAll("\" />");
+        },
+        .strong => {
+            try writer.writeAll("<strong>");
+            for (doc.extraChildren(data.container.children)) |child| {
+                try r.renderFn(r, doc, child, writer);
+            }
+            try writer.writeAll("</strong>");
+        },
+        .emphasis => {
+            try writer.writeAll("<em>");
+            for (doc.extraChildren(data.container.children)) |child| {
+                try r.renderFn(r, doc, child, writer);
+            }
+            try writer.writeAll("</em>");
+        },
+        .code_span => {
+            const content = doc.string(data.text.content);
+            try writer.print("<code>{f}</code>", .{fmtHtml(content)});
+        },
+        .text => {
+            const content = doc.string(data.text.content);
+            try writer.print("{f}", .{fmtHtml(content)});
+        },
+        .line_break => {
+            try writer.writeAll("<br />\n");
+        },
+    }
+}
+
+/// Renders an inline node as plain text. Asserts that the node is an inline and
+/// has no non-inline children.
+pub fn renderInlineNodeText(
+    doc: Document,
+    node: Node.Index,
+    writer: *std.io.BufferedWriter,
+) std.io.Writer.Error!void {
+    const data = doc.nodes.items(.data)[@intFromEnum(node)];
+    switch (doc.nodes.items(.tag)[@intFromEnum(node)]) {
+        .root,
+        .list,
+        .list_item,
+        .table,
+        .table_row,
+        .table_cell,
+        .heading,
+        .code_block,
+        .blockquote,
+        .paragraph,
+        .thematic_break,
+        => unreachable, // Blocks
+
+        .link, .image => {
+            for (doc.extraChildren(data.link.children)) |child| {
+                try renderInlineNodeText(doc, child, writer);
+            }
+        },
+        .strong => {
+            for (doc.extraChildren(data.container.children)) |child| {
+                try renderInlineNodeText(doc, child, writer);
+            }
+        },
+        .emphasis => {
+            for (doc.extraChildren(data.container.children)) |child| {
+                try renderInlineNodeText(doc, child, writer);
+            }
+        },
+        .autolink, .code_span, .text => {
+            const content = doc.string(data.text.content);
+            try writer.print("{f}", .{fmtHtml(content)});
+        },
+        .line_break => {
+            try writer.writeAll("\n");
+        },
+    }
+}
+
      \n"); - } else { - try writer.print("
        \n", .{start}); - } - } else { - try writer.writeAll("
          \n"); - } - for (doc.extraChildren(data.list.children)) |child| { - try r.renderFn(r, doc, child, writer); - } - if (data.list.start.asNumber() != null) { - try writer.writeAll("
      \n"); - } else { - try writer.writeAll("\n"); - } - }, - .list_item => { - try writer.writeAll("
    1. "); - for (doc.extraChildren(data.list_item.children)) |child| { - if (data.list_item.tight and doc.nodes.items(.tag)[@intFromEnum(child)] == .paragraph) { - const para_data = doc.nodes.items(.data)[@intFromEnum(child)]; - for (doc.extraChildren(para_data.container.children)) |para_child| { - try r.renderFn(r, doc, para_child, writer); - } - } else { - try r.renderFn(r, doc, child, writer); - } - } - try writer.writeAll("
    2. \n"); - }, - .table => { - try writer.writeAll("\n"); - for (doc.extraChildren(data.container.children)) |child| { - try r.renderFn(r, doc, child, writer); - } - try writer.writeAll("
      \n"); - }, - .table_row => { - try writer.writeAll("\n"); - for (doc.extraChildren(data.container.children)) |child| { - try r.renderFn(r, doc, child, writer); - } - try writer.writeAll("\n"); - }, - .table_cell => { - if (data.table_cell.info.header) { - try writer.writeAll(" try writer.writeAll(">"), - else => |a| try writer.print(" style=\"text-align: {s}\">", .{@tagName(a)}), - } - - for (doc.extraChildren(data.table_cell.children)) |child| { - try r.renderFn(r, doc, child, writer); - } - - if (data.table_cell.info.header) { - try writer.writeAll("\n"); - } else { - try writer.writeAll("\n"); - } - }, - .heading => { - try writer.print("", .{data.heading.level}); - for (doc.extraChildren(data.heading.children)) |child| { - try r.renderFn(r, doc, child, writer); - } - try writer.print("\n", .{data.heading.level}); - }, - .code_block => { - const content = doc.string(data.code_block.content); - try writer.print("
      {}
      \n", .{fmtHtml(content)}); - }, - .blockquote => { - try writer.writeAll("
      \n"); - for (doc.extraChildren(data.container.children)) |child| { - try r.renderFn(r, doc, child, writer); - } - try writer.writeAll("
      \n"); - }, - .paragraph => { - try writer.writeAll("

      "); - for (doc.extraChildren(data.container.children)) |child| { - try r.renderFn(r, doc, child, writer); - } - try writer.writeAll("

      \n"); - }, - .thematic_break => { - try writer.writeAll("
      \n"); - }, - .link => { - const target = doc.string(data.link.target); - try writer.print("", .{fmtHtml(target)}); - for (doc.extraChildren(data.link.children)) |child| { - try r.renderFn(r, doc, child, writer); - } - try writer.writeAll(""); - }, - .autolink => { - const target = doc.string(data.text.content); - try writer.print("{0}", .{fmtHtml(target)}); - }, - .image => { - const target = doc.string(data.link.target); - try writer.print("\"","); - }, - .strong => { - try writer.writeAll(""); - for (doc.extraChildren(data.container.children)) |child| { - try r.renderFn(r, doc, child, writer); - } - try writer.writeAll(""); - }, - .emphasis => { - try writer.writeAll(""); - for (doc.extraChildren(data.container.children)) |child| { - try r.renderFn(r, doc, child, writer); - } - try writer.writeAll(""); - }, - .code_span => { - const content = doc.string(data.text.content); - try writer.print("{}", .{fmtHtml(content)}); - }, - .text => { - const content = doc.string(data.text.content); - try writer.print("{}", .{fmtHtml(content)}); - }, - .line_break => { - try writer.writeAll("
      \n"); - }, - } - } - }; -} - -/// Renders an inline node as plain text. Asserts that the node is an inline and -/// has no non-inline children. -pub fn renderInlineNodeText( - doc: Document, - node: Node.Index, - writer: anytype, -) @TypeOf(writer).Error!void { - const data = doc.nodes.items(.data)[@intFromEnum(node)]; - switch (doc.nodes.items(.tag)[@intFromEnum(node)]) { - .root, - .list, - .list_item, - .table, - .table_row, - .table_cell, - .heading, - .code_block, - .blockquote, - .paragraph, - .thematic_break, - => unreachable, // Blocks - - .link, .image => { - for (doc.extraChildren(data.link.children)) |child| { - try renderInlineNodeText(doc, child, writer); - } - }, - .strong => { - for (doc.extraChildren(data.container.children)) |child| { - try renderInlineNodeText(doc, child, writer); - } - }, - .emphasis => { - for (doc.extraChildren(data.container.children)) |child| { - try renderInlineNodeText(doc, child, writer); - } - }, - .autolink, .code_span, .text => { - const content = doc.string(data.text.content); - try writer.print("{}", .{fmtHtml(content)}); - }, - .line_break => { - try writer.writeAll("\n"); - }, - } -} - -pub fn fmtHtml(bytes: []const u8) std.fmt.Formatter(formatHtml) { - return .{ .data = bytes }; -} - -fn formatHtml( - bytes: []const u8, - comptime fmt: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, -) !void { - _ = fmt; - _ = options; - for (bytes) |b| { - switch (b) { - '<' => try writer.writeAll("<"), - '>' => try writer.writeAll(">"), - '&' => try writer.writeAll("&"), - '"' => try writer.writeAll("""), - else => try writer.writeByte(b), - } - } -} diff --git a/lib/std/io/AllocatingWriter.zig b/lib/std/io/AllocatingWriter.zig index 7d9cddb376..e22c95fcad 100644 --- a/lib/std/io/AllocatingWriter.zig +++ b/lib/std/io/AllocatingWriter.zig @@ -166,6 +166,7 @@ fn writeFile( headers_and_trailers_full: []const []const u8, headers_len_full: usize, ) std.io.Writer.FileError!usize { + if (std.fs.File.Handle == void) unreachable; const aw: *AllocatingWriter = @alignCast(@ptrCast(context)); const gpa = aw.allocator; var list = aw.toArrayList(); diff --git a/lib/std/io/BufferedReader.zig b/lib/std/io/BufferedReader.zig index 7e9f99e915..8f9c13c193 100644 --- a/lib/std/io/BufferedReader.zig +++ b/lib/std/io/BufferedReader.zig @@ -316,6 +316,15 @@ pub fn discardAll(br: *BufferedReader, n: usize) Reader.Error!void { if ((try br.discardShort(n)) != n) return error.EndOfStream; } +pub fn discardAll64(br: *BufferedReader, n: u64) Reader.Error!void { + var remaining: u64 = n; + while (remaining > 0) { + const limited = std.math.cast(usize, remaining) orelse std.math.maxInt(usize); + try discardAll(br, limited); + remaining -= limited; + } +} + /// Skips the next `n` bytes from the stream, advancing the seek position. /// /// Unlike `toss` which is infallible, in this function `n` can be any amount. 
@@ -896,28 +905,35 @@ pub fn takeLeb128(br: *BufferedReader, comptime Result: type) TakeLeb128Error!Re
     }
 }))) orelse error.Overflow;
 }
 
+pub fn expandTotalCapacity(br: *BufferedReader, allocator: Allocator, n: usize) Allocator.Error!void {
+    if (n <= br.buffer.len) return;
+    if (br.seek > 0) rebase(br);
+    var list: ArrayList(u8) = .{
+        .items = br.buffer[0..br.end],
+        .capacity = br.buffer.len,
+    };
+    defer br.buffer = list.allocatedSlice();
+    try list.ensureTotalCapacity(allocator, n);
+}
+
+pub const FillAllocError = Reader.Error || Allocator.Error;
+
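+/// Like `fill`, but first grows `buffer` with `allocator` so that `n` bytes
+/// always fit. A sketch of the intended use, mirroring `lib/compiler/std-docs.zig`
+/// in this change (`Header` stands in for any fixed-size struct):
+///
+///     try br.fillAlloc(gpa, @sizeOf(Header));
+///     const header = try br.peekStructEndian(Header, .little);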
+pub fn fillAlloc(br: *BufferedReader, allocator: Allocator, n: usize) FillAllocError!void {
+    try expandTotalCapacity(br, allocator, n);
+    return fill(br, n);
+}
+
 /// Returns a slice into the unused capacity of `buffer` with at least
 /// `min_len` bytes, extending `buffer` by resizing it with `gpa` as necessary.
 ///
 /// After calling this function, typically the caller will follow up with a
 /// call to `advanceBufferEnd` to report the actual number of bytes buffered.
-pub fn writableSliceGreedyAlloc(
-    br: *BufferedReader,
-    allocator: Allocator,
-    min_len: usize,
-) error{OutOfMemory}![]u8 {
+pub fn writableSliceGreedyAlloc(br: *BufferedReader, allocator: Allocator, min_len: usize) Allocator.Error![]u8 {
     {
         const unused = br.buffer[br.end..];
         if (unused.len >= min_len) return unused;
     }
-    const seek = br.seek;
-    if (seek > 0) {
-        const buffer = br.buffer[0..br.end];
-        const remainder = buffer[seek..];
-        std.mem.copyForwards(u8, buffer[0..remainder.len], remainder);
-        br.end = remainder.len;
-        br.seek = 0;
-    }
+    if (br.seek > 0) rebase(br);
     {
         var list: ArrayList(u8) = .{
             .items = br.buffer[0..br.end],
diff --git a/lib/std/tar.zig b/lib/std/tar.zig
index afa9463acb..773e5b8b8a 100644
--- a/lib/std/tar.zig
+++ b/lib/std/tar.zig
@@ -293,28 +293,6 @@ fn nullStr(str: []const u8) []const u8 {
     return str;
 }
 
-/// Options for iterator.
-/// Buffers should be provided by the caller.
-pub const IteratorOptions = struct {
-    /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
-    file_name_buffer: []u8,
-    /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
-    link_name_buffer: []u8,
-    /// Collects error messages during unpacking
-    diagnostics: ?*Diagnostics = null,
-};
-
-/// Iterates over files in tar archive.
-/// `next` returns each file in tar archive.
-pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) {
-    return .{
-        .reader = reader,
-        .diagnostics = options.diagnostics,
-        .file_name_buffer = options.file_name_buffer,
-        .link_name_buffer = options.link_name_buffer,
-    };
-}
-
 /// Type of the file returned by iterator `next` method.
 pub const FileKind = enum {
     directory,
@@ -323,206 +301,230 @@ pub const FileKind = enum {
 };
 
 /// Iterator over entries in the tar file represented by reader.
-pub fn Iterator(comptime ReaderType: type) type {
-    return struct {
-        reader: ReaderType,
-        diagnostics: ?*Diagnostics = null,
+pub const Iterator = struct {
+    reader: *std.io.BufferedReader,
+    diagnostics: ?*Diagnostics = null,
 
-        // buffers for heeader and file attributes
-        header_buffer: [Header.SIZE]u8 = undefined,
+    // buffers for header and file attributes
+    header_buffer: [Header.SIZE]u8 = undefined,
+    file_name_buffer: []u8,
+    link_name_buffer: []u8,
+
+    // bytes of padding to the end of the block
+    padding: usize = 0,
+    // bytes of the file left unconsumed by the caller after the last `next` call
+    unread_file_bytes: u64 = 0,
+
+    /// Options for iterator.
+    /// Buffers should be provided by the caller.
+    pub const Options = struct {
+        /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
+        file_name_buffer: []u8,
+        /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
+        link_name_buffer: []u8,
+        /// Collects error messages during unpacking.
+        diagnostics: ?*Diagnostics = null,
+    };
 
-        // bytes of padding to the end of the block
-        padding: usize = 0,
-        // not consumed bytes of file from last next iteration
-        unread_file_bytes: u64 = 0,
-
-        pub const File = struct {
-            name: []const u8, // name of file, symlink or directory
-            link_name: []const u8, // target name of symlink
-            size: u64 = 0, // size of the file in bytes
-            mode: u32 = 0,
-            kind: FileKind = .file,
-
-            unread_bytes: *u64,
-            parent_reader: ReaderType,
-
-            pub const Reader = std.io.Reader(File, ReaderType.Error, File.read);
-
-            pub fn reader(self: File) Reader {
-                return .{ .context = self };
-            }
-
-            pub fn read(self: File, dest: []u8) ReaderType.Error!usize {
-                const buf = dest[0..@min(dest.len, self.unread_bytes.*)];
-                const n = try self.parent_reader.read(buf);
-                self.unread_bytes.* -= n;
-                return n;
-            }
-
-            // Writes file content to writer.
-            pub fn writeAll(self: File, out_writer: anytype) !void {
-                var buffer: [4096]u8 = undefined;
-
-                while (self.unread_bytes.* > 0) {
-                    const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)];
-                    try self.parent_reader.readNoEof(buf);
-                    try out_writer.writeAll(buf);
-                    self.unread_bytes.* -= buf.len;
-                }
-            }
+    /// Returns an iterator over the files in the tar archive;
+    /// call `next` to advance to each file in turn.
+    pub fn init(reader: *std.io.BufferedReader, options: Options) Iterator {
+        return .{
+            .reader = reader,
+            .diagnostics = options.diagnostics,
+            .file_name_buffer = options.file_name_buffer,
+            .link_name_buffer = options.link_name_buffer,
         };
+    }
 
-        const Self = @This();
+    pub const File = struct {
+        name: []const u8, // name of file, symlink or directory
+        link_name: []const u8, // target name of symlink
+        size: u64 = 0, // size of the file in bytes
+        mode: u32 = 0,
+        kind: FileKind = .file,
 
-        fn readHeader(self: *Self) !?Header {
-            if (self.padding > 0) {
-                try self.reader.skipBytes(self.padding, .{});
-            }
-            const n = try self.reader.readAll(&self.header_buffer);
-            if (n == 0) return null;
-            if (n < Header.SIZE) return error.UnexpectedEndOfStream;
-            const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
-            if (try header.checkChksum() == 0) return null;
-            return header;
-        }
+        unread_bytes: *u64,
+        parent_reader: *std.io.BufferedReader,
 
-        fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
-            if (size > buffer.len) return error.TarInsufficientBuffer;
-            const buf = buffer[0..size];
-            try self.reader.readNoEof(buf);
-            return nullStr(buf);
-        }
-
-        fn newFile(self: *Self) File {
+        pub fn reader(self: *File) std.io.Reader {
             return .{
-                .name = self.file_name_buffer[0..0],
-                .link_name = self.link_name_buffer[0..0],
-                .parent_reader = self.reader,
-                .unread_bytes = &self.unread_file_bytes,
+                .context = self,
+                .vtable = &.{
+                    .read = read,
+                    .readVec = readVec,
+                    .discard = discard,
+                },
             };
         }
 
-        // Number of padding bytes in the last file block.
- fn blockPadding(size: u64) usize { - const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to te block boundary - return @intCast(block_rounded - size); + fn read(context: ?*anyopaque, bw: *std.io.BufferedWriter, limit: std.io.Reader.Limit) std.io.Reader.RwError!usize { + const file: *File = @ptrCast(@alignCast(context)); + if (file.unread_bytes.* == 0) return error.EndOfStream; + const n = try file.parent_reader.read(bw, limit.min(.limited(file.unread_bytes.*))); + file.unread_bytes.* -= n; + return n; } - /// Iterates through the tar archive as if it is a series of files. - /// Internally, the tar format often uses entries (header with optional - /// content) to add meta data that describes the next file. These - /// entries should not normally be visible to the outside. As such, this - /// loop iterates through one or more entries until it collects a all - /// file attributes. - pub fn next(self: *Self) !?File { - if (self.unread_file_bytes > 0) { - // If file content was not consumed by caller - try self.reader.skipBytes(self.unread_file_bytes, .{}); - self.unread_file_bytes = 0; - } - var file: File = self.newFile(); - - while (try self.readHeader()) |header| { - const kind = header.kind(); - const size: u64 = try header.size(); - self.padding = blockPadding(size); - - switch (kind) { - // File types to return upstream - .directory, .normal, .symbolic_link => { - file.kind = switch (kind) { - .directory => .directory, - .normal => .file, - .symbolic_link => .sym_link, - else => unreachable, - }; - file.mode = try header.mode(); - - // set file attributes if not already set by prefix/extended headers - if (file.size == 0) { - file.size = size; - } - if (file.link_name.len == 0) { - file.link_name = try header.linkName(self.link_name_buffer); - } - if (file.name.len == 0) { - file.name = try header.fullName(self.file_name_buffer); - } - - self.padding = blockPadding(file.size); - self.unread_file_bytes = file.size; - return file; - }, - // Prefix header types - .gnu_long_name => { - file.name = try self.readString(@intCast(size), self.file_name_buffer); - }, - .gnu_long_link => { - file.link_name = try self.readString(@intCast(size), self.link_name_buffer); - }, - .extended_header => { - // Use just attributes from last extended header. 
- file = self.newFile(); - - var rdr = paxIterator(self.reader, @intCast(size)); - while (try rdr.next()) |attr| { - switch (attr.kind) { - .path => { - file.name = try attr.value(self.file_name_buffer); - }, - .linkpath => { - file.link_name = try attr.value(self.link_name_buffer); - }, - .size => { - var buf: [pax_max_size_attr_len]u8 = undefined; - file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10); - }, - } - } - }, - // Ignored header type - .global_extended_header => { - self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; - }, - // All other are unsupported header types - else => { - const d = self.diagnostics orelse return error.TarUnsupportedHeader; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, header.name()), - .file_type = kind, - } }); - if (kind == .gnu_sparse) { - try self.skipGnuSparseExtendedHeaders(header); - } - self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; - }, - } - } - return null; + fn readVec(context: ?*anyopaque, data: []const []u8) std.io.Reader.Error!usize { + const file: *File = @ptrCast(@alignCast(context)); + if (file.unread_bytes.* == 0) return error.EndOfStream; + const n = try file.parent_reader.readVecLimit(data, .limited(file.unread_bytes.*)); + file.unread_bytes.* -= n; + return n; } - fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void { - var is_extended = header.bytes[482] > 0; - while (is_extended) { - var buf: [Header.SIZE]u8 = undefined; - const n = try self.reader.readAll(&buf); - if (n < Header.SIZE) return error.UnexpectedEndOfStream; - is_extended = buf[504] > 0; - } + fn discard(context: ?*anyopaque, limit: std.io.Reader.Limit) std.io.Reader.Error!usize { + const file: *File = @ptrCast(@alignCast(context)); + const n = limit.minInt(file.unread_bytes.*); + file.unread_bytes.* -= n; + return n; + } + + pub fn readRemaining(file: *File, out: *std.io.BufferedWriter) std.io.Reader.RwRemainingError!void { + return file.reader().readRemaining(out); } }; -} -/// Pax attributes iterator. -/// Size is length of pax extended header in reader. -fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) { - return PaxIterator(@TypeOf(reader)){ - .reader = reader, - .size = size, - }; -} + fn readHeader(self: *Iterator) !?Header { + if (self.padding > 0) { + try self.reader.discardAll(self.padding); + } + const n = try self.reader.readSliceShort(&self.header_buffer); + if (n == 0) return null; + if (n < Header.SIZE) return error.UnexpectedEndOfStream; + const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] }; + if (try header.checkChksum() == 0) return null; + return header; + } + + fn readString(self: *Iterator, size: usize, buffer: []u8) ![]const u8 { + if (size > buffer.len) return error.TarInsufficientBuffer; + const buf = buffer[0..size]; + try self.reader.readSlice(buf); + return nullStr(buf); + } + + fn newFile(self: *Iterator) File { + return .{ + .name = self.file_name_buffer[0..0], + .link_name = self.link_name_buffer[0..0], + .parent_reader = self.reader, + .unread_bytes = &self.unread_file_bytes, + }; + } + + // Number of padding bytes in the last file block. + fn blockPadding(size: u64) usize { + const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to te block boundary + return @intCast(block_rounded - size); + } + + /// Iterates through the tar archive as if it is a series of files. 
+ /// Internally, the tar format often uses entries (header with optional + /// content) to add meta data that describes the next file. These + /// entries should not normally be visible to the outside. As such, this + /// loop iterates through one or more entries until it collects a all + /// file attributes. + pub fn next(self: *Iterator) !?File { + if (self.unread_file_bytes > 0) { + // If file content was not consumed by caller + try self.reader.discardAll64(self.unread_file_bytes); + self.unread_file_bytes = 0; + } + var file: File = self.newFile(); + + while (try self.readHeader()) |header| { + const kind = header.kind(); + const size: u64 = try header.size(); + self.padding = blockPadding(size); + + switch (kind) { + // File types to return upstream + .directory, .normal, .symbolic_link => { + file.kind = switch (kind) { + .directory => .directory, + .normal => .file, + .symbolic_link => .sym_link, + else => unreachable, + }; + file.mode = try header.mode(); + + // set file attributes if not already set by prefix/extended headers + if (file.size == 0) { + file.size = size; + } + if (file.link_name.len == 0) { + file.link_name = try header.linkName(self.link_name_buffer); + } + if (file.name.len == 0) { + file.name = try header.fullName(self.file_name_buffer); + } + + self.padding = blockPadding(file.size); + self.unread_file_bytes = file.size; + return file; + }, + // Prefix header types + .gnu_long_name => { + file.name = try self.readString(@intCast(size), self.file_name_buffer); + }, + .gnu_long_link => { + file.link_name = try self.readString(@intCast(size), self.link_name_buffer); + }, + .extended_header => { + // Use just attributes from last extended header. + file = self.newFile(); + + var rdr: PaxIterator = .{ + .reader = self.reader, + .size = @intCast(size), + }; + while (try rdr.next()) |attr| { + switch (attr.kind) { + .path => { + file.name = try attr.value(self.file_name_buffer); + }, + .linkpath => { + file.link_name = try attr.value(self.link_name_buffer); + }, + .size => { + var buf: [pax_max_size_attr_len]u8 = undefined; + file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10); + }, + } + } + }, + // Ignored header type + .global_extended_header => { + self.reader.discardAll64(size) catch return error.TarHeadersTooBig; + }, + // All other are unsupported header types + else => { + const d = self.diagnostics orelse return error.TarUnsupportedHeader; + try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ + .file_name = try d.allocator.dupe(u8, header.name()), + .file_type = kind, + } }); + if (kind == .gnu_sparse) { + try self.skipGnuSparseExtendedHeaders(header); + } + self.reader.discardAll64(size) catch return error.TarHeadersTooBig; + }, + } + } + return null; + } + + fn skipGnuSparseExtendedHeaders(self: *Iterator, header: Header) !void { + var is_extended = header.bytes[482] > 0; + while (is_extended) { + var buf: [Header.SIZE]u8 = undefined; + try self.reader.readSlice(&buf); + is_extended = buf[504] > 0; + } + } +}; const PaxAttributeKind = enum { path, @@ -533,103 +535,92 @@ const PaxAttributeKind = enum { // maxInt(u64) has 20 chars, base 10 in practice we got 24 chars const pax_max_size_attr_len = 64; -fn PaxIterator(comptime ReaderType: type) type { - return struct { - size: usize, // cumulative size of all pax attributes - reader: ReaderType, - // scratch buffer used for reading attribute length and keyword - scratch: [128]u8 = undefined, +pub const PaxIterator = struct { + size: usize, // cumulative size of all pax attributes + 
+    reader: *std.io.BufferedReader,
 
-        const Self = @This();
+    const Self = @This();
 
-        const Attribute = struct {
-            kind: PaxAttributeKind,
-            len: usize, // length of the attribute value
-            reader: ReaderType, // reader positioned at value start
+    const Attribute = struct {
+        kind: PaxAttributeKind,
+        len: usize, // length of the attribute value
+        reader: *std.io.BufferedReader, // reader positioned at value start
 
-            // Copies pax attribute value into destination buffer.
-            // Must be called with destination buffer of size at least Attribute.len.
-            pub fn value(self: Attribute, dst: []u8) ![]const u8 {
-                if (self.len > dst.len) return error.TarInsufficientBuffer;
-                // assert(self.len <= dst.len);
-                const buf = dst[0..self.len];
-                const n = try self.reader.readAll(buf);
-                if (n < self.len) return error.UnexpectedEndOfStream;
-                try validateAttributeEnding(self.reader);
-                if (hasNull(buf)) return error.PaxNullInValue;
-                return buf;
-            }
-        };
-
-        // Iterates over pax attributes. Returns known only known attributes.
-        // Caller has to call value in Attribute, to advance reader across value.
-        pub fn next(self: *Self) !?Attribute {
-            // Pax extended header consists of one or more attributes, each constructed as follows:
-            // "%d %s=%s\n", <length>, <keyword>, <value>
-            while (self.size > 0) {
-                const length_buf = try self.readUntil(' ');
-                const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
-
-                const keyword = try self.readUntil('=');
-                if (hasNull(keyword)) return error.PaxNullInKeyword;
-
-                // calculate value_len
-                const value_start = length_buf.len + keyword.len + 2; // 2 separators
-                if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
-                const value_len = length - value_start - 1; // \n separator at end
-                self.size -= length;
-
-                const kind: PaxAttributeKind = if (eql(keyword, "path"))
-                    .path
-                else if (eql(keyword, "linkpath"))
-                    .linkpath
-                else if (eql(keyword, "size"))
-                    .size
-                else {
-                    try self.reader.skipBytes(value_len, .{});
-                    try validateAttributeEnding(self.reader);
-                    continue;
-                };
-                if (kind == .size and value_len > pax_max_size_attr_len) {
-                    return error.PaxSizeAttrOverflow;
-                }
-                return Attribute{
-                    .kind = kind,
-                    .len = value_len,
-                    .reader = self.reader,
-                };
-            }
-
-            return null;
-        }
-
-        fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
-            var fbs: std.io.BufferedWriter = undefined;
-            fbs.initFixed(&self.scratch);
-            try self.reader.streamUntilDelimiter(&fbs, delimiter, null);
-            return fbs.getWritten();
-        }
-
-        fn eql(a: []const u8, b: []const u8) bool {
-            return std.mem.eql(u8, a, b);
-        }
-
-        fn hasNull(str: []const u8) bool {
-            return (std.mem.indexOfScalar(u8, str, 0)) != null;
-        }
-
-        // Checks that each record ends with new line.
-        fn validateAttributeEnding(reader: ReaderType) !void {
-            if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
+        // Copies pax attribute value into destination buffer.
+        // Must be called with destination buffer of size at least Attribute.len.
+        pub fn value(self: Attribute, dst: []u8) ![]const u8 {
+            if (self.len > dst.len) return error.TarInsufficientBuffer;
+            // assert(self.len <= dst.len);
+            const buf = dst[0..self.len];
+            const n = try self.reader.readSliceShort(buf);
+            if (n < self.len) return error.UnexpectedEndOfStream;
+            try validateAttributeEnding(self.reader);
+            if (hasNull(buf)) return error.PaxNullInValue;
+            return buf;
         }
     };
-}
+
+    // Iterates over pax attributes. Returns only known attributes.
+    // The caller must call `value` on the returned `Attribute` to advance the
+    // reader past the attribute value.
+    pub fn next(self: *Self) !?Attribute {
+        // Pax extended header consists of one or more attributes, each constructed as follows:
+        // "%d %s=%s\n", <length>, <keyword>, <value>
+        while (self.size > 0) {
+            const length_buf = try self.reader.takeSentinel(' ');
+            const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
+
+            const keyword = try self.reader.takeSentinel('=');
+            if (hasNull(keyword)) return error.PaxNullInKeyword;
+
+            // calculate value_len
+            const value_start = length_buf.len + keyword.len + 2; // 2 separators
+            if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
+            const value_len = length - value_start - 1; // \n separator at end
+            self.size -= length;
+
+            const kind: PaxAttributeKind = if (eql(keyword, "path"))
+                .path
+            else if (eql(keyword, "linkpath"))
+                .linkpath
+            else if (eql(keyword, "size"))
+                .size
+            else {
+                try self.reader.discardAll(value_len);
+                try validateAttributeEnding(self.reader);
+                continue;
+            };
+            if (kind == .size and value_len > pax_max_size_attr_len) {
+                return error.PaxSizeAttrOverflow;
+            }
+            return .{
+                .kind = kind,
+                .len = value_len,
+                .reader = self.reader,
+            };
+        }
+
+        return null;
+    }
+
+    fn eql(a: []const u8, b: []const u8) bool {
+        return std.mem.eql(u8, a, b);
+    }
+
+    fn hasNull(str: []const u8) bool {
+        return (std.mem.indexOfScalar(u8, str, 0)) != null;
+    }
+
+    // Checks that each record ends with a newline.
+    fn validateAttributeEnding(reader: *std.io.BufferedReader) !void {
+        if (try reader.takeByte() != '\n') return error.PaxInvalidAttributeEnd;
+    }
+};
 
 /// Saves tar file content to the file systems.
-pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void {
+pub fn pipeToFileSystem(dir: std.fs.Dir, reader: *std.io.BufferedReader, options: PipeOptions) !void {
     var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
     var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
-    var iter = iterator(reader, .{
+    var iter: Iterator = .init(reader, .{
         .file_name_buffer = &file_name_buffer,
         .link_name_buffer = &link_name_buffer,
         .diagnostics = options.diagnostics,
@@ -827,8 +818,9 @@ test PaxIterator {
     var buffer: [1024]u8 = undefined;
 
     outer: for (cases) |case| {
-        var stream = std.io.fixedBufferStream(case.data);
-        var iter = paxIterator(stream.reader(), case.data.len);
+        var br: std.io.BufferedReader = undefined;
+        br.initFixed(case.data);
+        var iter: PaxIterator = .{ .reader = &br, .size = case.data.len };
 
         var i: usize = 0;
         while (iter.next() catch |err| {
@@ -942,7 +934,7 @@ test "create file and symlink" {
     file.close();
 }
 
-test iterator {
+test Iterator {
     // Example tar file is created from this tree structure:
     // $ tree example
     // example
 
     var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
     var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
     // Create iterator
-    var iter = iterator(fbs.reader(), .{
+    var iter: Iterator = .init(fbs.reader(), .{
         .file_name_buffer = &file_name_buffer,
         .link_name_buffer = &link_name_buffer,
     });