From c7485d73ac3e6dd105c6ee8fcf774493cc9eb31e Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 16 Dec 2024 00:49:59 +0000 Subject: [PATCH] compiler: introduce ZonGen and make `ast-check` run it for ZON inputs Currently, `zig ast-check` fails on ZON files, because it tries to interpret the file as Zig source code. This commit introduces a new verification pass, `std.zig.ZonGen`, which applies to an AST in ZON mode. Like `AstGen`, this pass also converts the AST into a more helpful format. Rather than a sequence of instructions like `Zir`, the output format of `ZonGen` is a new datastructure called `Zoir`. This type is essentially a simpler form of AST, containing only the information required for consumers of ZON. It is also far more compact than `std.zig.Ast`, with the size generally being comparable to the size of the well-formatted source file. The emitted `Zoir` is currently not used aside from the `-t` option to `ast-check` which causes it to be dumped to stdout. However, in future, it can be used for comptime `@import` of ZON files, as well as for simpler handling of files like `build.zig.zon`, and even by other parts of the Zig Standard Library. 
Resolves: #22078 --- lib/std/zig.zig | 2 + lib/std/zig/AstGen.zig | 112 +---- lib/std/zig/ErrorBundle.zig | 73 ++- lib/std/zig/Zoir.zig | 239 ++++++++++ lib/std/zig/ZonGen.zig | 835 +++++++++++++++++++++++++++++++++ lib/std/zig/string_literal.zig | 35 ++ src/fmt.zig | 104 ++-- src/main.zig | 198 +++++--- src/print_zoir.zig | 122 +++++ 9 files changed, 1530 insertions(+), 190 deletions(-) create mode 100644 lib/std/zig/Zoir.zig create mode 100644 lib/std/zig/ZonGen.zig create mode 100644 src/print_zoir.zig diff --git a/lib/std/zig.zig b/lib/std/zig.zig index 0ef03c24a4..4002eb75cb 100644 --- a/lib/std/zig.zig +++ b/lib/std/zig.zig @@ -14,6 +14,8 @@ pub const isPrimitive = primitives.isPrimitive; pub const Ast = @import("zig/Ast.zig"); pub const AstGen = @import("zig/AstGen.zig"); pub const Zir = @import("zig/Zir.zig"); +pub const Zoir = @import("zig/Zoir.zig"); +pub const ZonGen = @import("zig/ZonGen.zig"); pub const system = @import("zig/system.zig"); pub const CrossTarget = @compileError("deprecated; use std.Target.Query"); pub const BuiltinFn = @import("zig/BuiltinFn.zig"); diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index 8d98c2a050..0751de120a 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -130,6 +130,8 @@ fn appendRefsAssumeCapacity(astgen: *AstGen, refs: []const Zir.Inst.Ref) void { } pub fn generate(gpa: Allocator, tree: Ast) Allocator.Error!Zir { + assert(tree.mode == .zig); + var arena = std.heap.ArenaAllocator.init(gpa); defer arena.deinit(); @@ -11413,83 +11415,7 @@ fn parseStrLit( fn failWithStrLitError(astgen: *AstGen, err: std.zig.string_literal.Error, token: Ast.TokenIndex, bytes: []const u8, offset: u32) InnerError { const raw_string = bytes[offset..]; - switch (err) { - .invalid_escape_character => |bad_index| { - return astgen.failOff( - token, - offset + @as(u32, @intCast(bad_index)), - "invalid escape character: '{c}'", - .{raw_string[bad_index]}, - ); - }, - .expected_hex_digit => |bad_index| { - return 
astgen.failOff( - token, - offset + @as(u32, @intCast(bad_index)), - "expected hex digit, found '{c}'", - .{raw_string[bad_index]}, - ); - }, - .empty_unicode_escape_sequence => |bad_index| { - return astgen.failOff( - token, - offset + @as(u32, @intCast(bad_index)), - "empty unicode escape sequence", - .{}, - ); - }, - .expected_hex_digit_or_rbrace => |bad_index| { - return astgen.failOff( - token, - offset + @as(u32, @intCast(bad_index)), - "expected hex digit or '}}', found '{c}'", - .{raw_string[bad_index]}, - ); - }, - .invalid_unicode_codepoint => |bad_index| { - return astgen.failOff( - token, - offset + @as(u32, @intCast(bad_index)), - "unicode escape does not correspond to a valid unicode scalar value", - .{}, - ); - }, - .expected_lbrace => |bad_index| { - return astgen.failOff( - token, - offset + @as(u32, @intCast(bad_index)), - "expected '{{', found '{c}", - .{raw_string[bad_index]}, - ); - }, - .expected_rbrace => |bad_index| { - return astgen.failOff( - token, - offset + @as(u32, @intCast(bad_index)), - "expected '}}', found '{c}", - .{raw_string[bad_index]}, - ); - }, - .expected_single_quote => |bad_index| { - return astgen.failOff( - token, - offset + @as(u32, @intCast(bad_index)), - "expected single quote ('), found '{c}", - .{raw_string[bad_index]}, - ); - }, - .invalid_character => |bad_index| { - return astgen.failOff( - token, - offset + @as(u32, @intCast(bad_index)), - "invalid byte in string or character literal: '{c}'", - .{raw_string[bad_index]}, - ); - }, - .empty_char_literal => { - return astgen.failOff(token, offset, "empty character literal", .{}); - }, - } + return err.lower(raw_string, offset, AstGen.failOff, .{ astgen, token }); } fn failNode( @@ -14019,30 +13945,40 @@ fn emitDbgStmtForceCurrentIndex(gz: *GenZir, lc: LineColumn) !void { } fn lowerAstErrors(astgen: *AstGen) !void { + const gpa = astgen.gpa; const tree = astgen.tree; assert(tree.errors.len > 0); - const gpa = astgen.gpa; - const parse_err = tree.errors[0]; - var 
msg: std.ArrayListUnmanaged(u8) = .empty; defer msg.deinit(gpa); var notes: std.ArrayListUnmanaged(u32) = .empty; defer notes.deinit(gpa); - for (tree.errors[1..]) |note| { - if (!note.is_note) break; + var cur_err = tree.errors[0]; + for (tree.errors[1..]) |err| { + if (err.is_note) { + try tree.renderError(err, msg.writer(gpa)); + try notes.append(gpa, try astgen.errNoteTok(err.token, "{s}", .{msg.items})); + } else { + // Flush error + const extra_offset = tree.errorOffset(cur_err); + try tree.renderError(cur_err, msg.writer(gpa)); + try astgen.appendErrorTokNotesOff(cur_err.token, extra_offset, "{s}", .{msg.items}, notes.items); + notes.clearRetainingCapacity(); + cur_err = err; + // TODO: `Parse` currently does not have good error recovery mechanisms, so the remaining errors could be bogus. + // As such, we'll ignore all remaining errors for now. We should improve `Parse` so that we can report all the errors. + return; + } msg.clearRetainingCapacity(); - try tree.renderError(note, msg.writer(gpa)); - try notes.append(gpa, try astgen.errNoteTok(note.token, "{s}", .{msg.items})); } - const extra_offset = tree.errorOffset(parse_err); - msg.clearRetainingCapacity(); - try tree.renderError(parse_err, msg.writer(gpa)); - try astgen.appendErrorTokNotesOff(parse_err.token, extra_offset, "{s}", .{msg.items}, notes.items); + // Flush error + const extra_offset = tree.errorOffset(cur_err); + try tree.renderError(cur_err, msg.writer(gpa)); + try astgen.appendErrorTokNotesOff(cur_err.token, extra_offset, "{s}", .{msg.items}, notes.items); } const DeclarationName = union(enum) { diff --git a/lib/std/zig/ErrorBundle.zig b/lib/std/zig/ErrorBundle.zig index 3f3c7ff9e3..4612d0762d 100644 --- a/lib/std/zig/ErrorBundle.zig +++ b/lib/std/zig/ErrorBundle.zig @@ -507,7 +507,7 @@ pub const Wip = struct { } if (item.data.notes != 0) { - const notes_start = try eb.reserveNotes(item.data.notes); + const notes_start = try eb.reserveNotes(item.data.notesLen(zir)); const block = 
zir.extraData(Zir.Inst.Block, item.data.notes); const body = zir.extra[block.end..][0..block.data.body_len]; for (notes_start.., body) |note_i, body_elem| { @@ -547,6 +547,77 @@ pub const Wip = struct { } } + pub fn addZoirErrorMessages( + eb: *ErrorBundle.Wip, + zoir: std.zig.Zoir, + tree: std.zig.Ast, + source: [:0]const u8, + src_path: []const u8, + ) !void { + assert(zoir.hasCompileErrors()); + + for (zoir.compile_errors) |err| { + const err_span: std.zig.Ast.Span = span: { + if (err.token == std.zig.Zoir.CompileError.invalid_token) { + break :span tree.nodeToSpan(err.node_or_offset); + } + const token_start = tree.tokens.items(.start)[err.token]; + const start = token_start + err.node_or_offset; + const end = token_start + @as(u32, @intCast(tree.tokenSlice(err.token).len)); + break :span .{ .start = start, .end = end, .main = start }; + }; + const err_loc = std.zig.findLineColumn(source, err_span.main); + + try eb.addRootErrorMessage(.{ + .msg = try eb.addString(err.msg.get(zoir)), + .src_loc = try eb.addSourceLocation(.{ + .src_path = try eb.addString(src_path), + .span_start = err_span.start, + .span_main = err_span.main, + .span_end = err_span.end, + .line = @intCast(err_loc.line), + .column = @intCast(err_loc.column), + .source_line = try eb.addString(err_loc.source_line), + }), + .notes_len = err.note_count, + }); + + const notes_start = try eb.reserveNotes(err.note_count); + for (notes_start.., err.first_note.., 0..err.note_count) |eb_note_idx, zoir_note_idx, _| { + const note = zoir.error_notes[zoir_note_idx]; + const note_span: std.zig.Ast.Span = span: { + if (note.token == std.zig.Zoir.CompileError.invalid_token) { + break :span tree.nodeToSpan(note.node_or_offset); + } + const token_start = tree.tokens.items(.start)[note.token]; + const start = token_start + note.node_or_offset; + const end = token_start + @as(u32, @intCast(tree.tokenSlice(note.token).len)); + break :span .{ .start = start, .end = end, .main = start }; + }; + const note_loc = 
std.zig.findLineColumn(source, note_span.main); + + // This line can cause `wip.extra.items` to be resized. + const note_index = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.addString(note.msg.get(zoir)), + .src_loc = try eb.addSourceLocation(.{ + .src_path = try eb.addString(src_path), + .span_start = note_span.start, + .span_main = note_span.main, + .span_end = note_span.end, + .line = @intCast(note_loc.line), + .column = @intCast(note_loc.column), + .source_line = if (note_loc.eql(err_loc)) + 0 + else + try eb.addString(note_loc.source_line), + }), + .notes_len = 0, + })); + eb.extra.items[eb_note_idx] = note_index; + } + } + } + fn addOtherMessage(wip: *Wip, other: ErrorBundle, msg_index: MessageIndex) !MessageIndex { const other_msg = other.getErrorMessage(msg_index); const src_loc = try wip.addOtherSourceLocation(other, other_msg.src_loc); diff --git a/lib/std/zig/Zoir.zig b/lib/std/zig/Zoir.zig new file mode 100644 index 0000000000..9a9bc41976 --- /dev/null +++ b/lib/std/zig/Zoir.zig @@ -0,0 +1,239 @@ +//! Zig Object Intermediate Representation. +//! Simplified AST for the ZON (Zig Object Notation) format. +//! `ZonGen` converts `Ast` to `Zoir`. + +nodes: std.MultiArrayList(Node.Repr).Slice, +extra: []u32, +limbs: []std.math.big.Limb, +string_bytes: []u8, + +compile_errors: []Zoir.CompileError, +error_notes: []Zoir.CompileError.Note, + +pub fn hasCompileErrors(zoir: Zoir) bool { + if (zoir.compile_errors.len > 0) { + assert(zoir.nodes.len == 0); + assert(zoir.extra.len == 0); + assert(zoir.limbs.len == 0); + return true; + } else { + assert(zoir.error_notes.len == 0); + return false; + } +} + +pub fn deinit(zoir: Zoir, gpa: Allocator) void { + var nodes = zoir.nodes; + nodes.deinit(gpa); + + gpa.free(zoir.extra); + gpa.free(zoir.limbs); + gpa.free(zoir.string_bytes); + gpa.free(zoir.compile_errors); + gpa.free(zoir.error_notes); +} + +pub const Node = union(enum) { + /// A literal `true` value. + true, + /// A literal `false` value. 
+ false, + /// A literal `null` value. + null, + /// A literal `inf` value. + pos_inf, + /// A literal `-inf` value. + neg_inf, + /// A literal `nan` value. + nan, + /// An integer literal. + int_literal: union(enum) { + small: i32, + big: std.math.big.int.Const, + }, + /// A floating-point literal. + float_literal: f128, + /// A Unicode codepoint literal. + char_literal: u32, + /// An enum literal. The string is the literal, i.e. `foo` for `.foo`. + enum_literal: NullTerminatedString, + /// A string literal. + string_literal: []const u8, + /// An empty struct/array literal, i.e. `.{}`. + empty_literal, + /// An array literal. The `Range` gives the elements of the array literal. + array_literal: Node.Index.Range, + /// A struct literal. `names.len` is always equal to `vals.len`. + struct_literal: struct { + names: []const NullTerminatedString, + vals: Node.Index.Range, + }, + + pub const Index = enum(u32) { + root = 0, + _, + + pub fn get(idx: Index, zoir: Zoir) Node { + const repr = zoir.nodes.get(@intFromEnum(idx)); + return switch (repr.tag) { + .true => .true, + .false => .false, + .null => .null, + .pos_inf => .pos_inf, + .neg_inf => .neg_inf, + .nan => .nan, + .int_literal_small => .{ .int_literal = .{ .small = @bitCast(repr.data) } }, + .int_literal_pos, .int_literal_neg => .{ .int_literal = .{ .big = .{ + .limbs = l: { + const limb_count, const limbs_idx = zoir.extra[repr.data..][0..2].*; + break :l zoir.limbs[limbs_idx..][0..limb_count]; + }, + .positive = switch (repr.tag) { + .int_literal_pos => true, + .int_literal_neg => false, + else => unreachable, + }, + } } }, + .float_literal_small => .{ .float_literal = @as(f32, @bitCast(repr.data)) }, + .float_literal => .{ .float_literal = @bitCast(zoir.extra[repr.data..][0..4].*) }, + .char_literal => .{ .char_literal = repr.data }, + .enum_literal => .{ .enum_literal = @enumFromInt(repr.data) }, + .string_literal => .{ .string_literal = s: { + const start, const len = zoir.extra[repr.data..][0..2].*; + break 
:s zoir.string_bytes[start..][0..len]; + } }, + .string_literal_null => .{ .string_literal = NullTerminatedString.get(@enumFromInt(repr.data), zoir) }, + .empty_literal => .empty_literal, + .array_literal => .{ .array_literal = a: { + const elem_count, const first_elem = zoir.extra[repr.data..][0..2].*; + break :a .{ .start = @enumFromInt(first_elem), .len = elem_count }; + } }, + .struct_literal => .{ .struct_literal = s: { + const elem_count, const first_elem = zoir.extra[repr.data..][0..2].*; + const field_names = zoir.extra[repr.data + 2 ..][0..elem_count]; + break :s .{ + .names = @ptrCast(field_names), + .vals = .{ .start = @enumFromInt(first_elem), .len = elem_count }, + }; + } }, + }; + } + + pub fn getAstNode(idx: Index, zoir: Zoir) std.zig.Ast.Node.Index { + return zoir.nodes.items(.ast_node)[@intFromEnum(idx)]; + } + + pub const Range = struct { + start: Index, + len: u32, + + pub fn at(r: Range, i: u32) Index { + assert(i < r.len); + return @enumFromInt(@intFromEnum(r.start) + i); + } + }; + }; + + pub const Repr = struct { + tag: Tag, + data: u32, + ast_node: std.zig.Ast.Node.Index, + + pub const Tag = enum(u8) { + /// `data` is ignored. + true, + /// `data` is ignored. + false, + /// `data` is ignored. + null, + /// `data` is ignored. + pos_inf, + /// `data` is ignored. + neg_inf, + /// `data` is ignored. + nan, + /// `data` is the `i32` value. + int_literal_small, + /// `data` is index into `extra` of: + /// * `limb_count: u32` + /// * `limbs_idx: u32` + int_literal_pos, + /// Identical to `int_literal_pos`, except the value is negative. + int_literal_neg, + /// `data` is the `f32` value. + float_literal_small, + /// `data` is index into `extra` of 4 elements which are a bitcast `f128`. + float_literal, + /// `data` is the `u32` value. + char_literal, + /// `data` is a `NullTerminatedString`. 
+ enum_literal, + /// `data` is index into `extra` of: + /// * `start: u32` + /// * `len: u32` + string_literal, + /// Null-terminated string literal, + /// `data` is a `NullTerminatedString`. + string_literal_null, + /// An empty struct/array literal, `.{}`. + /// `data` is ignored. + empty_literal, + /// `data` is index into `extra` of: + /// * `elem_count: u32` + /// * `first_elem: Node.Index` + /// The nodes `first_elem .. first_elem + elem_count` are the children. + array_literal, + /// `data` is index into `extra` of: + /// * `elem_count: u32` + /// * `first_elem: Node.Index` + /// * `field_name: NullTerminatedString` for each `elem_count` + /// The nodes `first_elem .. first_elem + elem_count` are the children. + struct_literal, + }; + }; +}; + +pub const NullTerminatedString = enum(u32) { + _, + pub fn get(nts: NullTerminatedString, zoir: Zoir) [:0]const u8 { + const idx = std.mem.indexOfScalar(u8, zoir.string_bytes[@intFromEnum(nts)..], 0).?; + return zoir.string_bytes[@intFromEnum(nts)..][0..idx :0]; + } +}; + +pub const CompileError = extern struct { + msg: NullTerminatedString, + token: Ast.TokenIndex, + /// If `token == invalid_token`, this is an `Ast.Node.Index`. + /// Otherwise, this is a byte offset into `token`. + node_or_offset: u32, + + /// Ignored if `note_count == 0`. + first_note: u32, + note_count: u32, + + pub fn getNotes(err: CompileError, zoir: Zoir) []const Note { + return zoir.error_notes[err.first_note..][0..err.note_count]; + } + + pub const Note = extern struct { + msg: NullTerminatedString, + token: Ast.TokenIndex, + /// If `token == invalid_token`, this is an `Ast.Node.Index`. + /// Otherwise, this is a byte offset into `token`. 
+ node_or_offset: u32, + }; + + pub const invalid_token: Ast.TokenIndex = std.math.maxInt(Ast.TokenIndex); + + comptime { + assert(std.meta.hasUniqueRepresentation(CompileError)); + assert(std.meta.hasUniqueRepresentation(Note)); + } +}; + +const std = @import("std"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const Ast = std.zig.Ast; +const Zoir = @This(); diff --git a/lib/std/zig/ZonGen.zig b/lib/std/zig/ZonGen.zig new file mode 100644 index 0000000000..7f85f35f05 --- /dev/null +++ b/lib/std/zig/ZonGen.zig @@ -0,0 +1,835 @@ +//! Ingests an `Ast` and produces a `Zoir`. + +gpa: Allocator, +tree: Ast, + +nodes: std.MultiArrayList(Zoir.Node.Repr), +extra: std.ArrayListUnmanaged(u32), +limbs: std.ArrayListUnmanaged(std.math.big.Limb), +string_bytes: std.ArrayListUnmanaged(u8), +string_table: std.HashMapUnmanaged(u32, void, StringIndexContext, std.hash_map.default_max_load_percentage), + +compile_errors: std.ArrayListUnmanaged(Zoir.CompileError), +error_notes: std.ArrayListUnmanaged(Zoir.CompileError.Note), + +pub fn generate(gpa: Allocator, tree: Ast) Allocator.Error!Zoir { + assert(tree.mode == .zon); + + var zg: ZonGen = .{ + .gpa = gpa, + .tree = tree, + .nodes = .empty, + .extra = .empty, + .limbs = .empty, + .string_bytes = .empty, + .string_table = .empty, + .compile_errors = .empty, + .error_notes = .empty, + }; + defer { + zg.nodes.deinit(gpa); + zg.extra.deinit(gpa); + zg.limbs.deinit(gpa); + zg.string_bytes.deinit(gpa); + zg.string_table.deinit(gpa); + zg.compile_errors.deinit(gpa); + zg.error_notes.deinit(gpa); + } + + if (tree.errors.len == 0) { + const root_ast_node = tree.nodes.items(.data)[0].lhs; + try zg.nodes.append(gpa, undefined); // index 0; root node + try zg.expr(root_ast_node, .root); + } else { + try zg.lowerAstErrors(); + } + + if (zg.compile_errors.items.len > 0) { + const string_bytes = try zg.string_bytes.toOwnedSlice(gpa); + errdefer gpa.free(string_bytes); + const compile_errors = try 
zg.compile_errors.toOwnedSlice(gpa); + errdefer gpa.free(compile_errors); + const error_notes = try zg.error_notes.toOwnedSlice(gpa); + errdefer gpa.free(error_notes); + + return .{ + .nodes = .empty, + .extra = &.{}, + .limbs = &.{}, + .string_bytes = string_bytes, + .compile_errors = compile_errors, + .error_notes = error_notes, + }; + } else { + assert(zg.error_notes.items.len == 0); + + var nodes = zg.nodes.toOwnedSlice(); + errdefer nodes.deinit(gpa); + const extra = try zg.extra.toOwnedSlice(gpa); + errdefer gpa.free(extra); + const limbs = try zg.limbs.toOwnedSlice(gpa); + errdefer gpa.free(limbs); + const string_bytes = try zg.string_bytes.toOwnedSlice(gpa); + errdefer gpa.free(string_bytes); + + return .{ + .nodes = nodes, + .extra = extra, + .limbs = limbs, + .string_bytes = string_bytes, + .compile_errors = &.{}, + .error_notes = &.{}, + }; + } +} + +fn expr(zg: *ZonGen, node: Ast.Node.Index, dest_node: Zoir.Node.Index) Allocator.Error!void { + const gpa = zg.gpa; + const tree = zg.tree; + const node_tags = tree.nodes.items(.tag); + const node_datas = tree.nodes.items(.data); + const main_tokens = tree.nodes.items(.main_token); + + switch (node_tags[node]) { + .root => unreachable, + .@"usingnamespace" => unreachable, + .test_decl => unreachable, + .container_field_init => unreachable, + .container_field_align => unreachable, + .container_field => unreachable, + .fn_decl => unreachable, + .global_var_decl => unreachable, + .local_var_decl => unreachable, + .simple_var_decl => unreachable, + .aligned_var_decl => unreachable, + .@"defer" => unreachable, + .@"errdefer" => unreachable, + .switch_case => unreachable, + .switch_case_inline => unreachable, + .switch_case_one => unreachable, + .switch_case_inline_one => unreachable, + .switch_range => unreachable, + .asm_output => unreachable, + .asm_input => unreachable, + .for_range => unreachable, + .assign => unreachable, + .assign_destructure => unreachable, + .assign_shl => unreachable, + .assign_shl_sat 
=> unreachable, + .assign_shr => unreachable, + .assign_bit_and => unreachable, + .assign_bit_or => unreachable, + .assign_bit_xor => unreachable, + .assign_div => unreachable, + .assign_sub => unreachable, + .assign_sub_wrap => unreachable, + .assign_sub_sat => unreachable, + .assign_mod => unreachable, + .assign_add => unreachable, + .assign_add_wrap => unreachable, + .assign_add_sat => unreachable, + .assign_mul => unreachable, + .assign_mul_wrap => unreachable, + .assign_mul_sat => unreachable, + + .shl, + .shr, + .add, + .add_wrap, + .add_sat, + .sub, + .sub_wrap, + .sub_sat, + .mul, + .mul_wrap, + .mul_sat, + .div, + .mod, + .shl_sat, + .bit_and, + .bit_or, + .bit_xor, + .bang_equal, + .equal_equal, + .greater_than, + .greater_or_equal, + .less_than, + .less_or_equal, + .array_cat, + .array_mult, + .bool_and, + .bool_or, + .bool_not, + .bit_not, + .negation_wrap, + => try zg.addErrorTok(main_tokens[node], "operator '{s}' is not allowed in ZON", .{tree.tokenSlice(main_tokens[node])}), + + .error_union, + .merge_error_sets, + .optional_type, + .anyframe_literal, + .anyframe_type, + .ptr_type_aligned, + .ptr_type_sentinel, + .ptr_type, + .ptr_type_bit_range, + .container_decl, + .container_decl_trailing, + .container_decl_arg, + .container_decl_arg_trailing, + .container_decl_two, + .container_decl_two_trailing, + .tagged_union, + .tagged_union_trailing, + .tagged_union_enum_tag, + .tagged_union_enum_tag_trailing, + .tagged_union_two, + .tagged_union_two_trailing, + .array_type, + .array_type_sentinel, + .error_set_decl, + .fn_proto_simple, + .fn_proto_multi, + .fn_proto_one, + .fn_proto, + => try zg.addErrorNode(node, "types are not available in ZON", .{}), + + .call_one, + .call_one_comma, + .async_call_one, + .async_call_one_comma, + .call, + .call_comma, + .async_call, + .async_call_comma, + .@"return", + .if_simple, + .@"if", + .while_simple, + .while_cont, + .@"while", + .for_simple, + .@"for", + .@"catch", + .@"orelse", + .@"break", + .@"continue", + 
.@"switch", + .switch_comma, + .@"nosuspend", + .@"suspend", + .@"await", + .@"resume", + .@"try", + .unreachable_literal, + => try zg.addErrorNode(node, "control flow is not allowed in ZON", .{}), + + .@"comptime" => try zg.addErrorNode(node, "keyword 'comptime' is not allowed in ZON", .{}), + .asm_simple, .@"asm" => try zg.addErrorNode(node, "inline asm is not allowed in ZON", .{}), + + .builtin_call_two, + .builtin_call_two_comma, + .builtin_call, + .builtin_call_comma, + => try zg.addErrorNode(node, "builtin function calls are not allowed in ZON", .{}), + + .field_access => try zg.addErrorNode(node, "field accesses are not allowed in ZON", .{}), + + .slice_open, + .slice, + .slice_sentinel, + => try zg.addErrorNode(node, "slice operator is not allowed in ZON", .{}), + + .deref, .address_of => try zg.addErrorTok(main_tokens[node], "pointers are not available in ZON", .{}), + .unwrap_optional => try zg.addErrorTok(main_tokens[node], "optionals are not available in ZON", .{}), + .error_value => try zg.addErrorNode(node, "errors are not available in ZON", .{}), + + .array_access => try zg.addErrorNode(node, "array indexing is not allowed in ZON", .{}), + + .block_two, + .block_two_semicolon, + .block, + .block_semicolon, + => try zg.addErrorNode(node, "blocks are not allowed in ZON", .{}), + + .array_init_one, + .array_init_one_comma, + .array_init, + .array_init_comma, + .struct_init_one, + .struct_init_one_comma, + .struct_init, + .struct_init_comma, + => { + var buf: [2]Ast.Node.Index = undefined; + + const type_node = if (tree.fullArrayInit(&buf, node)) |full| + full.ast.type_expr + else if (tree.fullStructInit(&buf, node)) |full| + full.ast.type_expr + else + unreachable; + + try zg.addErrorNodeNotes(type_node, "types are not available in ZON", .{}, &.{ + try zg.errNoteNode(type_node, "replace the type with '.'", .{}), + }); + }, + + .grouped_expression => { + try zg.addErrorTokNotes(main_tokens[node], "expression grouping is not allowed in ZON", .{}, &.{ + try 
zg.errNoteTok(main_tokens[node], "these parentheses are always redundant", .{}), + }); + return zg.expr(node_datas[node].lhs, dest_node); + }, + + .negation => { + const child_node = node_datas[node].lhs; + switch (node_tags[child_node]) { + .number_literal => return zg.numberLiteral(child_node, node, dest_node, .negative), + .identifier => { + const child_ident = tree.tokenSlice(main_tokens[child_node]); + if (mem.eql(u8, child_ident, "inf")) { + zg.setNode(dest_node, .{ + .tag = .neg_inf, + .data = 0, // ignored + .ast_node = node, + }); + return; + } + }, + else => {}, + } + try zg.addErrorTok(main_tokens[node], "expected number or 'inf' after '-'", .{}); + }, + .number_literal => try zg.numberLiteral(node, node, dest_node, .positive), + .char_literal => try zg.charLiteral(node, dest_node), + + .identifier => try zg.identifier(node, dest_node), + + .enum_literal => { + const str_index = zg.identAsString(main_tokens[node]) catch |err| switch (err) { + error.BadString => undefined, // doesn't matter, there's an error + error.OutOfMemory => |e| return e, + }; + zg.setNode(dest_node, .{ + .tag = .enum_literal, + .data = @intFromEnum(str_index), + .ast_node = node, + }); + }, + .string_literal, .multiline_string_literal => if (zg.strLitAsString(node)) |result| switch (result) { + .nts => |nts| zg.setNode(dest_node, .{ + .tag = .string_literal_null, + .data = @intFromEnum(nts), + .ast_node = node, + }), + .slice => |slice| { + const extra_index: u32 = @intCast(zg.extra.items.len); + try zg.extra.appendSlice(zg.gpa, &.{ slice.start, slice.len }); + zg.setNode(dest_node, .{ + .tag = .string_literal, + .data = extra_index, + .ast_node = node, + }); + }, + } else |err| switch (err) { + error.BadString => {}, + error.OutOfMemory => |e| return e, + }, + + .array_init_dot_two, + .array_init_dot_two_comma, + .array_init_dot, + .array_init_dot_comma, + => { + var buf: [2]Ast.Node.Index = undefined; + const full = tree.fullArrayInit(&buf, node).?; + assert(full.ast.elements.len 
!= 0); // Otherwise it would be a struct init + assert(full.ast.type_expr == 0); // The tag was `array_init_dot_*` + + const first_elem: u32 = @intCast(zg.nodes.len); + try zg.nodes.resize(gpa, zg.nodes.len + full.ast.elements.len); + + const extra_index: u32 = @intCast(zg.extra.items.len); + try zg.extra.appendSlice(gpa, &.{ + @intCast(full.ast.elements.len), + first_elem, + }); + + zg.setNode(dest_node, .{ + .tag = .array_literal, + .data = extra_index, + .ast_node = node, + }); + + for (full.ast.elements, first_elem..) |elem_node, elem_dest_node| { + try zg.expr(elem_node, @enumFromInt(elem_dest_node)); + } + }, + + .struct_init_dot_two, + .struct_init_dot_two_comma, + .struct_init_dot, + .struct_init_dot_comma, + => { + var buf: [2]Ast.Node.Index = undefined; + const full = tree.fullStructInit(&buf, node).?; + assert(full.ast.type_expr == 0); // The tag was `struct_init_dot_*` + + if (full.ast.fields.len == 0) { + zg.setNode(dest_node, .{ + .tag = .empty_literal, + .data = 0, // ignored + .ast_node = node, + }); + return; + } + + const first_elem: u32 = @intCast(zg.nodes.len); + try zg.nodes.resize(gpa, zg.nodes.len + full.ast.fields.len); + + const extra_index: u32 = @intCast(zg.extra.items.len); + try zg.extra.ensureUnusedCapacity(gpa, 2 + full.ast.fields.len); + zg.extra.appendSliceAssumeCapacity(&.{ + @intCast(full.ast.fields.len), + first_elem, + }); + const names_start = extra_index + 2; + zg.extra.appendNTimesAssumeCapacity(undefined, full.ast.fields.len); + + zg.setNode(dest_node, .{ + .tag = .struct_literal, + .data = extra_index, + .ast_node = node, + }); + + for (full.ast.fields, names_start.., first_elem..) 
|elem_node, extra_name_idx, elem_dest_node| { + const name_token = tree.firstToken(elem_node) - 2; + zg.extra.items[extra_name_idx] = @intFromEnum(zg.identAsString(name_token) catch |err| switch (err) { + error.BadString => undefined, // doesn't matter, there's an error + error.OutOfMemory => |e| return e, + }); + try zg.expr(elem_node, @enumFromInt(elem_dest_node)); + } + }, + } +} + +fn parseStrLit(zg: *ZonGen, token: Ast.TokenIndex, offset: u32) !u32 { + const raw_string = zg.tree.tokenSlice(token)[offset..]; + const start = zg.string_bytes.items.len; + switch (try std.zig.string_literal.parseWrite(zg.string_bytes.writer(zg.gpa), raw_string)) { + .success => return @intCast(start), + .failure => |err| { + try zg.lowerStrLitError(err, token, raw_string, offset); + return error.BadString; + }, + } +} + +fn parseMultilineStrLit(zg: *ZonGen, node: Ast.Node.Index) !u32 { + const gpa = zg.gpa; + const tree = zg.tree; + const string_bytes = &zg.string_bytes; + + const first_tok, const last_tok = bounds: { + const node_data = tree.nodes.items(.data)[node]; + break :bounds .{ node_data.lhs, node_data.rhs }; + }; + + const str_index: u32 = @intCast(string_bytes.items.len); + + // First line: do not append a newline. + { + const line_bytes = tree.tokenSlice(first_tok)[2..]; + try string_bytes.appendSlice(gpa, line_bytes); + } + // Following lines: each line prepends a newline. 
+ for (first_tok + 1..last_tok + 1) |tok_idx| { + const line_bytes = tree.tokenSlice(@intCast(tok_idx))[2..]; + try string_bytes.ensureUnusedCapacity(gpa, line_bytes.len + 1); + string_bytes.appendAssumeCapacity('\n'); + string_bytes.appendSliceAssumeCapacity(line_bytes); + } + + return @intCast(str_index); +} + +fn appendIdentStr(zg: *ZonGen, ident_token: Ast.TokenIndex) !u32 { + const tree = zg.tree; + assert(tree.tokens.items(.tag)[ident_token] == .identifier); + const ident_name = tree.tokenSlice(ident_token); + if (!mem.startsWith(u8, ident_name, "@")) { + const start = zg.string_bytes.items.len; + try zg.string_bytes.appendSlice(zg.gpa, ident_name); + return @intCast(start); + } else { + const start = try zg.parseStrLit(ident_token, 1); + const slice = zg.string_bytes.items[start..]; + if (mem.indexOfScalar(u8, slice, 0) != null) { + try zg.addErrorTok(ident_token, "identifier cannot contain null bytes", .{}); + return error.BadString; + } else if (slice.len == 0) { + try zg.addErrorTok(ident_token, "identifier cannot be empty", .{}); + return error.BadString; + } + return start; + } +} + +const StringLiteralResult = union(enum) { + nts: Zoir.NullTerminatedString, + slice: struct { start: u32, len: u32 }, +}; + +fn strLitAsString(zg: *ZonGen, str_node: Ast.Node.Index) !StringLiteralResult { + const gpa = zg.gpa; + const string_bytes = &zg.string_bytes; + const str_index = switch (zg.tree.nodes.items(.tag)[str_node]) { + .string_literal => try zg.parseStrLit(zg.tree.nodes.items(.main_token)[str_node], 0), + .multiline_string_literal => try zg.parseMultilineStrLit(str_node), + else => unreachable, + }; + const key: []const u8 = string_bytes.items[str_index..]; + if (std.mem.indexOfScalar(u8, key, 0) != null) return .{ .slice = .{ + .start = str_index, + .len = @intCast(key.len), + } }; + const gop = try zg.string_table.getOrPutContextAdapted( + gpa, + key, + StringIndexAdapter{ .bytes = string_bytes }, + StringIndexContext{ .bytes = string_bytes }, + ); + if 
(gop.found_existing) { + string_bytes.shrinkRetainingCapacity(str_index); + return .{ .nts = @enumFromInt(gop.key_ptr.*) }; + } + gop.key_ptr.* = str_index; + try string_bytes.append(gpa, 0); + return .{ .nts = @enumFromInt(str_index) }; +} + +fn identAsString(zg: *ZonGen, ident_token: Ast.TokenIndex) !Zoir.NullTerminatedString { + const gpa = zg.gpa; + const string_bytes = &zg.string_bytes; + const str_index = try zg.appendIdentStr(ident_token); + const key: []const u8 = string_bytes.items[str_index..]; + const gop = try zg.string_table.getOrPutContextAdapted( + gpa, + key, + StringIndexAdapter{ .bytes = string_bytes }, + StringIndexContext{ .bytes = string_bytes }, + ); + if (gop.found_existing) { + string_bytes.shrinkRetainingCapacity(str_index); + return @enumFromInt(gop.key_ptr.*); + } + gop.key_ptr.* = str_index; + try string_bytes.append(gpa, 0); + return @enumFromInt(str_index); +} + +fn numberLiteral(zg: *ZonGen, num_node: Ast.Node.Index, src_node: Ast.Node.Index, dest_node: Zoir.Node.Index, sign: enum { negative, positive }) !void { + const tree = zg.tree; + const num_token = tree.nodes.items(.main_token)[num_node]; + const num_bytes = tree.tokenSlice(num_token); + + switch (std.zig.parseNumberLiteral(num_bytes)) { + .int => |unsigned_num| { + if (unsigned_num == 0 and sign == .negative) { + try zg.addErrorTokNotes(num_token, "integer literal '-0' is ambiguous", .{}, &.{ + try zg.errNoteTok(num_token, "use '0' for an integer zero", .{}), + try zg.errNoteTok(num_token, "use '-0.0' for a floating-point signed zero", .{}), + }); + return; + } + const num: i65 = switch (sign) { + .positive => unsigned_num, + .negative => -@as(i65, unsigned_num), + }; + if (std.math.cast(i32, num)) |x| { + zg.setNode(dest_node, .{ + .tag = .int_literal_small, + .data = @bitCast(x), + .ast_node = src_node, + }); + return; + } + const max_limbs = comptime std.math.big.int.calcTwosCompLimbCount(@bitSizeOf(@TypeOf(num))); + var limbs: [max_limbs]std.math.big.Limb = undefined; + 
var big_int: std.math.big.int.Mutable = .init(&limbs, num); + try zg.setBigIntLiteralNode(dest_node, src_node, big_int.toConst()); + }, + .big_int => |base| { + const gpa = zg.gpa; + const num_without_prefix = switch (base) { + .decimal => num_bytes, + .hex, .binary, .octal => num_bytes[2..], + }; + var big_int: std.math.big.int.Managed = try .init(gpa); + defer big_int.deinit(); + big_int.setString(@intFromEnum(base), num_without_prefix) catch |err| switch (err) { + error.InvalidCharacter => unreachable, // caught in `parseNumberLiteral` + error.InvalidBase => unreachable, // we only pass 10, 16, 8, 2, see above + error.OutOfMemory => return error.OutOfMemory, + }; + switch (sign) { + .positive => {}, + .negative => big_int.negate(), + } + try zg.setBigIntLiteralNode(dest_node, src_node, big_int.toConst()); + }, + .float => { + const unsigned_num = std.fmt.parseFloat(f128, num_bytes) catch |err| switch (err) { + error.InvalidCharacter => unreachable, // validated by tokenizer + }; + const num: f128 = switch (sign) { + .positive => unsigned_num, + .negative => -unsigned_num, + }; + + { + // If the value fits into an f32 without losing any precision, store it that way. 
+ @setFloatMode(.strict); + const smaller_float: f32 = @floatCast(num); + const bigger_again: f128 = smaller_float; + if (bigger_again == num) { + zg.setNode(dest_node, .{ + .tag = .float_literal_small, + .data = @bitCast(smaller_float), + .ast_node = src_node, + }); + return; + } + } + + const elems: [4]u32 = @bitCast(num); + const extra_index: u32 = @intCast(zg.extra.items.len); + try zg.extra.appendSlice(zg.gpa, &elems); + zg.setNode(dest_node, .{ + .tag = .float_literal, + .data = extra_index, + .ast_node = src_node, + }); + }, + .failure => |err| try zg.lowerNumberError(err, num_token, num_bytes), + } +} + +fn setBigIntLiteralNode(zg: *ZonGen, dest_node: Zoir.Node.Index, src_node: Ast.Node.Index, val: std.math.big.int.Const) !void { + try zg.extra.ensureUnusedCapacity(zg.gpa, 2); + try zg.limbs.ensureUnusedCapacity(zg.gpa, val.limbs.len); + + const limbs_idx: u32 = @intCast(zg.limbs.items.len); + zg.limbs.appendSliceAssumeCapacity(val.limbs); + + const extra_idx: u32 = @intCast(zg.extra.items.len); + zg.extra.appendSliceAssumeCapacity(&.{ @intCast(val.limbs.len), limbs_idx }); + + zg.setNode(dest_node, .{ + .tag = if (val.positive) .int_literal_pos else .int_literal_neg, + .data = extra_idx, + .ast_node = src_node, + }); +} + +fn charLiteral(zg: *ZonGen, node: Ast.Node.Index, dest_node: Zoir.Node.Index) !void { + const tree = zg.tree; + assert(tree.nodes.items(.tag)[node] == .char_literal); + const main_token = tree.nodes.items(.main_token)[node]; + const slice = tree.tokenSlice(main_token); + switch (std.zig.parseCharLiteral(slice)) { + .success => |codepoint| zg.setNode(dest_node, .{ + .tag = .char_literal, + .data = codepoint, + .ast_node = node, + }), + .failure => |err| try zg.lowerStrLitError(err, main_token, slice, 0), + } +} + +fn identifier(zg: *ZonGen, node: Ast.Node.Index, dest_node: Zoir.Node.Index) !void { + const tree = zg.tree; + assert(tree.nodes.items(.tag)[node] == .identifier); + const main_token = tree.nodes.items(.main_token)[node]; + 
const ident = tree.tokenSlice(main_token); + + const tag: Zoir.Node.Repr.Tag = t: { + if (mem.eql(u8, ident, "true")) break :t .true; + if (mem.eql(u8, ident, "false")) break :t .false; + if (mem.eql(u8, ident, "null")) break :t .null; + if (mem.eql(u8, ident, "inf")) break :t .pos_inf; + if (mem.eql(u8, ident, "nan")) break :t .nan; + try zg.addErrorNodeNotes(node, "invalid expression", .{}, &.{ + try zg.errNoteNode(node, "ZON allows identifiers 'true', 'false', 'null', 'inf', and 'nan'", .{}), + try zg.errNoteNode(node, "precede identifier with '.' for an enum literal", .{}), + }); + return; + }; + + zg.setNode(dest_node, .{ + .tag = tag, + .data = 0, // ignored + .ast_node = node, + }); +} + +fn setNode(zg: *ZonGen, dest: Zoir.Node.Index, repr: Zoir.Node.Repr) void { + zg.nodes.set(@intFromEnum(dest), repr); +} + +fn lowerStrLitError(zg: *ZonGen, err: std.zig.string_literal.Error, token: Ast.TokenIndex, raw_string: []const u8, offset: u32) Allocator.Error!void { + return err.lower(raw_string, offset, ZonGen.addErrorTokOff, .{ zg, token }); +} + +fn lowerNumberError(zg: *ZonGen, err: std.zig.number_literal.Error, token: Ast.TokenIndex, bytes: []const u8) Allocator.Error!void { + const is_float = std.mem.indexOfScalar(u8, bytes, '.') != null; + switch (err) { + .leading_zero => if (is_float) { + try zg.addErrorTok(token, "number '{s}' has leading zero", .{bytes}); + } else { + try zg.addErrorTokNotes(token, "number '{s}' has leading zero", .{bytes}, &.{ + try zg.errNoteTok(token, "use '0o' prefix for octal literals", .{}), + }); + }, + .digit_after_base => try zg.addErrorTok(token, "expected a digit after base prefix", .{}), + .upper_case_base => |i| try zg.addErrorTokOff(token, @intCast(i), "base prefix must be lowercase", .{}), + .invalid_float_base => |i| try zg.addErrorTokOff(token, @intCast(i), "invalid base for float literal", .{}), + .repeated_underscore => |i| try zg.addErrorTokOff(token, @intCast(i), "repeated digit separator", .{}), + 
.invalid_underscore_after_special => |i| try zg.addErrorTokOff(token, @intCast(i), "expected digit before digit separator", .{}), + .invalid_digit => |info| try zg.addErrorTokOff(token, @intCast(info.i), "invalid digit '{c}' for {s} base", .{ bytes[info.i], @tagName(info.base) }), + .invalid_digit_exponent => |i| try zg.addErrorTokOff(token, @intCast(i), "invalid digit '{c}' in exponent", .{bytes[i]}), + .duplicate_exponent => |i| try zg.addErrorTokOff(token, @intCast(i), "duplicate exponent", .{}), + .exponent_after_underscore => |i| try zg.addErrorTokOff(token, @intCast(i), "expected digit before exponent", .{}), + .special_after_underscore => |i| try zg.addErrorTokOff(token, @intCast(i), "expected digit before '{c}'", .{bytes[i]}), + .trailing_special => |i| try zg.addErrorTokOff(token, @intCast(i), "expected digit after '{c}'", .{bytes[i - 1]}), + .trailing_underscore => |i| try zg.addErrorTokOff(token, @intCast(i), "trailing digit separator", .{}), + .duplicate_period => unreachable, // Validated by tokenizer + .invalid_character => unreachable, // Validated by tokenizer + .invalid_exponent_sign => |i| { + assert(bytes.len >= 2 and bytes[0] == '0' and bytes[1] == 'x'); // Validated by tokenizer + try zg.addErrorTokOff(token, @intCast(i), "sign '{c}' cannot follow digit '{c}' in hex base", .{ bytes[i], bytes[i - 1] }); + }, + .period_after_exponent => |i| try zg.addErrorTokOff(token, @intCast(i), "unexpected period after exponent", .{}), + } +} + +fn errNoteNode(zg: *ZonGen, node: Ast.Node.Index, comptime format: []const u8, args: anytype) Allocator.Error!Zoir.CompileError.Note { + const message_idx: u32 = @intCast(zg.string_bytes.items.len); + const writer = zg.string_bytes.writer(zg.gpa); + try writer.print(format, args); + try writer.writeByte(0); + + return .{ + .msg = @enumFromInt(message_idx), + .token = Zoir.CompileError.invalid_token, + .node_or_offset = node, + }; +} + +fn errNoteTok(zg: *ZonGen, tok: Ast.TokenIndex, comptime format: []const u8, args: 
anytype) Allocator.Error!Zoir.CompileError.Note { + const message_idx: u32 = @intCast(zg.string_bytes.items.len); + const writer = zg.string_bytes.writer(zg.gpa); + try writer.print(format, args); + try writer.writeByte(0); + + return .{ + .msg = @enumFromInt(message_idx), + .token = tok, + .node_or_offset = 0, + }; +} + +fn addErrorNode(zg: *ZonGen, node: Ast.Node.Index, comptime format: []const u8, args: anytype) Allocator.Error!void { + return zg.addErrorInner(Zoir.CompileError.invalid_token, node, format, args, &.{}); +} +fn addErrorTok(zg: *ZonGen, tok: Ast.TokenIndex, comptime format: []const u8, args: anytype) Allocator.Error!void { + return zg.addErrorInner(tok, 0, format, args, &.{}); +} +fn addErrorNodeNotes(zg: *ZonGen, node: Ast.Node.Index, comptime format: []const u8, args: anytype, notes: []const Zoir.CompileError.Note) Allocator.Error!void { + return zg.addErrorInner(Zoir.CompileError.invalid_token, node, format, args, notes); +} +fn addErrorTokNotes(zg: *ZonGen, tok: Ast.TokenIndex, comptime format: []const u8, args: anytype, notes: []const Zoir.CompileError.Note) Allocator.Error!void { + return zg.addErrorInner(tok, 0, format, args, notes); +} +fn addErrorTokOff(zg: *ZonGen, tok: Ast.TokenIndex, offset: u32, comptime format: []const u8, args: anytype) Allocator.Error!void { + return zg.addErrorInner(tok, offset, format, args, &.{}); +} +fn addErrorTokNotesOff(zg: *ZonGen, tok: Ast.TokenIndex, offset: u32, comptime format: []const u8, args: anytype, notes: []const Zoir.CompileError.Note) Allocator.Error!void { + return zg.addErrorInner(tok, offset, format, args, notes); +} + +fn addErrorInner( + zg: *ZonGen, + token: Ast.TokenIndex, + node_or_offset: u32, + comptime format: []const u8, + args: anytype, + notes: []const Zoir.CompileError.Note, +) Allocator.Error!void { + const gpa = zg.gpa; + + const first_note: u32 = @intCast(zg.error_notes.items.len); + try zg.error_notes.appendSlice(gpa, notes); + + const message_idx: u32 = 
@intCast(zg.string_bytes.items.len); + const writer = zg.string_bytes.writer(zg.gpa); + try writer.print(format, args); + try writer.writeByte(0); + + try zg.compile_errors.append(gpa, .{ + .msg = @enumFromInt(message_idx), + .token = token, + .node_or_offset = node_or_offset, + .first_note = first_note, + .note_count = @intCast(notes.len), + }); +} + +fn lowerAstErrors(zg: *ZonGen) Allocator.Error!void { + const gpa = zg.gpa; + const tree = zg.tree; + assert(tree.errors.len > 0); + + var msg: std.ArrayListUnmanaged(u8) = .empty; + defer msg.deinit(gpa); + + var notes: std.ArrayListUnmanaged(Zoir.CompileError.Note) = .empty; + defer notes.deinit(gpa); + + var cur_err = tree.errors[0]; + for (tree.errors[1..]) |err| { + if (err.is_note) { + try tree.renderError(err, msg.writer(gpa)); + try notes.append(gpa, try zg.errNoteTok(err.token, "{s}", .{msg.items})); + } else { + // Flush error + try tree.renderError(cur_err, msg.writer(gpa)); + const extra_offset = tree.errorOffset(cur_err); + try zg.addErrorTokNotesOff(cur_err.token, extra_offset, "{s}", .{msg.items}, notes.items); + notes.clearRetainingCapacity(); + cur_err = err; + + // TODO: `Parse` currently does not have good error recovery mechanisms, so the remaining errors could be bogus. + // As such, we'll ignore all remaining errors for now. We should improve `Parse` so that we can report all the errors. 
+ return; + } + msg.clearRetainingCapacity(); + } + + // Flush error + const extra_offset = tree.errorOffset(cur_err); + try tree.renderError(cur_err, msg.writer(gpa)); + try zg.addErrorTokNotesOff(cur_err.token, extra_offset, "{s}", .{msg.items}, notes.items); +} + +const std = @import("std"); +const assert = std.debug.assert; +const mem = std.mem; +const Allocator = mem.Allocator; +const StringIndexAdapter = std.hash_map.StringIndexAdapter; +const StringIndexContext = std.hash_map.StringIndexContext; +const ZonGen = @This(); +const Zoir = @import("Zoir.zig"); +const Ast = @import("Ast.zig"); diff --git a/lib/std/zig/string_literal.zig b/lib/std/zig/string_literal.zig index 6917809837..716a9b90f0 100644 --- a/lib/std/zig/string_literal.zig +++ b/lib/std/zig/string_literal.zig @@ -38,6 +38,41 @@ pub const Error = union(enum) { invalid_character: usize, /// `''`. Not returned for string literals. empty_char_literal, + + /// Returns `func(first_args[0], ..., first_args[n], offset + bad_idx, format, args)`. + pub fn lower( + err: Error, + raw_string: []const u8, + offset: u32, + comptime func: anytype, + first_args: anytype, + ) @typeInfo(@TypeOf(func)).@"fn".return_type.? 
{ + switch (err) { + inline else => |bad_index_or_void, tag| { + const bad_index: u32 = switch (@TypeOf(bad_index_or_void)) { + void => 0, + else => @intCast(bad_index_or_void), + }; + const fmt_str: []const u8, const args = switch (tag) { + .invalid_escape_character => .{ "invalid escape character: '{c}'", .{raw_string[bad_index]} }, + .expected_hex_digit => .{ "expected hex digit, found '{c}'", .{raw_string[bad_index]} }, + .empty_unicode_escape_sequence => .{ "empty unicode escape sequence", .{} }, + .expected_hex_digit_or_rbrace => .{ "expected hex digit or '}}', found '{c}'", .{raw_string[bad_index]} }, + .invalid_unicode_codepoint => .{ "unicode escape does not correspond to a valid unicode scalar value", .{} }, + .expected_lbrace => .{ "expected '{{', found '{c}'", .{raw_string[bad_index]} }, + .expected_rbrace => .{ "expected '}}', found '{c}'", .{raw_string[bad_index]} }, + .expected_single_quote => .{ "expected single quote ('), found '{c}'", .{raw_string[bad_index]} }, + .invalid_character => .{ "invalid byte in string or character literal: '{c}'", .{raw_string[bad_index]} }, + .empty_char_literal => .{ "empty character literal", .{} }, + }; + return @call(.auto, func, first_args ++ .{ + offset + bad_index, + fmt_str, + args, + }); + }, + } + } }; /// Asserts the slice starts and ends with single-quotes. 
diff --git a/src/fmt.zig b/src/fmt.zig index b545905367..9ab581cad4 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -13,6 +13,7 @@ const usage_fmt = \\ if the list is non-empty \\ --ast-check Run zig ast-check on every file \\ --exclude [file] Exclude file or directory from formatting + \\ --zon Treat all input files as ZON, regardless of file extension \\ \\ ; @@ -21,6 +22,7 @@ const Fmt = struct { seen: SeenMap, any_error: bool, check_ast: bool, + force_zon: bool, color: Color, gpa: Allocator, arena: Allocator, @@ -35,9 +37,10 @@ pub fn run( args: []const []const u8, ) !void { var color: Color = .auto; - var stdin_flag: bool = false; - var check_flag: bool = false; - var check_ast_flag: bool = false; + var stdin_flag = false; + var check_flag = false; + var check_ast_flag = false; + var force_zon = false; var input_files = std.ArrayList([]const u8).init(gpa); defer input_files.deinit(); var excluded_files = std.ArrayList([]const u8).init(gpa); @@ -74,6 +77,8 @@ pub fn run( i += 1; const next_arg = args[i]; try excluded_files.append(next_arg); + } else if (mem.eql(u8, arg, "--zon")) { + force_zon = true; } else { fatal("unrecognized parameter: '{s}'", .{arg}); } @@ -94,23 +99,40 @@ pub fn run( }; defer gpa.free(source_code); - var tree = std.zig.Ast.parse(gpa, source_code, .zig) catch |err| { + var tree = std.zig.Ast.parse(gpa, source_code, if (force_zon) .zon else .zig) catch |err| { fatal("error parsing stdin: {}", .{err}); }; defer tree.deinit(gpa); if (check_ast_flag) { - var zir = try std.zig.AstGen.generate(gpa, tree); + if (!force_zon) { + var zir = try std.zig.AstGen.generate(gpa, tree); + defer zir.deinit(gpa); - if (zir.hasCompileErrors()) { - var wip_errors: std.zig.ErrorBundle.Wip = undefined; - try wip_errors.init(gpa); - defer wip_errors.deinit(); - try wip_errors.addZirErrorMessages(zir, tree, source_code, ""); - var error_bundle = try wip_errors.toOwnedBundle(""); - defer error_bundle.deinit(gpa); - error_bundle.renderToStdErr(color.renderOptions()); 
- process.exit(2); + if (zir.hasCompileErrors()) { + var wip_errors: std.zig.ErrorBundle.Wip = undefined; + try wip_errors.init(gpa); + defer wip_errors.deinit(); + try wip_errors.addZirErrorMessages(zir, tree, source_code, ""); + var error_bundle = try wip_errors.toOwnedBundle(""); + defer error_bundle.deinit(gpa); + error_bundle.renderToStdErr(color.renderOptions()); + process.exit(2); + } + } else { + const zoir = try std.zig.ZonGen.generate(gpa, tree); + defer zoir.deinit(gpa); + + if (zoir.hasCompileErrors()) { + var wip_errors: std.zig.ErrorBundle.Wip = undefined; + try wip_errors.init(gpa); + defer wip_errors.deinit(); + try wip_errors.addZoirErrorMessages(zoir, tree, source_code, ""); + var error_bundle = try wip_errors.toOwnedBundle(""); + defer error_bundle.deinit(gpa); + error_bundle.renderToStdErr(color.renderOptions()); + process.exit(2); + } } } else if (tree.errors.len != 0) { try std.zig.printAstErrorsToStderr(gpa, tree, "", color); @@ -131,12 +153,13 @@ pub fn run( fatal("expected at least one source file argument", .{}); } - var fmt = Fmt{ + var fmt: Fmt = .{ .gpa = gpa, .arena = arena, - .seen = Fmt.SeenMap.init(gpa), + .seen = .init(gpa), .any_error = false, .check_ast = check_ast_flag, + .force_zon = force_zon, .color = color, .out_buffer = std.ArrayList(u8).init(gpa), }; @@ -276,7 +299,13 @@ fn fmtPathFile( // Add to set after no longer possible to get error.IsDir. 
if (try fmt.seen.fetchPut(stat.inode, {})) |_| return; - var tree = try std.zig.Ast.parse(gpa, source_code, .zig); + const mode: std.zig.Ast.Mode = mode: { + if (fmt.force_zon) break :mode .zon; + if (mem.endsWith(u8, sub_path, ".zon")) break :mode .zon; + break :mode .zig; + }; + + var tree = try std.zig.Ast.parse(gpa, source_code, mode); defer tree.deinit(gpa); if (tree.errors.len != 0) { @@ -289,18 +318,37 @@ fn fmtPathFile( if (stat.size > std.zig.max_src_size) return error.FileTooBig; - var zir = try std.zig.AstGen.generate(gpa, tree); - defer zir.deinit(gpa); + switch (mode) { + .zig => { + var zir = try std.zig.AstGen.generate(gpa, tree); + defer zir.deinit(gpa); - if (zir.hasCompileErrors()) { - var wip_errors: std.zig.ErrorBundle.Wip = undefined; - try wip_errors.init(gpa); - defer wip_errors.deinit(); - try wip_errors.addZirErrorMessages(zir, tree, source_code, file_path); - var error_bundle = try wip_errors.toOwnedBundle(""); - defer error_bundle.deinit(gpa); - error_bundle.renderToStdErr(fmt.color.renderOptions()); - fmt.any_error = true; + if (zir.hasCompileErrors()) { + var wip_errors: std.zig.ErrorBundle.Wip = undefined; + try wip_errors.init(gpa); + defer wip_errors.deinit(); + try wip_errors.addZirErrorMessages(zir, tree, source_code, file_path); + var error_bundle = try wip_errors.toOwnedBundle(""); + defer error_bundle.deinit(gpa); + error_bundle.renderToStdErr(fmt.color.renderOptions()); + fmt.any_error = true; + } + }, + .zon => { + var zoir = try std.zig.ZonGen.generate(gpa, tree); + defer zoir.deinit(gpa); + + if (zoir.hasCompileErrors()) { + var wip_errors: std.zig.ErrorBundle.Wip = undefined; + try wip_errors.init(gpa); + defer wip_errors.deinit(); + try wip_errors.addZoirErrorMessages(zoir, tree, source_code, file_path); + var error_bundle = try wip_errors.toOwnedBundle(""); + defer error_bundle.deinit(gpa); + error_bundle.renderToStdErr(fmt.color.renderOptions()); + fmt.any_error = true; + } + }, } } diff --git a/src/main.zig 
b/src/main.zig index 39fd3e6213..1d0f15f8fd 100644 --- a/src/main.zig +++ b/src/main.zig @@ -19,6 +19,7 @@ const Directory = std.Build.Cache.Directory; const EnvVar = std.zig.EnvVar; const LibCInstallation = std.zig.LibCInstallation; const AstGen = std.zig.AstGen; +const ZonGen = std.zig.ZonGen; const Server = std.zig.Server; const tracy = @import("tracy.zig"); @@ -6007,15 +6008,16 @@ fn parseCodeModel(arg: []const u8) std.builtin.CodeModel { const usage_ast_check = \\Usage: zig ast-check [file] \\ - \\ Given a .zig source file, reports any compile errors that can be - \\ ascertained on the basis of the source code alone, without target - \\ information or type checking. + \\ Given a .zig source file or .zon file, reports any compile errors + \\ that can be ascertained on the basis of the source code alone, + \\ without target information or type checking. \\ \\ If [file] is omitted, stdin is used. \\ \\Options: \\ -h, --help Print this help and exit \\ --color [auto|off|on] Enable or disable colored error messages + \\ --zon Treat the input file as ZON, regardless of file extension \\ -t (debug option) Output ZIR in text form to stdout \\ \\ @@ -6032,6 +6034,7 @@ fn cmdAstCheck( var color: Color = .auto; var want_output_text = false; + var force_zon = false; var zig_source_file: ?[]const u8 = null; var i: usize = 0; @@ -6043,6 +6046,8 @@ fn cmdAstCheck( return cleanExit(); } else if (mem.eql(u8, arg, "-t")) { want_output_text = true; + } else if (mem.eql(u8, arg, "--zon")) { + force_zon = true; } else if (mem.eql(u8, arg, "--color")) { if (i + 1 >= args.len) { fatal("expected [auto|on|off] after --color", .{}); @@ -6110,89 +6115,136 @@ fn cmdAstCheck( file.stat.size = source.len; } + const mode: Ast.Mode = mode: { + if (force_zon) break :mode .zon; + if (zig_source_file) |name| { + if (mem.endsWith(u8, name, ".zon")) { + break :mode .zon; + } + } + break :mode .zig; + }; + file.mod = try Package.Module.createLimited(arena, .{ .root = Path.cwd(), .root_src_path = 
file.sub_file_path, .fully_qualified_name = "root", }); - file.tree = try Ast.parse(gpa, file.source, .zig); + file.tree = try Ast.parse(gpa, file.source, mode); file.tree_loaded = true; defer file.tree.deinit(gpa); - file.zir = try AstGen.generate(gpa, file.tree); - file.zir_loaded = true; - defer file.zir.deinit(gpa); + switch (mode) { + .zig => { + file.zir = try AstGen.generate(gpa, file.tree); + file.zir_loaded = true; + defer file.zir.deinit(gpa); - if (file.zir.hasCompileErrors()) { - var wip_errors: std.zig.ErrorBundle.Wip = undefined; - try wip_errors.init(gpa); - defer wip_errors.deinit(); - try Compilation.addZirErrorMessages(&wip_errors, &file); - var error_bundle = try wip_errors.toOwnedBundle(""); - defer error_bundle.deinit(gpa); - error_bundle.renderToStdErr(color.renderOptions()); + if (file.zir.hasCompileErrors()) { + var wip_errors: std.zig.ErrorBundle.Wip = undefined; + try wip_errors.init(gpa); + defer wip_errors.deinit(); + try Compilation.addZirErrorMessages(&wip_errors, &file); + var error_bundle = try wip_errors.toOwnedBundle(""); + defer error_bundle.deinit(gpa); + error_bundle.renderToStdErr(color.renderOptions()); - if (file.zir.loweringFailed()) { - process.exit(1); - } - } + if (file.zir.loweringFailed()) { + process.exit(1); + } + } - if (!want_output_text) { - if (file.zir.hasCompileErrors()) { - process.exit(1); - } else { + if (!want_output_text) { + if (file.zir.hasCompileErrors()) { + process.exit(1); + } else { + return cleanExit(); + } + } + if (!build_options.enable_debug_extensions) { + fatal("-t option only available in builds of zig with debug extensions", .{}); + } + + { + const token_bytes = @sizeOf(Ast.TokenList) + + file.tree.tokens.len * (@sizeOf(std.zig.Token.Tag) + @sizeOf(Ast.ByteOffset)); + const tree_bytes = @sizeOf(Ast) + file.tree.nodes.len * + (@sizeOf(Ast.Node.Tag) + + @sizeOf(Ast.Node.Data) + + @sizeOf(Ast.TokenIndex)); + const instruction_bytes = file.zir.instructions.len * + // Here we don't use 
@sizeOf(Zir.Inst.Data) because it would include + // the debug safety tag but we want to measure release size. + (@sizeOf(Zir.Inst.Tag) + 8); + const extra_bytes = file.zir.extra.len * @sizeOf(u32); + const total_bytes = @sizeOf(Zir) + instruction_bytes + extra_bytes + + file.zir.string_bytes.len * @sizeOf(u8); + const stdout = io.getStdOut(); + const fmtIntSizeBin = std.fmt.fmtIntSizeBin; + // zig fmt: off + try stdout.writer().print( + \\# Source bytes: {} + \\# Tokens: {} ({}) + \\# AST Nodes: {} ({}) + \\# Total ZIR bytes: {} + \\# Instructions: {d} ({}) + \\# String Table Bytes: {} + \\# Extra Data Items: {d} ({}) + \\ + , .{ + fmtIntSizeBin(file.source.len), + file.tree.tokens.len, fmtIntSizeBin(token_bytes), + file.tree.nodes.len, fmtIntSizeBin(tree_bytes), + fmtIntSizeBin(total_bytes), + file.zir.instructions.len, fmtIntSizeBin(instruction_bytes), + fmtIntSizeBin(file.zir.string_bytes.len), + file.zir.extra.len, fmtIntSizeBin(extra_bytes), + }); + // zig fmt: on + } + + try @import("print_zir.zig").renderAsTextToFile(gpa, &file, io.getStdOut()); + + if (file.zir.hasCompileErrors()) { + process.exit(1); + } else { + return cleanExit(); + } + }, + .zon => { + const zoir = try ZonGen.generate(gpa, file.tree); + defer zoir.deinit(gpa); + + if (zoir.hasCompileErrors()) { + var wip_errors: std.zig.ErrorBundle.Wip = undefined; + try wip_errors.init(gpa); + defer wip_errors.deinit(); + + { + const src_path = try file.fullPath(gpa); + defer gpa.free(src_path); + try wip_errors.addZoirErrorMessages(zoir, file.tree, file.source, src_path); + } + + var error_bundle = try wip_errors.toOwnedBundle(""); + defer error_bundle.deinit(gpa); + error_bundle.renderToStdErr(color.renderOptions()); + + process.exit(1); + } + + if (!want_output_text) { + return cleanExit(); + } + + if (!build_options.enable_debug_extensions) { + fatal("-t option only available in builds of zig with debug extensions", .{}); + } + + try @import("print_zoir.zig").renderToFile(zoir, arena, 
io.getStdOut()); return cleanExit(); - } - } - if (!build_options.enable_debug_extensions) { - fatal("-t option only available in builds of zig with debug extensions", .{}); - } - - { - const token_bytes = @sizeOf(Ast.TokenList) + - file.tree.tokens.len * (@sizeOf(std.zig.Token.Tag) + @sizeOf(Ast.ByteOffset)); - const tree_bytes = @sizeOf(Ast) + file.tree.nodes.len * - (@sizeOf(Ast.Node.Tag) + - @sizeOf(Ast.Node.Data) + - @sizeOf(Ast.TokenIndex)); - const instruction_bytes = file.zir.instructions.len * - // Here we don't use @sizeOf(Zir.Inst.Data) because it would include - // the debug safety tag but we want to measure release size. - (@sizeOf(Zir.Inst.Tag) + 8); - const extra_bytes = file.zir.extra.len * @sizeOf(u32); - const total_bytes = @sizeOf(Zir) + instruction_bytes + extra_bytes + - file.zir.string_bytes.len * @sizeOf(u8); - const stdout = io.getStdOut(); - const fmtIntSizeBin = std.fmt.fmtIntSizeBin; - // zig fmt: off - try stdout.writer().print( - \\# Source bytes: {} - \\# Tokens: {} ({}) - \\# AST Nodes: {} ({}) - \\# Total ZIR bytes: {} - \\# Instructions: {d} ({}) - \\# String Table Bytes: {} - \\# Extra Data Items: {d} ({}) - \\ - , .{ - fmtIntSizeBin(file.source.len), - file.tree.tokens.len, fmtIntSizeBin(token_bytes), - file.tree.nodes.len, fmtIntSizeBin(tree_bytes), - fmtIntSizeBin(total_bytes), - file.zir.instructions.len, fmtIntSizeBin(instruction_bytes), - fmtIntSizeBin(file.zir.string_bytes.len), - file.zir.extra.len, fmtIntSizeBin(extra_bytes), - }); - // zig fmt: on - } - - try @import("print_zir.zig").renderAsTextToFile(gpa, &file, io.getStdOut()); - - if (file.zir.hasCompileErrors()) { - process.exit(1); - } else { - return cleanExit(); + }, } } diff --git a/src/print_zoir.zig b/src/print_zoir.zig new file mode 100644 index 0000000000..b6cc8fe4d9 --- /dev/null +++ b/src/print_zoir.zig @@ -0,0 +1,122 @@ +pub fn renderToFile(zoir: Zoir, arena: Allocator, f: std.fs.File) (std.fs.File.WriteError || Allocator.Error)!void { + var bw = 
std.io.bufferedWriter(f.writer()); + try renderToWriter(zoir, arena, bw.writer()); + try bw.flush(); +} + +pub fn renderToWriter(zoir: Zoir, arena: Allocator, w: anytype) (@TypeOf(w).Error || Allocator.Error)!void { + assert(!zoir.hasCompileErrors()); + + const fmtIntSizeBin = std.fmt.fmtIntSizeBin; + const bytes_per_node = comptime n: { + var n: usize = 0; + for (@typeInfo(Zoir.Node.Repr).@"struct".fields) |f| { + n += @sizeOf(f.type); + } + break :n n; + }; + + const node_bytes = zoir.nodes.len * bytes_per_node; + const extra_bytes = zoir.extra.len * @sizeOf(u32); + const limb_bytes = zoir.limbs.len * @sizeOf(std.math.big.Limb); + const string_bytes = zoir.string_bytes.len; + + // zig fmt: off + try w.print( + \\# Nodes: {} ({}) + \\# Extra Data Items: {} ({}) + \\# BigInt Limbs: {} ({}) + \\# String Table Bytes: {} + \\# Total ZON Bytes: {} + \\ + , .{ + zoir.nodes.len, fmtIntSizeBin(node_bytes), + zoir.extra.len, fmtIntSizeBin(extra_bytes), + zoir.limbs.len, fmtIntSizeBin(limb_bytes), + fmtIntSizeBin(string_bytes), + fmtIntSizeBin(node_bytes + extra_bytes + limb_bytes + string_bytes), + }); + // zig fmt: on + var pz: PrintZon = .{ + .w = w.any(), + .arena = arena, + .zoir = zoir, + .indent = 0, + }; + + return @errorCast(pz.renderRoot()); +} + +const PrintZon = struct { + w: std.io.AnyWriter, + arena: Allocator, + zoir: Zoir, + indent: u32, + + fn renderRoot(pz: *PrintZon) anyerror!void { + try pz.renderNode(.root); + try pz.w.writeByte('\n'); + } + + fn renderNode(pz: *PrintZon, node: Zoir.Node.Index) anyerror!void { + const zoir = pz.zoir; + try pz.w.print("%{d} = ", .{@intFromEnum(node)}); + switch (node.get(zoir)) { + .true => try pz.w.writeAll("true"), + .false => try pz.w.writeAll("false"), + .null => try pz.w.writeAll("null"), + .pos_inf => try pz.w.writeAll("inf"), + .neg_inf => try pz.w.writeAll("-inf"), + .nan => try pz.w.writeAll("nan"), + .int_literal => |storage| switch (storage) { + .small => |x| try pz.w.print("int({d})", .{x}), + .big => |x| { + 
const str = try x.toStringAlloc(pz.arena, 10, .lower); + try pz.w.print("int(big {s})", .{str}); + }, + }, + .float_literal => |x| try pz.w.print("float({d})", .{x}), + .char_literal => |x| try pz.w.print("char({d})", .{x}), + .enum_literal => |x| try pz.w.print("enum_literal({p})", .{std.zig.fmtId(x.get(zoir))}), + .string_literal => |x| try pz.w.print("str(\"{}\")", .{std.zig.fmtEscapes(x)}), + .empty_literal => try pz.w.writeAll("empty_literal(.{})"), + .array_literal => |vals| { + try pz.w.writeAll("array_literal({"); + pz.indent += 1; + for (0..vals.len) |idx| { + try pz.newline(); + try pz.renderNode(vals.at(@intCast(idx))); + try pz.w.writeByte(','); + } + pz.indent -= 1; + try pz.newline(); + try pz.w.writeAll("})"); + }, + .struct_literal => |s| { + try pz.w.writeAll("struct_literal({"); + pz.indent += 1; + for (s.names, 0..s.vals.len) |name, idx| { + try pz.newline(); + try pz.w.print("[{p}] ", .{std.zig.fmtId(name.get(zoir))}); + try pz.renderNode(s.vals.at(@intCast(idx))); + try pz.w.writeByte(','); + } + pz.indent -= 1; + try pz.newline(); + try pz.w.writeAll("})"); + }, + } + } + + fn newline(pz: *PrintZon) !void { + try pz.w.writeByte('\n'); + for (0..pz.indent) |_| { + try pz.w.writeByteNTimes(' ', 2); + } + } +}; + +const std = @import("std"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const Zoir = std.zig.Zoir;