compiler: introduce ZonGen and make ast-check run it for ZON inputs

Currently, `zig ast-check` fails on ZON files, because it tries to
interpret the file as Zig source code. This commit introduces a new
verification pass, `std.zig.ZonGen`, which applies to an AST in ZON
mode.

Like `AstGen`, this pass also converts the AST into a more helpful
format. Rather than a sequence of instructions like `Zir`, the output
format of `ZonGen` is a new data structure called `Zoir`. This type is
essentially a simpler form of AST, containing only the information
required for consumers of ZON. It is also far more compact than
`std.zig.Ast`, with the size generally being comparable to the size of
the well-formatted source file.

The emitted `Zoir` is currently not used aside from the `-t` option to
`ast-check`, which causes it to be dumped to stdout. However, in the
future, it can be used for comptime `@import` of ZON files, as well as
for simpler handling of files like `build.zig.zon`, and even by other
parts of the Zig Standard Library.

Resolves: #22078
This commit is contained in:
mlugg 2024-12-16 00:49:59 +00:00
parent d12c0bf909
commit c7485d73ac
No known key found for this signature in database
GPG Key ID: 3F5B7DCCBF4AF02E
9 changed files with 1530 additions and 190 deletions

View File

@ -14,6 +14,8 @@ pub const isPrimitive = primitives.isPrimitive;
pub const Ast = @import("zig/Ast.zig");
pub const AstGen = @import("zig/AstGen.zig");
pub const Zir = @import("zig/Zir.zig");
pub const Zoir = @import("zig/Zoir.zig");
pub const ZonGen = @import("zig/ZonGen.zig");
pub const system = @import("zig/system.zig");
pub const CrossTarget = @compileError("deprecated; use std.Target.Query");
pub const BuiltinFn = @import("zig/BuiltinFn.zig");

View File

@ -130,6 +130,8 @@ fn appendRefsAssumeCapacity(astgen: *AstGen, refs: []const Zir.Inst.Ref) void {
}
pub fn generate(gpa: Allocator, tree: Ast) Allocator.Error!Zir {
assert(tree.mode == .zig);
var arena = std.heap.ArenaAllocator.init(gpa);
defer arena.deinit();
@ -11413,83 +11415,7 @@ fn parseStrLit(
fn failWithStrLitError(astgen: *AstGen, err: std.zig.string_literal.Error, token: Ast.TokenIndex, bytes: []const u8, offset: u32) InnerError {
const raw_string = bytes[offset..];
switch (err) {
.invalid_escape_character => |bad_index| {
return astgen.failOff(
token,
offset + @as(u32, @intCast(bad_index)),
"invalid escape character: '{c}'",
.{raw_string[bad_index]},
);
},
.expected_hex_digit => |bad_index| {
return astgen.failOff(
token,
offset + @as(u32, @intCast(bad_index)),
"expected hex digit, found '{c}'",
.{raw_string[bad_index]},
);
},
.empty_unicode_escape_sequence => |bad_index| {
return astgen.failOff(
token,
offset + @as(u32, @intCast(bad_index)),
"empty unicode escape sequence",
.{},
);
},
.expected_hex_digit_or_rbrace => |bad_index| {
return astgen.failOff(
token,
offset + @as(u32, @intCast(bad_index)),
"expected hex digit or '}}', found '{c}'",
.{raw_string[bad_index]},
);
},
.invalid_unicode_codepoint => |bad_index| {
return astgen.failOff(
token,
offset + @as(u32, @intCast(bad_index)),
"unicode escape does not correspond to a valid unicode scalar value",
.{},
);
},
.expected_lbrace => |bad_index| {
return astgen.failOff(
token,
offset + @as(u32, @intCast(bad_index)),
"expected '{{', found '{c}",
.{raw_string[bad_index]},
);
},
.expected_rbrace => |bad_index| {
return astgen.failOff(
token,
offset + @as(u32, @intCast(bad_index)),
"expected '}}', found '{c}",
.{raw_string[bad_index]},
);
},
.expected_single_quote => |bad_index| {
return astgen.failOff(
token,
offset + @as(u32, @intCast(bad_index)),
"expected single quote ('), found '{c}",
.{raw_string[bad_index]},
);
},
.invalid_character => |bad_index| {
return astgen.failOff(
token,
offset + @as(u32, @intCast(bad_index)),
"invalid byte in string or character literal: '{c}'",
.{raw_string[bad_index]},
);
},
.empty_char_literal => {
return astgen.failOff(token, offset, "empty character literal", .{});
},
}
return err.lower(raw_string, offset, AstGen.failOff, .{ astgen, token });
}
fn failNode(
@ -14019,30 +13945,40 @@ fn emitDbgStmtForceCurrentIndex(gz: *GenZir, lc: LineColumn) !void {
}
fn lowerAstErrors(astgen: *AstGen) !void {
const gpa = astgen.gpa;
const tree = astgen.tree;
assert(tree.errors.len > 0);
const gpa = astgen.gpa;
const parse_err = tree.errors[0];
var msg: std.ArrayListUnmanaged(u8) = .empty;
defer msg.deinit(gpa);
var notes: std.ArrayListUnmanaged(u32) = .empty;
defer notes.deinit(gpa);
for (tree.errors[1..]) |note| {
if (!note.is_note) break;
var cur_err = tree.errors[0];
for (tree.errors[1..]) |err| {
if (err.is_note) {
try tree.renderError(err, msg.writer(gpa));
try notes.append(gpa, try astgen.errNoteTok(err.token, "{s}", .{msg.items}));
} else {
// Flush error
const extra_offset = tree.errorOffset(cur_err);
try tree.renderError(cur_err, msg.writer(gpa));
try astgen.appendErrorTokNotesOff(cur_err.token, extra_offset, "{s}", .{msg.items}, notes.items);
notes.clearRetainingCapacity();
cur_err = err;
// TODO: `Parse` currently does not have good error recovery mechanisms, so the remaining errors could be bogus.
// As such, we'll ignore all remaining errors for now. We should improve `Parse` so that we can report all the errors.
return;
}
msg.clearRetainingCapacity();
try tree.renderError(note, msg.writer(gpa));
try notes.append(gpa, try astgen.errNoteTok(note.token, "{s}", .{msg.items}));
}
const extra_offset = tree.errorOffset(parse_err);
msg.clearRetainingCapacity();
try tree.renderError(parse_err, msg.writer(gpa));
try astgen.appendErrorTokNotesOff(parse_err.token, extra_offset, "{s}", .{msg.items}, notes.items);
// Flush error
const extra_offset = tree.errorOffset(cur_err);
try tree.renderError(cur_err, msg.writer(gpa));
try astgen.appendErrorTokNotesOff(cur_err.token, extra_offset, "{s}", .{msg.items}, notes.items);
}
const DeclarationName = union(enum) {

View File

@ -507,7 +507,7 @@ pub const Wip = struct {
}
if (item.data.notes != 0) {
const notes_start = try eb.reserveNotes(item.data.notes);
const notes_start = try eb.reserveNotes(item.data.notesLen(zir));
const block = zir.extraData(Zir.Inst.Block, item.data.notes);
const body = zir.extra[block.end..][0..block.data.body_len];
for (notes_start.., body) |note_i, body_elem| {
@ -547,6 +547,77 @@ pub const Wip = struct {
}
}
/// Adds every compile error stored in `zoir`, together with its notes, to this
/// error bundle as root error messages.
///
/// `tree` and `source` are used to turn each error's token/node reference into
/// a byte span and a line/column; `src_path` is recorded as the path of every
/// emitted source location. Asserts that `zoir` has compile errors.
pub fn addZoirErrorMessages(
eb: *ErrorBundle.Wip,
zoir: std.zig.Zoir,
tree: std.zig.Ast,
source: [:0]const u8,
src_path: []const u8,
) !void {
assert(zoir.hasCompileErrors());
for (zoir.compile_errors) |err| {
// An error either points at a whole AST node (`token == invalid_token`)
// or at a byte offset inside a specific token.
const err_span: std.zig.Ast.Span = span: {
if (err.token == std.zig.Zoir.CompileError.invalid_token) {
break :span tree.nodeToSpan(err.node_or_offset);
}
const token_start = tree.tokens.items(.start)[err.token];
const start = token_start + err.node_or_offset;
const end = token_start + @as(u32, @intCast(tree.tokenSlice(err.token).len));
break :span .{ .start = start, .end = end, .main = start };
};
const err_loc = std.zig.findLineColumn(source, err_span.main);
try eb.addRootErrorMessage(.{
.msg = try eb.addString(err.msg.get(zoir)),
.src_loc = try eb.addSourceLocation(.{
.src_path = try eb.addString(src_path),
.span_start = err_span.start,
.span_main = err_span.main,
.span_end = err_span.end,
.line = @intCast(err_loc.line),
.column = @intCast(err_loc.column),
.source_line = try eb.addString(err_loc.source_line),
}),
.notes_len = err.note_count,
});
// Reserve the note slots right after the root message, then fill them in
// one at a time below.
const notes_start = try eb.reserveNotes(err.note_count);
for (notes_start.., err.first_note.., 0..err.note_count) |eb_note_idx, zoir_note_idx, _| {
const note = zoir.error_notes[zoir_note_idx];
// Same span resolution as for the error itself.
const note_span: std.zig.Ast.Span = span: {
if (note.token == std.zig.Zoir.CompileError.invalid_token) {
break :span tree.nodeToSpan(note.node_or_offset);
}
const token_start = tree.tokens.items(.start)[note.token];
const start = token_start + note.node_or_offset;
const end = token_start + @as(u32, @intCast(tree.tokenSlice(note.token).len));
break :span .{ .start = start, .end = end, .main = start };
};
const note_loc = std.zig.findLineColumn(source, note_span.main);
// This line can cause `wip.extra.items` to be resized.
const note_index = @intFromEnum(try eb.addErrorMessage(.{
.msg = try eb.addString(note.msg.get(zoir)),
.src_loc = try eb.addSourceLocation(.{
.src_path = try eb.addString(src_path),
.span_start = note_span.start,
.span_main = note_span.main,
.span_end = note_span.end,
.line = @intCast(note_loc.line),
.column = @intCast(note_loc.column),
// When the note sits at the same location as its error, 0 is stored
// instead of adding the source line string again.
// NOTE(review): presumably 0 means "no source line" to consumers -- confirm.
.source_line = if (note_loc.eql(err_loc))
0
else
try eb.addString(note_loc.source_line),
}),
.notes_len = 0,
}));
// Index `eb.extra.items` only now, after the call above has finished,
// because that call may have reallocated it.
eb.extra.items[eb_note_idx] = note_index;
}
}
}
fn addOtherMessage(wip: *Wip, other: ErrorBundle, msg_index: MessageIndex) !MessageIndex {
const other_msg = other.getErrorMessage(msg_index);
const src_loc = try wip.addOtherSourceLocation(other, other_msg.src_loc);

239
lib/std/zig/Zoir.zig Normal file
View File

@ -0,0 +1,239 @@
//! Zig Object Intermediate Representation.
//! Simplified AST for the ZON (Zig Object Notation) format.
//! `ZonGen` converts `Ast` to `Zoir`.
nodes: std.MultiArrayList(Node.Repr).Slice,
extra: []u32,
limbs: []std.math.big.Limb,
string_bytes: []u8,
compile_errors: []Zoir.CompileError,
error_notes: []Zoir.CompileError.Note,
/// Reports whether this `Zoir` holds compile errors.
///
/// Also asserts the invariant that error data and tree data are mutually
/// exclusive: a `Zoir` with errors has no nodes, `extra` data or limbs, and a
/// `Zoir` without errors has no error notes.
pub fn hasCompileErrors(zoir: Zoir) bool {
    if (zoir.compile_errors.len == 0) {
        // Notes only exist as attachments to errors.
        assert(zoir.error_notes.len == 0);
        return false;
    }

    // A failed lowering carries no tree data at all.
    assert(zoir.nodes.len == 0);
    assert(zoir.extra.len == 0);
    assert(zoir.limbs.len == 0);
    return true;
}
/// Frees all memory owned by `zoir`.
/// `gpa` must be the allocator the `Zoir` was created with.
pub fn deinit(zoir: Zoir, gpa: Allocator) void {
    // `deinit` on the node storage needs a mutable value, so work on a copy.
    var node_storage = zoir.nodes;
    node_storage.deinit(gpa);

    gpa.free(zoir.extra);
    gpa.free(zoir.limbs);
    gpa.free(zoir.string_bytes);

    gpa.free(zoir.compile_errors);
    gpa.free(zoir.error_notes);
}
/// Decoded view of a single ZON value, as returned by `Node.Index.get`.
/// The compact stored form is `Node.Repr`.
pub const Node = union(enum) {
/// A literal `true` value.
true,
/// A literal `false` value.
false,
/// A literal `null` value.
null,
/// A literal `inf` value.
pos_inf,
/// A literal `-inf` value.
neg_inf,
/// A literal `nan` value.
nan,
/// An integer literal.
int_literal: union(enum) {
small: i32,
big: std.math.big.int.Const,
},
/// A floating-point literal.
float_literal: f128,
/// A Unicode codepoint literal.
char_literal: u32,
/// An enum literal. The string is the literal, i.e. `foo` for `.foo`.
enum_literal: NullTerminatedString,
/// A string literal.
string_literal: []const u8,
/// An empty struct/array literal, i.e. `.{}`.
empty_literal,
/// An array literal. The `Range` gives the elements of the array literal.
array_literal: Node.Index.Range,
/// A struct literal. `names.len` is always equal to `vals.len`.
struct_literal: struct {
names: []const NullTerminatedString,
vals: Node.Index.Range,
},
/// Index of a node within `Zoir.nodes`.
pub const Index = enum(u32) {
/// The root value of the ZON document is always stored at index 0.
root = 0,
_,
/// Decodes the node stored at `idx` into its `Node` form.
/// Any slices in the result (big-int limbs, string bytes, field names)
/// point into memory owned by `zoir`.
pub fn get(idx: Index, zoir: Zoir) Node {
const repr = zoir.nodes.get(@intFromEnum(idx));
return switch (repr.tag) {
.true => .true,
.false => .false,
.null => .null,
.pos_inf => .pos_inf,
.neg_inf => .neg_inf,
.nan => .nan,
.int_literal_small => .{ .int_literal = .{ .small = @bitCast(repr.data) } },
.int_literal_pos, .int_literal_neg => .{ .int_literal = .{ .big = .{
.limbs = l: {
const limb_count, const limbs_idx = zoir.extra[repr.data..][0..2].*;
break :l zoir.limbs[limbs_idx..][0..limb_count];
},
// The sign is carried by the tag rather than by the limbs.
.positive = switch (repr.tag) {
.int_literal_pos => true,
.int_literal_neg => false,
else => unreachable,
},
} } },
// The stored `f32` is widened to the `f128` payload type.
.float_literal_small => .{ .float_literal = @as(f32, @bitCast(repr.data)) },
.float_literal => .{ .float_literal = @bitCast(zoir.extra[repr.data..][0..4].*) },
.char_literal => .{ .char_literal = repr.data },
.enum_literal => .{ .enum_literal = @enumFromInt(repr.data) },
.string_literal => .{ .string_literal = s: {
const start, const len = zoir.extra[repr.data..][0..2].*;
break :s zoir.string_bytes[start..][0..len];
} },
.string_literal_null => .{ .string_literal = NullTerminatedString.get(@enumFromInt(repr.data), zoir) },
.empty_literal => .empty_literal,
.array_literal => .{ .array_literal = a: {
const elem_count, const first_elem = zoir.extra[repr.data..][0..2].*;
break :a .{ .start = @enumFromInt(first_elem), .len = elem_count };
} },
.struct_literal => .{ .struct_literal = s: {
const elem_count, const first_elem = zoir.extra[repr.data..][0..2].*;
// The field names follow the two header words in `extra`.
const field_names = zoir.extra[repr.data + 2 ..][0..elem_count];
break :s .{
.names = @ptrCast(field_names),
.vals = .{ .start = @enumFromInt(first_elem), .len = elem_count },
};
} },
};
}
/// Returns the AST node recorded for the node at `idx`.
pub fn getAstNode(idx: Index, zoir: Zoir) std.zig.Ast.Node.Index {
return zoir.nodes.items(.ast_node)[@intFromEnum(idx)];
}
/// A run of `len` consecutive node indices beginning at `start`.
pub const Range = struct {
start: Index,
len: u32,
/// Returns the index of the `i`th node in the range. Asserts `i < r.len`.
pub fn at(r: Range, i: u32) Index {
assert(i < r.len);
return @enumFromInt(@intFromEnum(r.start) + i);
}
};
};
/// Compact stored form of a node; the meaning of `data` depends on `tag`.
pub const Repr = struct {
tag: Tag,
data: u32,
ast_node: std.zig.Ast.Node.Index,
pub const Tag = enum(u8) {
/// `data` is ignored.
true,
/// `data` is ignored.
false,
/// `data` is ignored.
null,
/// `data` is ignored.
pos_inf,
/// `data` is ignored.
neg_inf,
/// `data` is ignored.
nan,
/// `data` is the `i32` value.
int_literal_small,
/// `data` is index into `extra` of:
/// * `limb_count: u32`
/// * `limbs_idx: u32`
int_literal_pos,
/// Identical to `int_literal_pos`, except the value is negative.
int_literal_neg,
/// `data` is the `f32` value.
float_literal_small,
/// `data` is index into `extra` of 4 elements which are a bitcast `f128`.
float_literal,
/// `data` is the `u32` value.
char_literal,
/// `data` is a `NullTerminatedString`.
enum_literal,
/// `data` is index into `extra` of:
/// * `start: u32`
/// * `len: u32`
string_literal,
/// Null-terminated string literal,
/// `data` is a `NullTerminatedString`.
string_literal_null,
/// An empty struct/array literal, `.{}`.
/// `data` is ignored.
empty_literal,
/// `data` is index into `extra` of:
/// * `elem_count: u32`
/// * `first_elem: Node.Index`
/// The nodes `first_elem .. first_elem + elem_count` are the children.
array_literal,
/// `data` is index into `extra` of:
/// * `elem_count: u32`
/// * `first_elem: Node.Index`
/// * `field_name: NullTerminatedString` for each `elem_count`
/// The nodes `first_elem .. first_elem + elem_count` are the children.
struct_literal,
};
};
};
/// Byte offset into `Zoir.string_bytes` at which a 0-terminated string begins.
pub const NullTerminatedString = enum(u32) {
    _,

    /// Returns the string starting at this offset, up to (not including) its
    /// terminating 0 byte. The result points into `zoir.string_bytes`.
    pub fn get(nts: NullTerminatedString, zoir: Zoir) [:0]const u8 {
        const tail = zoir.string_bytes[@intFromEnum(nts)..];
        const len = std.mem.indexOfScalar(u8, tail, 0).?;
        return tail[0..len :0];
    }
};
/// A single compile error produced while lowering an AST to `Zoir`.
/// Its notes, if any, are stored contiguously in `Zoir.error_notes`.
pub const CompileError = extern struct {
/// The error message text.
msg: NullTerminatedString,
/// The token the error refers to, or `invalid_token` if it refers to an
/// AST node instead (see `node_or_offset`).
token: Ast.TokenIndex,
/// If `token == invalid_token`, this is an `Ast.Node.Index`.
/// Otherwise, this is a byte offset into `token`.
node_or_offset: u32,
/// Ignored if `note_count == 0`.
first_note: u32,
/// Number of entries in `Zoir.error_notes`, starting at `first_note`,
/// that belong to this error.
note_count: u32,
/// Returns the notes attached to this error as a slice of `zoir.error_notes`.
pub fn getNotes(err: CompileError, zoir: Zoir) []const Note {
return zoir.error_notes[err.first_note..][0..err.note_count];
}
/// A note attached to a `CompileError`; located the same way as the error.
pub const Note = extern struct {
/// The note message text.
msg: NullTerminatedString,
/// The token the note refers to, or `invalid_token` if it refers to an
/// AST node instead (see `node_or_offset`).
token: Ast.TokenIndex,
/// If `token == invalid_token`, this is an `Ast.Node.Index`.
/// Otherwise, this is a byte offset into `token`.
node_or_offset: u32,
};
/// Sentinel `token` value meaning "this error/note points at an AST node".
pub const invalid_token: Ast.TokenIndex = std.math.maxInt(Ast.TokenIndex);
comptime {
// Both structs are required to have a unique in-memory representation.
assert(std.meta.hasUniqueRepresentation(CompileError));
assert(std.meta.hasUniqueRepresentation(Note));
}
};
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const Ast = std.zig.Ast;
const Zoir = @This();

835
lib/std/zig/ZonGen.zig Normal file
View File

@ -0,0 +1,835 @@
//! Ingests an `Ast` and produces a `Zoir`.
gpa: Allocator,
tree: Ast,
nodes: std.MultiArrayList(Zoir.Node.Repr),
extra: std.ArrayListUnmanaged(u32),
limbs: std.ArrayListUnmanaged(std.math.big.Limb),
string_bytes: std.ArrayListUnmanaged(u8),
string_table: std.HashMapUnmanaged(u32, void, StringIndexContext, std.hash_map.default_max_load_percentage),
compile_errors: std.ArrayListUnmanaged(Zoir.CompileError),
error_notes: std.ArrayListUnmanaged(Zoir.CompileError.Note),
/// Lowers `tree`, which must have been parsed in ZON mode, into a `Zoir`.
///
/// If `tree` has parse errors, or lowering reports errors, the result contains
/// only the errors (with their notes and message strings) and no node data.
/// Otherwise it contains the node data and no errors.
///
/// The caller owns the returned `Zoir` and frees it with `Zoir.deinit`,
/// passing the same `gpa`.
pub fn generate(gpa: Allocator, tree: Ast) Allocator.Error!Zoir {
assert(tree.mode == .zon);
var zg: ZonGen = .{
.gpa = gpa,
.tree = tree,
.nodes = .empty,
.extra = .empty,
.limbs = .empty,
.string_bytes = .empty,
.string_table = .empty,
.compile_errors = .empty,
.error_notes = .empty,
};
// Releases whatever working storage has not been handed over to the result
// by one of the `toOwnedSlice` calls below.
defer {
zg.nodes.deinit(gpa);
zg.extra.deinit(gpa);
zg.limbs.deinit(gpa);
zg.string_bytes.deinit(gpa);
zg.string_table.deinit(gpa);
zg.compile_errors.deinit(gpa);
zg.error_notes.deinit(gpa);
}
if (tree.errors.len == 0) {
const root_ast_node = tree.nodes.items(.data)[0].lhs;
try zg.nodes.append(gpa, undefined); // index 0; root node
try zg.expr(root_ast_node, .root);
} else {
// The tree did not parse cleanly: only report the parse errors.
try zg.lowerAstErrors();
}
if (zg.compile_errors.items.len > 0) {
// Error result: hand over only the error data (`string_bytes` holds the
// message text); node data is dropped by the `defer` above.
const string_bytes = try zg.string_bytes.toOwnedSlice(gpa);
errdefer gpa.free(string_bytes);
const compile_errors = try zg.compile_errors.toOwnedSlice(gpa);
errdefer gpa.free(compile_errors);
const error_notes = try zg.error_notes.toOwnedSlice(gpa);
errdefer gpa.free(error_notes);
return .{
.nodes = .empty,
.extra = &.{},
.limbs = &.{},
.string_bytes = string_bytes,
.compile_errors = compile_errors,
.error_notes = error_notes,
};
} else {
// Success result: hand over the node data.
assert(zg.error_notes.items.len == 0);
var nodes = zg.nodes.toOwnedSlice();
errdefer nodes.deinit(gpa);
const extra = try zg.extra.toOwnedSlice(gpa);
errdefer gpa.free(extra);
const limbs = try zg.limbs.toOwnedSlice(gpa);
errdefer gpa.free(limbs);
const string_bytes = try zg.string_bytes.toOwnedSlice(gpa);
errdefer gpa.free(string_bytes);
return .{
.nodes = nodes,
.extra = extra,
.limbs = limbs,
.string_bytes = string_bytes,
.compile_errors = &.{},
.error_notes = &.{},
};
}
}
/// Lowers the AST expression `node` into the already-reserved Zoir node slot
/// `dest_node`.
///
/// Constructs that are not valid ZON are reported through the `addError*`
/// helpers; in those cases `dest_node` is left unset.
/// Array and struct literals reserve one contiguous run of node slots for
/// their children before recursing into them.
fn expr(zg: *ZonGen, node: Ast.Node.Index, dest_node: Zoir.Node.Index) Allocator.Error!void {
    const gpa = zg.gpa;
    const tree = zg.tree;
    const node_tags = tree.nodes.items(.tag);
    const node_datas = tree.nodes.items(.data);
    const main_tokens = tree.nodes.items(.main_token);

    switch (node_tags[node]) {
        // None of these can appear in expression position.
        .root => unreachable,
        .@"usingnamespace" => unreachable,
        .test_decl => unreachable,
        .container_field_init => unreachable,
        .container_field_align => unreachable,
        .container_field => unreachable,
        .fn_decl => unreachable,
        .global_var_decl => unreachable,
        .local_var_decl => unreachable,
        .simple_var_decl => unreachable,
        .aligned_var_decl => unreachable,
        .@"defer" => unreachable,
        .@"errdefer" => unreachable,
        .switch_case => unreachable,
        .switch_case_inline => unreachable,
        .switch_case_one => unreachable,
        .switch_case_inline_one => unreachable,
        .switch_range => unreachable,
        .asm_output => unreachable,
        .asm_input => unreachable,
        .for_range => unreachable,
        .assign => unreachable,
        .assign_destructure => unreachable,
        .assign_shl => unreachable,
        .assign_shl_sat => unreachable,
        .assign_shr => unreachable,
        .assign_bit_and => unreachable,
        .assign_bit_or => unreachable,
        .assign_bit_xor => unreachable,
        .assign_div => unreachable,
        .assign_sub => unreachable,
        .assign_sub_wrap => unreachable,
        .assign_sub_sat => unreachable,
        .assign_mod => unreachable,
        .assign_add => unreachable,
        .assign_add_wrap => unreachable,
        .assign_add_sat => unreachable,
        .assign_mul => unreachable,
        .assign_mul_wrap => unreachable,
        .assign_mul_sat => unreachable,

        .shl,
        .shr,
        .add,
        .add_wrap,
        .add_sat,
        .sub,
        .sub_wrap,
        .sub_sat,
        .mul,
        .mul_wrap,
        .mul_sat,
        .div,
        .mod,
        .shl_sat,
        .bit_and,
        .bit_or,
        .bit_xor,
        .bang_equal,
        .equal_equal,
        .greater_than,
        .greater_or_equal,
        .less_than,
        .less_or_equal,
        .array_cat,
        .array_mult,
        .bool_and,
        .bool_or,
        .bool_not,
        .bit_not,
        .negation_wrap,
        => try zg.addErrorTok(main_tokens[node], "operator '{s}' is not allowed in ZON", .{tree.tokenSlice(main_tokens[node])}),

        .error_union,
        .merge_error_sets,
        .optional_type,
        .anyframe_literal,
        .anyframe_type,
        .ptr_type_aligned,
        .ptr_type_sentinel,
        .ptr_type,
        .ptr_type_bit_range,
        .container_decl,
        .container_decl_trailing,
        .container_decl_arg,
        .container_decl_arg_trailing,
        .container_decl_two,
        .container_decl_two_trailing,
        .tagged_union,
        .tagged_union_trailing,
        .tagged_union_enum_tag,
        .tagged_union_enum_tag_trailing,
        .tagged_union_two,
        .tagged_union_two_trailing,
        .array_type,
        .array_type_sentinel,
        .error_set_decl,
        .fn_proto_simple,
        .fn_proto_multi,
        .fn_proto_one,
        .fn_proto,
        => try zg.addErrorNode(node, "types are not available in ZON", .{}),

        .call_one,
        .call_one_comma,
        .async_call_one,
        .async_call_one_comma,
        .call,
        .call_comma,
        .async_call,
        .async_call_comma,
        .@"return",
        .if_simple,
        .@"if",
        .while_simple,
        .while_cont,
        .@"while",
        .for_simple,
        .@"for",
        .@"catch",
        .@"orelse",
        .@"break",
        .@"continue",
        .@"switch",
        .switch_comma,
        .@"nosuspend",
        .@"suspend",
        .@"await",
        .@"resume",
        .@"try",
        .unreachable_literal,
        => try zg.addErrorNode(node, "control flow is not allowed in ZON", .{}),

        .@"comptime" => try zg.addErrorNode(node, "keyword 'comptime' is not allowed in ZON", .{}),
        .asm_simple, .@"asm" => try zg.addErrorNode(node, "inline asm is not allowed in ZON", .{}),

        .builtin_call_two,
        .builtin_call_two_comma,
        .builtin_call,
        .builtin_call_comma,
        => try zg.addErrorNode(node, "builtin function calls are not allowed in ZON", .{}),

        .field_access => try zg.addErrorNode(node, "field accesses are not allowed in ZON", .{}),

        .slice_open,
        .slice,
        .slice_sentinel,
        => try zg.addErrorNode(node, "slice operator is not allowed in ZON", .{}),

        .deref, .address_of => try zg.addErrorTok(main_tokens[node], "pointers are not available in ZON", .{}),
        .unwrap_optional => try zg.addErrorTok(main_tokens[node], "optionals are not available in ZON", .{}),
        .error_value => try zg.addErrorNode(node, "errors are not available in ZON", .{}),

        // `node` is an AST node index, not a token index, so this must be
        // reported as a node error.
        .array_access => try zg.addErrorNode(node, "array indexing is not allowed in ZON", .{}),

        .block_two,
        .block_two_semicolon,
        .block,
        .block_semicolon,
        => try zg.addErrorNode(node, "blocks are not allowed in ZON", .{}),

        // Typed initializers such as `T{ ... }`: point at the type expression.
        .array_init_one,
        .array_init_one_comma,
        .array_init,
        .array_init_comma,
        .struct_init_one,
        .struct_init_one_comma,
        .struct_init,
        .struct_init_comma,
        => {
            var buf: [2]Ast.Node.Index = undefined;

            const type_node = if (tree.fullArrayInit(&buf, node)) |full|
                full.ast.type_expr
            else if (tree.fullStructInit(&buf, node)) |full|
                full.ast.type_expr
            else
                unreachable;

            try zg.addErrorNodeNotes(type_node, "types are not available in ZON", .{}, &.{
                try zg.errNoteNode(type_node, "replace the type with '.'", .{}),
            });
        },

        .grouped_expression => {
            try zg.addErrorTokNotes(main_tokens[node], "expression grouping is not allowed in ZON", .{}, &.{
                try zg.errNoteTok(main_tokens[node], "these parentheses are always redundant", .{}),
            });
            // Keep lowering the inner expression so that errors inside the
            // parentheses are reported as well.
            return zg.expr(node_datas[node].lhs, dest_node);
        },

        .negation => {
            // `-` is only accepted directly in front of a number literal or `inf`.
            const child_node = node_datas[node].lhs;
            switch (node_tags[child_node]) {
                .number_literal => return zg.numberLiteral(child_node, node, dest_node, .negative),
                .identifier => {
                    const child_ident = tree.tokenSlice(main_tokens[child_node]);
                    if (mem.eql(u8, child_ident, "inf")) {
                        zg.setNode(dest_node, .{
                            .tag = .neg_inf,
                            .data = 0, // ignored
                            .ast_node = node,
                        });
                        return;
                    }
                },
                else => {},
            }
            try zg.addErrorTok(main_tokens[node], "expected number or 'inf' after '-'", .{});
        },

        .number_literal => try zg.numberLiteral(node, node, dest_node, .positive),
        .char_literal => try zg.charLiteral(node, dest_node),
        .identifier => try zg.identifier(node, dest_node),

        .enum_literal => {
            const str_index = zg.identAsString(main_tokens[node]) catch |err| switch (err) {
                error.BadString => undefined, // doesn't matter, there's an error
                error.OutOfMemory => |e| return e,
            };
            zg.setNode(dest_node, .{
                .tag = .enum_literal,
                .data = @intFromEnum(str_index),
                .ast_node = node,
            });
        },

        .string_literal, .multiline_string_literal => if (zg.strLitAsString(node)) |result| switch (result) {
            .nts => |nts| zg.setNode(dest_node, .{
                .tag = .string_literal_null,
                .data = @intFromEnum(nts),
                .ast_node = node,
            }),
            .slice => |slice| {
                const extra_index: u32 = @intCast(zg.extra.items.len);
                try zg.extra.appendSlice(zg.gpa, &.{ slice.start, slice.len });
                zg.setNode(dest_node, .{
                    .tag = .string_literal,
                    .data = extra_index,
                    .ast_node = node,
                });
            },
        } else |err| switch (err) {
            error.BadString => {},
            error.OutOfMemory => |e| return e,
        },

        .array_init_dot_two,
        .array_init_dot_two_comma,
        .array_init_dot,
        .array_init_dot_comma,
        => {
            var buf: [2]Ast.Node.Index = undefined;
            const full = tree.fullArrayInit(&buf, node).?;
            assert(full.ast.elements.len != 0); // Otherwise it would be a struct init
            assert(full.ast.type_expr == 0); // The tag was `array_init_dot_*`

            // Reserve all child slots up front so they are contiguous.
            const first_elem: u32 = @intCast(zg.nodes.len);
            try zg.nodes.resize(gpa, zg.nodes.len + full.ast.elements.len);

            const extra_index: u32 = @intCast(zg.extra.items.len);
            try zg.extra.appendSlice(gpa, &.{
                @intCast(full.ast.elements.len),
                first_elem,
            });

            zg.setNode(dest_node, .{
                .tag = .array_literal,
                .data = extra_index,
                .ast_node = node,
            });

            for (full.ast.elements, first_elem..) |elem_node, elem_dest_node| {
                try zg.expr(elem_node, @enumFromInt(elem_dest_node));
            }
        },

        .struct_init_dot_two,
        .struct_init_dot_two_comma,
        .struct_init_dot,
        .struct_init_dot_comma,
        => {
            var buf: [2]Ast.Node.Index = undefined;
            const full = tree.fullStructInit(&buf, node).?;
            assert(full.ast.type_expr == 0); // The tag was `struct_init_dot_*`

            if (full.ast.fields.len == 0) {
                zg.setNode(dest_node, .{
                    .tag = .empty_literal,
                    .data = 0, // ignored
                    .ast_node = node,
                });
                return;
            }

            // Reserve all child slots up front so they are contiguous.
            const first_elem: u32 = @intCast(zg.nodes.len);
            try zg.nodes.resize(gpa, zg.nodes.len + full.ast.fields.len);

            // `extra` layout: field count, first child, then one name per field.
            const extra_index: u32 = @intCast(zg.extra.items.len);
            try zg.extra.ensureUnusedCapacity(gpa, 2 + full.ast.fields.len);
            zg.extra.appendSliceAssumeCapacity(&.{
                @intCast(full.ast.fields.len),
                first_elem,
            });
            const names_start = extra_index + 2;
            zg.extra.appendNTimesAssumeCapacity(undefined, full.ast.fields.len);

            zg.setNode(dest_node, .{
                .tag = .struct_literal,
                .data = extra_index,
                .ast_node = node,
            });

            for (full.ast.fields, names_start.., first_elem..) |elem_node, extra_name_idx, elem_dest_node| {
                // The field name is the identifier two tokens before the
                // value's first token (`.name = value`).
                const name_token = tree.firstToken(elem_node) - 2;
                zg.extra.items[extra_name_idx] = @intFromEnum(zg.identAsString(name_token) catch |err| switch (err) {
                    error.BadString => undefined, // doesn't matter, there's an error
                    error.OutOfMemory => |e| return e,
                });
                try zg.expr(elem_node, @enumFromInt(elem_dest_node));
            }
        },
    }
}
/// Parses the string literal found in `token`, skipping its first `offset`
/// bytes, and appends the decoded bytes to `zg.string_bytes`.
///
/// Returns the index in `string_bytes` where the decoded string begins.
/// If the literal is malformed, the error is reported through
/// `lowerStrLitError` and `error.BadString` is returned.
fn parseStrLit(zg: *ZonGen, token: Ast.TokenIndex, offset: u32) !u32 {
    const begin = zg.string_bytes.items.len;
    const literal = zg.tree.tokenSlice(token)[offset..];

    const outcome = try std.zig.string_literal.parseWrite(zg.string_bytes.writer(zg.gpa), literal);
    switch (outcome) {
        .failure => |err| {
            try zg.lowerStrLitError(err, token, literal, offset);
            return error.BadString;
        },
        .success => return @intCast(begin),
    }
}
/// Appends the contents of the multiline string literal `node` to
/// `zg.string_bytes` and returns the index where they begin.
///
/// The first two bytes of every line token are skipped, and consecutive lines
/// are joined with a single `'\n'`.
fn parseMultilineStrLit(zg: *ZonGen, node: Ast.Node.Index) !u32 {
    const gpa = zg.gpa;
    const tree = zg.tree;
    const out = &zg.string_bytes;

    // `lhs`/`rhs` of the node hold the first and last line tokens.
    const data = tree.nodes.items(.data)[node];
    const first_tok = data.lhs;
    const last_tok = data.rhs;

    const start_index: u32 = @intCast(out.items.len);

    // The first line is emitted without a leading newline.
    try out.appendSlice(gpa, tree.tokenSlice(first_tok)[2..]);

    // Every further line is preceded by a newline.
    var tok = first_tok + 1;
    while (tok <= last_tok) : (tok += 1) {
        const line = tree.tokenSlice(tok)[2..];
        try out.ensureUnusedCapacity(gpa, line.len + 1);
        out.appendAssumeCapacity('\n');
        out.appendSliceAssumeCapacity(line);
    }

    return start_index;
}
/// Appends the name denoted by the identifier token `ident_token` to
/// `zg.string_bytes` and returns the index where it begins.
///
/// Plain identifiers are copied verbatim. Identifiers starting with `@` have
/// everything after the `@` parsed as a string literal; the result must be
/// non-empty and free of 0 bytes, otherwise an error is reported and
/// `error.BadString` is returned.
fn appendIdentStr(zg: *ZonGen, ident_token: Ast.TokenIndex) !u32 {
    const tree = zg.tree;
    assert(tree.tokens.items(.tag)[ident_token] == .identifier);
    const raw_name = tree.tokenSlice(ident_token);

    if (mem.startsWith(u8, raw_name, "@")) {
        // Skip the leading '@' and decode the quoted part.
        const quoted_start = try zg.parseStrLit(ident_token, 1);
        const decoded = zg.string_bytes.items[quoted_start..];

        if (mem.indexOfScalar(u8, decoded, 0) != null) {
            try zg.addErrorTok(ident_token, "identifier cannot contain null bytes", .{});
            return error.BadString;
        }
        if (decoded.len == 0) {
            try zg.addErrorTok(ident_token, "identifier cannot be empty", .{});
            return error.BadString;
        }
        return quoted_start;
    }

    const plain_start = zg.string_bytes.items.len;
    try zg.string_bytes.appendSlice(zg.gpa, raw_name);
    return @intCast(plain_start);
}
/// Result of `strLitAsString`: where a lowered string literal is stored.
const StringLiteralResult = union(enum) {
/// The string contains no 0 byte and is stored 0-terminated in `string_bytes`.
nts: Zoir.NullTerminatedString,
/// The string contains at least one 0 byte, so it is described by an explicit
/// start index and length into `string_bytes`.
slice: struct { start: u32, len: u32 },
};
/// Lowers the string literal (single-line or multiline) at `str_node` into
/// `zg.string_bytes`.
///
/// A string containing a 0 byte is returned as an explicit start/length slice.
/// Any other string is deduplicated through `zg.string_table` and returned as
/// a 0-terminated string.
fn strLitAsString(zg: *ZonGen, str_node: Ast.Node.Index) !StringLiteralResult {
    const gpa = zg.gpa;
    const string_bytes = &zg.string_bytes;

    const node_tag = zg.tree.nodes.items(.tag)[str_node];
    const start: u32 = switch (node_tag) {
        .multiline_string_literal => try zg.parseMultilineStrLit(str_node),
        .string_literal => try zg.parseStrLit(zg.tree.nodes.items(.main_token)[str_node], 0),
        else => unreachable,
    };

    const bytes: []const u8 = string_bytes.items[start..];
    const contains_nul = std.mem.indexOfScalar(u8, bytes, 0) != null;
    if (contains_nul) {
        // Cannot be stored 0-terminated; keep the explicit length.
        return .{ .slice = .{ .start = start, .len = @intCast(bytes.len) } };
    }

    const entry = try zg.string_table.getOrPutContextAdapted(
        gpa,
        bytes,
        StringIndexAdapter{ .bytes = string_bytes },
        StringIndexContext{ .bytes = string_bytes },
    );
    if (!entry.found_existing) {
        // First occurrence: keep the bytes just written and terminate them.
        entry.key_ptr.* = start;
        try string_bytes.append(gpa, 0);
        return .{ .nts = @enumFromInt(start) };
    }

    // Already interned: discard the copy that was just appended.
    string_bytes.shrinkRetainingCapacity(start);
    return .{ .nts = @enumFromInt(entry.key_ptr.*) };
}
/// Interns the name of the identifier token `ident_token` and returns it as a
/// 0-terminated string in `zg.string_bytes`.
///
/// Names are deduplicated through `zg.string_table`, so equal identifiers
/// yield the same `NullTerminatedString`.
fn identAsString(zg: *ZonGen, ident_token: Ast.TokenIndex) !Zoir.NullTerminatedString {
    const gpa = zg.gpa;
    const string_bytes = &zg.string_bytes;

    const start = try zg.appendIdentStr(ident_token);
    const name: []const u8 = string_bytes.items[start..];

    const entry = try zg.string_table.getOrPutContextAdapted(
        gpa,
        name,
        StringIndexAdapter{ .bytes = string_bytes },
        StringIndexContext{ .bytes = string_bytes },
    );
    if (!entry.found_existing) {
        // First occurrence: keep the bytes just written and terminate them.
        entry.key_ptr.* = start;
        try string_bytes.append(gpa, 0);
        return @enumFromInt(start);
    }

    // Already interned: discard the copy that was just appended.
    string_bytes.shrinkRetainingCapacity(start);
    return @enumFromInt(entry.key_ptr.*);
}
/// Lowers the number literal at `num_node` into `dest_node`, applying `sign`.
///
/// `src_node` is the AST node recorded on the resulting Zoir node; it is the
/// negation node when the literal is negated and `num_node` otherwise.
/// Integers that fit in an `i32` and floats that round-trip through `f32` are
/// stored inline; everything else is stored out of line in `limbs`/`extra`.
/// An integer `-0` is rejected with an error, as are malformed literals.
fn numberLiteral(zg: *ZonGen, num_node: Ast.Node.Index, src_node: Ast.Node.Index, dest_node: Zoir.Node.Index, sign: enum { negative, positive }) !void {
    const tree = zg.tree;
    const num_token = tree.nodes.items(.main_token)[num_node];
    const num_bytes = tree.tokenSlice(num_token);

    switch (std.zig.parseNumberLiteral(num_bytes)) {
        .int => |unsigned_num| {
            if (unsigned_num == 0 and sign == .negative) {
                try zg.addErrorTokNotes(num_token, "integer literal '-0' is ambiguous", .{}, &.{
                    try zg.errNoteTok(num_token, "use '0' for an integer zero", .{}),
                    try zg.errNoteTok(num_token, "use '-0.0' for a floating-point signed zero", .{}),
                });
                return;
            }
            // `i65` holds any `u64` magnitude with either sign.
            const num: i65 = switch (sign) {
                .positive => unsigned_num,
                .negative => -@as(i65, unsigned_num),
            };
            if (std.math.cast(i32, num)) |x| {
                zg.setNode(dest_node, .{
                    .tag = .int_literal_small,
                    .data = @bitCast(x),
                    .ast_node = src_node,
                });
                return;
            }
            const max_limbs = comptime std.math.big.int.calcTwosCompLimbCount(@bitSizeOf(@TypeOf(num)));
            var limbs: [max_limbs]std.math.big.Limb = undefined;
            var big_int: std.math.big.int.Mutable = .init(&limbs, num);
            try zg.setBigIntLiteralNode(dest_node, src_node, big_int.toConst());
        },
        .big_int => |base| {
            const gpa = zg.gpa;
            // Strip the two-character base prefix, if there is one.
            const num_without_prefix = switch (base) {
                .decimal => num_bytes,
                .hex, .binary, .octal => num_bytes[2..],
            };

            var big_int: std.math.big.int.Managed = try .init(gpa);
            defer big_int.deinit();

            big_int.setString(@intFromEnum(base), num_without_prefix) catch |err| switch (err) {
                error.InvalidCharacter => unreachable, // caught in `parseNumberLiteral`
                error.InvalidBase => unreachable, // we only pass 10, 16, 8, 2, see above
                error.OutOfMemory => return error.OutOfMemory,
            };
            switch (sign) {
                .positive => {},
                .negative => big_int.negate(),
            }

            try zg.setBigIntLiteralNode(dest_node, src_node, big_int.toConst());
        },
        .float => {
            const unsigned_num = std.fmt.parseFloat(f128, num_bytes) catch |err| switch (err) {
                error.InvalidCharacter => unreachable, // validated by tokenizer
            };
            const num: f128 = switch (sign) {
                .positive => unsigned_num,
                .negative => -unsigned_num,
            };

            {
                // If the value fits into an f32 without losing any precision, store it that way.
                @setFloatMode(.strict);
                const smaller_float: f32 = @floatCast(num);
                const bigger_again: f128 = smaller_float;
                if (bigger_again == num) {
                    zg.setNode(dest_node, .{
                        .tag = .float_literal_small,
                        .data = @bitCast(smaller_float),
                        .ast_node = src_node,
                    });
                    return;
                }
            }

            // Otherwise store the full `f128` as four `u32` words in `extra`.
            const elems: [4]u32 = @bitCast(num);
            const extra_index: u32 = @intCast(zg.extra.items.len);
            try zg.extra.appendSlice(zg.gpa, &elems);
            zg.setNode(dest_node, .{
                .tag = .float_literal,
                .data = extra_index,
                .ast_node = src_node,
            });
        },
        .failure => |err| try zg.lowerNumberError(err, num_token, num_bytes),
    }
}
/// Stores `val` as an arbitrary-precision integer literal in `dest_node`.
///
/// The limbs of `val` are appended to `zg.limbs`, and the pair
/// `{ limb count, index of first limb }` is appended to `zg.extra`. The node's `data` is the index
/// of that pair within `zg.extra`, and its tag records the sign of `val`.
fn setBigIntLiteralNode(zg: *ZonGen, dest_node: Zoir.Node.Index, src_node: Ast.Node.Index, val: std.math.big.int.Const) !void {
    const gpa = zg.gpa;

    // Reserve all required space first, so that the appends below cannot fail.
    try zg.extra.ensureUnusedCapacity(gpa, 2);
    try zg.limbs.ensureUnusedCapacity(gpa, val.limbs.len);

    const first_limb: u32 = @intCast(zg.limbs.items.len);
    zg.limbs.appendSliceAssumeCapacity(val.limbs);

    const extra_start: u32 = @intCast(zg.extra.items.len);
    const limb_count: u32 = @intCast(val.limbs.len);
    zg.extra.appendAssumeCapacity(limb_count);
    zg.extra.appendAssumeCapacity(first_limb);

    const tag: Zoir.Node.Repr.Tag = if (val.positive) .int_literal_pos else .int_literal_neg;
    zg.setNode(dest_node, .{
        .tag = tag,
        .data = extra_start,
        .ast_node = src_node,
    });
}
/// Lowers a `.char_literal` AST node into a `.char_literal` Zoir node holding the parsed codepoint.
/// If the literal fails to parse, the error is reported and `dest_node` is not written.
fn charLiteral(zg: *ZonGen, node: Ast.Node.Index, dest_node: Zoir.Node.Index) !void {
    const tree = zg.tree;
    assert(tree.nodes.items(.tag)[node] == .char_literal);

    const lit_token = tree.nodes.items(.main_token)[node];
    const lit_bytes = tree.tokenSlice(lit_token);

    const codepoint = switch (std.zig.parseCharLiteral(lit_bytes)) {
        .success => |cp| cp,
        .failure => |err| return zg.lowerStrLitError(err, lit_token, lit_bytes, 0),
    };
    zg.setNode(dest_node, .{
        .tag = .char_literal,
        .data = codepoint,
        .ast_node = node,
    });
}
/// Lowers an `.identifier` AST node.
/// Only the bare identifiers `true`, `false`, `null`, `inf` and `nan` are accepted. Any other
/// identifier is reported as an error with two explanatory notes, and `dest_node` is not written.
fn identifier(zg: *ZonGen, node: Ast.Node.Index, dest_node: Zoir.Node.Index) !void {
    const tree = zg.tree;
    assert(tree.nodes.items(.tag)[node] == .identifier);

    const ident_token = tree.nodes.items(.main_token)[node];
    const ident_bytes = tree.tokenSlice(ident_token);

    // The identifiers ZON understands, and the node tag each one lowers to.
    const keywords = [_]struct { text: []const u8, tag: Zoir.Node.Repr.Tag }{
        .{ .text = "true", .tag = .true },
        .{ .text = "false", .tag = .false },
        .{ .text = "null", .tag = .null },
        .{ .text = "inf", .tag = .pos_inf },
        .{ .text = "nan", .tag = .nan },
    };

    const tag: Zoir.Node.Repr.Tag = for (keywords) |kw| {
        if (mem.eql(u8, ident_bytes, kw.text)) break kw.tag;
    } else {
        try zg.addErrorNodeNotes(node, "invalid expression", .{}, &.{
            try zg.errNoteNode(node, "ZON allows identifiers 'true', 'false', 'null', 'inf', and 'nan'", .{}),
            try zg.errNoteNode(node, "precede identifier with '.' for an enum literal", .{}),
        });
        return;
    };

    zg.setNode(dest_node, .{
        .tag = tag,
        .data = 0, // ignored
        .ast_node = node,
    });
}
/// Writes `repr` into the entry of `zg.nodes` identified by `dest`.
fn setNode(zg: *ZonGen, dest: Zoir.Node.Index, repr: Zoir.Node.Repr) void {
    const slot = @intFromEnum(dest);
    zg.nodes.set(slot, repr);
}
/// Reports the string or character literal parse error `err` on `token`.
/// The message is produced by `err.lower`, which is handed `raw_string` and `offset` and reports
/// through `addErrorTokOff`.
fn lowerStrLitError(zg: *ZonGen, err: std.zig.string_literal.Error, token: Ast.TokenIndex, raw_string: []const u8, offset: u32) Allocator.Error!void {
    // Leading arguments for `addErrorTokOff`; `lower` appends the offset, format and args.
    const first_args = .{ zg, token };
    try err.lower(raw_string, offset, ZonGen.addErrorTokOff, first_args);
}
/// Reports the number literal parse error `err` on `token`, whose source text is `bytes`.
/// Errors which carry a byte index are reported at that offset within the token.
fn lowerNumberError(zg: *ZonGen, err: std.zig.number_literal.Error, token: Ast.TokenIndex, bytes: []const u8) Allocator.Error!void {
    switch (err) {
        .leading_zero => {
            // The octal hint is only attached when the literal contains no '.'.
            const has_period = std.mem.indexOfScalar(u8, bytes, '.') != null;
            if (has_period) {
                try zg.addErrorTok(token, "number '{s}' has leading zero", .{bytes});
            } else {
                try zg.addErrorTokNotes(token, "number '{s}' has leading zero", .{bytes}, &.{
                    try zg.errNoteTok(token, "use '0o' prefix for octal literals", .{}),
                });
            }
        },
        .digit_after_base => try zg.addErrorTok(token, "expected a digit after base prefix", .{}),
        .upper_case_base => |pos| try zg.addErrorTokOff(token, @intCast(pos), "base prefix must be lowercase", .{}),
        .invalid_float_base => |pos| try zg.addErrorTokOff(token, @intCast(pos), "invalid base for float literal", .{}),
        .repeated_underscore => |pos| try zg.addErrorTokOff(token, @intCast(pos), "repeated digit separator", .{}),
        .invalid_underscore_after_special => |pos| try zg.addErrorTokOff(token, @intCast(pos), "expected digit before digit separator", .{}),
        .invalid_digit => |bad| try zg.addErrorTokOff(token, @intCast(bad.i), "invalid digit '{c}' for {s} base", .{ bytes[bad.i], @tagName(bad.base) }),
        .invalid_digit_exponent => |pos| try zg.addErrorTokOff(token, @intCast(pos), "invalid digit '{c}' in exponent", .{bytes[pos]}),
        .duplicate_exponent => |pos| try zg.addErrorTokOff(token, @intCast(pos), "duplicate exponent", .{}),
        .exponent_after_underscore => |pos| try zg.addErrorTokOff(token, @intCast(pos), "expected digit before exponent", .{}),
        .special_after_underscore => |pos| try zg.addErrorTokOff(token, @intCast(pos), "expected digit before '{c}'", .{bytes[pos]}),
        // The message names the character just before the reported position.
        .trailing_special => |pos| try zg.addErrorTokOff(token, @intCast(pos), "expected digit after '{c}'", .{bytes[pos - 1]}),
        .trailing_underscore => |pos| try zg.addErrorTokOff(token, @intCast(pos), "trailing digit separator", .{}),
        .duplicate_period => unreachable, // Validated by tokenizer
        .invalid_character => unreachable, // Validated by tokenizer
        .invalid_exponent_sign => |pos| {
            assert(bytes.len >= 2 and bytes[0] == '0' and bytes[1] == 'x'); // Validated by tokenizer
            try zg.addErrorTokOff(token, @intCast(pos), "sign '{c}' cannot follow digit '{c}' in hex base", .{ bytes[pos], bytes[pos - 1] });
        },
        .period_after_exponent => |pos| try zg.addErrorTokOff(token, @intCast(pos), "unexpected period after exponent", .{}),
    }
}
/// Creates an error note attached to the AST node `node`.
/// The formatted, null-terminated message is appended to `zg.string_bytes`. The returned note
/// stores `node` in `node_or_offset` and uses `Zoir.CompileError.invalid_token` as its token.
fn errNoteNode(zg: *ZonGen, node: Ast.Node.Index, comptime format: []const u8, args: anytype) Allocator.Error!Zoir.CompileError.Note {
    const gpa = zg.gpa;
    const msg_start: u32 = @intCast(zg.string_bytes.items.len);
    try zg.string_bytes.writer(gpa).print(format, args);
    try zg.string_bytes.append(gpa, 0);
    return .{
        .msg = @enumFromInt(msg_start),
        .token = Zoir.CompileError.invalid_token,
        .node_or_offset = node,
    };
}
/// Creates an error note attached to the token `tok`.
/// The formatted, null-terminated message is appended to `zg.string_bytes`. The returned note
/// stores `tok` as its token and 0 in `node_or_offset`.
fn errNoteTok(zg: *ZonGen, tok: Ast.TokenIndex, comptime format: []const u8, args: anytype) Allocator.Error!Zoir.CompileError.Note {
    const gpa = zg.gpa;
    const msg_start: u32 = @intCast(zg.string_bytes.items.len);
    try zg.string_bytes.writer(gpa).print(format, args);
    try zg.string_bytes.append(gpa, 0);
    return .{
        .msg = @enumFromInt(msg_start),
        .token = tok,
        .node_or_offset = 0,
    };
}
/// Records a compile error on the AST node `node`, with no notes.
fn addErrorNode(zg: *ZonGen, node: Ast.Node.Index, comptime format: []const u8, args: anytype) Allocator.Error!void {
    // Node-based errors use the `invalid_token` sentinel in the token slot.
    const no_token = Zoir.CompileError.invalid_token;
    try zg.addErrorInner(no_token, node, format, args, &.{});
}
/// Records a compile error at the start of the token `tok` (offset 0), with no notes.
fn addErrorTok(zg: *ZonGen, tok: Ast.TokenIndex, comptime format: []const u8, args: anytype) Allocator.Error!void {
    const byte_offset: u32 = 0;
    try zg.addErrorInner(tok, byte_offset, format, args, &.{});
}
/// Records a compile error on the AST node `node`, with `notes` attached.
fn addErrorNodeNotes(zg: *ZonGen, node: Ast.Node.Index, comptime format: []const u8, args: anytype, notes: []const Zoir.CompileError.Note) Allocator.Error!void {
    // Node-based errors use the `invalid_token` sentinel in the token slot.
    const no_token = Zoir.CompileError.invalid_token;
    try zg.addErrorInner(no_token, node, format, args, notes);
}
/// Records a compile error at the start of the token `tok` (offset 0), with `notes` attached.
fn addErrorTokNotes(zg: *ZonGen, tok: Ast.TokenIndex, comptime format: []const u8, args: anytype, notes: []const Zoir.CompileError.Note) Allocator.Error!void {
    const byte_offset: u32 = 0;
    try zg.addErrorInner(tok, byte_offset, format, args, notes);
}
/// Records a compile error at `offset` within the token `tok`, with no notes.
fn addErrorTokOff(zg: *ZonGen, tok: Ast.TokenIndex, offset: u32, comptime format: []const u8, args: anytype) Allocator.Error!void {
    const no_notes: []const Zoir.CompileError.Note = &.{};
    try zg.addErrorInner(tok, offset, format, args, no_notes);
}
/// Records a compile error at `offset` within the token `tok`, with `notes` attached.
fn addErrorTokNotesOff(zg: *ZonGen, tok: Ast.TokenIndex, offset: u32, comptime format: []const u8, args: anytype, notes: []const Zoir.CompileError.Note) Allocator.Error!void {
    try zg.addErrorInner(tok, offset, format, args, notes);
}
/// Shared implementation behind the `addError*` helpers.
///
/// Appends `notes` to `zg.error_notes`, then the formatted, null-terminated message to
/// `zg.string_bytes`, and finally a new entry to `zg.compile_errors` referring to both.
/// The callers in this file pass either `invalid_token` together with a node index, or a real
/// token together with a byte offset into it.
fn addErrorInner(
    zg: *ZonGen,
    token: Ast.TokenIndex,
    node_or_offset: u32,
    comptime format: []const u8,
    args: anytype,
    notes: []const Zoir.CompileError.Note,
) Allocator.Error!void {
    const gpa = zg.gpa;

    // The notes of one error are stored contiguously, starting at `notes_start`.
    const notes_start: u32 = @intCast(zg.error_notes.items.len);
    try zg.error_notes.appendSlice(gpa, notes);

    const msg_start: u32 = @intCast(zg.string_bytes.items.len);
    try zg.string_bytes.writer(gpa).print(format, args);
    try zg.string_bytes.append(gpa, 0);

    try zg.compile_errors.append(gpa, .{
        .msg = @enumFromInt(msg_start),
        .token = token,
        .node_or_offset = node_or_offset,
        .first_note = notes_start,
        .note_count = @intCast(notes.len),
    });
}
/// Converts the parse errors stored in `zg.tree` into Zoir compile errors.
/// Asserts that the tree contains at least one error.
/// Currently only the first error, together with the notes which directly follow it, is
/// reported; see the TODO inside the loop.
fn lowerAstErrors(zg: *ZonGen) Allocator.Error!void {
const gpa = zg.gpa;
const tree = zg.tree;
assert(tree.errors.len > 0);
// Scratch buffer for rendering one message at a time; cleared after each loop iteration.
var msg: std.ArrayListUnmanaged(u8) = .empty;
defer msg.deinit(gpa);
// Notes collected for `cur_err` which have not been flushed yet.
var notes: std.ArrayListUnmanaged(Zoir.CompileError.Note) = .empty;
defer notes.deinit(gpa);
// The error which the notes encountered in the loop below belong to.
var cur_err = tree.errors[0];
for (tree.errors[1..]) |err| {
if (err.is_note) {
// Render the note and attach it to the pending error.
try tree.renderError(err, msg.writer(gpa));
try notes.append(gpa, try zg.errNoteTok(err.token, "{s}", .{msg.items}));
} else {
// Flush error
try tree.renderError(cur_err, msg.writer(gpa));
const extra_offset = tree.errorOffset(cur_err);
try zg.addErrorTokNotesOff(cur_err.token, extra_offset, "{s}", .{msg.items}, notes.items);
notes.clearRetainingCapacity();
cur_err = err;
// TODO: `Parse` currently does not have good error recovery mechanisms, so the remaining errors could be bogus.
// As such, we'll ignore all remaining errors for now. We should improve `Parse` so that we can report all the errors.
return;
}
msg.clearRetainingCapacity();
}
// Only reached when every entry after the first was a note (or there were none).
// Flush error
const extra_offset = tree.errorOffset(cur_err);
try tree.renderError(cur_err, msg.writer(gpa));
try zg.addErrorTokNotesOff(cur_err.token, extra_offset, "{s}", .{msg.items}, notes.items);
}
const std = @import("std");
const assert = std.debug.assert;
const mem = std.mem;
const Allocator = mem.Allocator;
const StringIndexAdapter = std.hash_map.StringIndexAdapter;
const StringIndexContext = std.hash_map.StringIndexContext;
const ZonGen = @This();
const Zoir = @import("Zoir.zig");
const Ast = @import("Ast.zig");

View File

@ -38,6 +38,41 @@ pub const Error = union(enum) {
invalid_character: usize,
/// `''`. Not returned for string literals.
empty_char_literal,
/// Converts this error into a human-readable message and reports it through `func`.
///
/// Returns `func(first_args[0], ..., first_args[n], offset + bad_idx, format, args)`, where
/// `bad_idx` is the index carried by the error (0 for errors with no payload), `format` is a
/// comptime-known format string and `args` is the matching argument tuple.
/// `raw_string` is the source text the error's index refers to; it is only read to show the
/// offending byte in the message.
pub fn lower(
    err: Error,
    raw_string: []const u8,
    offset: u32,
    comptime func: anytype,
    first_args: anytype,
) @typeInfo(@TypeOf(func)).@"fn".return_type.? {
    switch (err) {
        // `inline else` makes `tag` comptime-known, so each error gets its own format string.
        inline else => |bad_index_or_void, tag| {
            const bad_index: u32 = switch (@TypeOf(bad_index_or_void)) {
                void => 0,
                else => @intCast(bad_index_or_void),
            };
            const fmt_str: []const u8, const args = switch (tag) {
                .invalid_escape_character => .{ "invalid escape character: '{c}'", .{raw_string[bad_index]} },
                .expected_hex_digit => .{ "expected hex digit, found '{c}'", .{raw_string[bad_index]} },
                .empty_unicode_escape_sequence => .{ "empty unicode escape sequence", .{} },
                .expected_hex_digit_or_rbrace => .{ "expected hex digit or '}}', found '{c}'", .{raw_string[bad_index]} },
                .invalid_unicode_codepoint => .{ "unicode escape does not correspond to a valid unicode scalar value", .{} },
                .expected_lbrace => .{ "expected '{{', found '{c}'", .{raw_string[bad_index]} },
                .expected_rbrace => .{ "expected '}}', found '{c}'", .{raw_string[bad_index]} },
                .expected_single_quote => .{ "expected single quote ('), found '{c}'", .{raw_string[bad_index]} },
                .invalid_character => .{ "invalid byte in string or character literal: '{c}'", .{raw_string[bad_index]} },
                .empty_char_literal => .{ "empty character literal", .{} },
            };
            return @call(.auto, func, first_args ++ .{
                offset + bad_index,
                fmt_str,
                args,
            });
        },
    }
}
};
/// Asserts the slice starts and ends with single-quotes.

View File

@ -13,6 +13,7 @@ const usage_fmt =
\\ if the list is non-empty
\\ --ast-check Run zig ast-check on every file
\\ --exclude [file] Exclude file or directory from formatting
\\ --zon Treat all input files as ZON, regardless of file extension
\\
\\
;
@ -21,6 +22,7 @@ const Fmt = struct {
seen: SeenMap,
any_error: bool,
check_ast: bool,
force_zon: bool,
color: Color,
gpa: Allocator,
arena: Allocator,
@ -35,9 +37,10 @@ pub fn run(
args: []const []const u8,
) !void {
var color: Color = .auto;
var stdin_flag: bool = false;
var check_flag: bool = false;
var check_ast_flag: bool = false;
var stdin_flag = false;
var check_flag = false;
var check_ast_flag = false;
var force_zon = false;
var input_files = std.ArrayList([]const u8).init(gpa);
defer input_files.deinit();
var excluded_files = std.ArrayList([]const u8).init(gpa);
@ -74,6 +77,8 @@ pub fn run(
i += 1;
const next_arg = args[i];
try excluded_files.append(next_arg);
} else if (mem.eql(u8, arg, "--zon")) {
force_zon = true;
} else {
fatal("unrecognized parameter: '{s}'", .{arg});
}
@ -94,23 +99,40 @@ pub fn run(
};
defer gpa.free(source_code);
var tree = std.zig.Ast.parse(gpa, source_code, .zig) catch |err| {
var tree = std.zig.Ast.parse(gpa, source_code, if (force_zon) .zon else .zig) catch |err| {
fatal("error parsing stdin: {}", .{err});
};
defer tree.deinit(gpa);
if (check_ast_flag) {
var zir = try std.zig.AstGen.generate(gpa, tree);
if (!force_zon) {
var zir = try std.zig.AstGen.generate(gpa, tree);
defer zir.deinit(gpa);
if (zir.hasCompileErrors()) {
var wip_errors: std.zig.ErrorBundle.Wip = undefined;
try wip_errors.init(gpa);
defer wip_errors.deinit();
try wip_errors.addZirErrorMessages(zir, tree, source_code, "<stdin>");
var error_bundle = try wip_errors.toOwnedBundle("");
defer error_bundle.deinit(gpa);
error_bundle.renderToStdErr(color.renderOptions());
process.exit(2);
if (zir.hasCompileErrors()) {
var wip_errors: std.zig.ErrorBundle.Wip = undefined;
try wip_errors.init(gpa);
defer wip_errors.deinit();
try wip_errors.addZirErrorMessages(zir, tree, source_code, "<stdin>");
var error_bundle = try wip_errors.toOwnedBundle("");
defer error_bundle.deinit(gpa);
error_bundle.renderToStdErr(color.renderOptions());
process.exit(2);
}
} else {
const zoir = try std.zig.ZonGen.generate(gpa, tree);
defer zoir.deinit(gpa);
if (zoir.hasCompileErrors()) {
var wip_errors: std.zig.ErrorBundle.Wip = undefined;
try wip_errors.init(gpa);
defer wip_errors.deinit();
try wip_errors.addZoirErrorMessages(zoir, tree, source_code, "<stdin>");
var error_bundle = try wip_errors.toOwnedBundle("");
defer error_bundle.deinit(gpa);
error_bundle.renderToStdErr(color.renderOptions());
process.exit(2);
}
}
} else if (tree.errors.len != 0) {
try std.zig.printAstErrorsToStderr(gpa, tree, "<stdin>", color);
@ -131,12 +153,13 @@ pub fn run(
fatal("expected at least one source file argument", .{});
}
var fmt = Fmt{
var fmt: Fmt = .{
.gpa = gpa,
.arena = arena,
.seen = Fmt.SeenMap.init(gpa),
.seen = .init(gpa),
.any_error = false,
.check_ast = check_ast_flag,
.force_zon = force_zon,
.color = color,
.out_buffer = std.ArrayList(u8).init(gpa),
};
@ -276,7 +299,13 @@ fn fmtPathFile(
// Add to set after no longer possible to get error.IsDir.
if (try fmt.seen.fetchPut(stat.inode, {})) |_| return;
var tree = try std.zig.Ast.parse(gpa, source_code, .zig);
const mode: std.zig.Ast.Mode = mode: {
if (fmt.force_zon) break :mode .zon;
if (mem.endsWith(u8, sub_path, ".zon")) break :mode .zon;
break :mode .zig;
};
var tree = try std.zig.Ast.parse(gpa, source_code, mode);
defer tree.deinit(gpa);
if (tree.errors.len != 0) {
@ -289,18 +318,37 @@ fn fmtPathFile(
if (stat.size > std.zig.max_src_size)
return error.FileTooBig;
var zir = try std.zig.AstGen.generate(gpa, tree);
defer zir.deinit(gpa);
switch (mode) {
.zig => {
var zir = try std.zig.AstGen.generate(gpa, tree);
defer zir.deinit(gpa);
if (zir.hasCompileErrors()) {
var wip_errors: std.zig.ErrorBundle.Wip = undefined;
try wip_errors.init(gpa);
defer wip_errors.deinit();
try wip_errors.addZirErrorMessages(zir, tree, source_code, file_path);
var error_bundle = try wip_errors.toOwnedBundle("");
defer error_bundle.deinit(gpa);
error_bundle.renderToStdErr(fmt.color.renderOptions());
fmt.any_error = true;
if (zir.hasCompileErrors()) {
var wip_errors: std.zig.ErrorBundle.Wip = undefined;
try wip_errors.init(gpa);
defer wip_errors.deinit();
try wip_errors.addZirErrorMessages(zir, tree, source_code, file_path);
var error_bundle = try wip_errors.toOwnedBundle("");
defer error_bundle.deinit(gpa);
error_bundle.renderToStdErr(fmt.color.renderOptions());
fmt.any_error = true;
}
},
.zon => {
var zoir = try std.zig.ZonGen.generate(gpa, tree);
defer zoir.deinit(gpa);
if (zoir.hasCompileErrors()) {
var wip_errors: std.zig.ErrorBundle.Wip = undefined;
try wip_errors.init(gpa);
defer wip_errors.deinit();
try wip_errors.addZoirErrorMessages(zoir, tree, source_code, file_path);
var error_bundle = try wip_errors.toOwnedBundle("");
defer error_bundle.deinit(gpa);
error_bundle.renderToStdErr(fmt.color.renderOptions());
fmt.any_error = true;
}
},
}
}

View File

@ -19,6 +19,7 @@ const Directory = std.Build.Cache.Directory;
const EnvVar = std.zig.EnvVar;
const LibCInstallation = std.zig.LibCInstallation;
const AstGen = std.zig.AstGen;
const ZonGen = std.zig.ZonGen;
const Server = std.zig.Server;
const tracy = @import("tracy.zig");
@ -6007,15 +6008,16 @@ fn parseCodeModel(arg: []const u8) std.builtin.CodeModel {
const usage_ast_check =
\\Usage: zig ast-check [file]
\\
\\ Given a .zig source file, reports any compile errors that can be
\\ ascertained on the basis of the source code alone, without target
\\ information or type checking.
\\ Given a .zig source file or .zon file, reports any compile errors
\\ that can be ascertained on the basis of the source code alone,
\\ without target information or type checking.
\\
\\ If [file] is omitted, stdin is used.
\\
\\Options:
\\ -h, --help Print this help and exit
\\ --color [auto|off|on] Enable or disable colored error messages
\\ --zon Treat the input file as ZON, regardless of file extension
\\ -t (debug option) Output ZIR in text form to stdout
\\
\\
@ -6032,6 +6034,7 @@ fn cmdAstCheck(
var color: Color = .auto;
var want_output_text = false;
var force_zon = false;
var zig_source_file: ?[]const u8 = null;
var i: usize = 0;
@ -6043,6 +6046,8 @@ fn cmdAstCheck(
return cleanExit();
} else if (mem.eql(u8, arg, "-t")) {
want_output_text = true;
} else if (mem.eql(u8, arg, "--zon")) {
force_zon = true;
} else if (mem.eql(u8, arg, "--color")) {
if (i + 1 >= args.len) {
fatal("expected [auto|on|off] after --color", .{});
@ -6110,89 +6115,136 @@ fn cmdAstCheck(
file.stat.size = source.len;
}
const mode: Ast.Mode = mode: {
if (force_zon) break :mode .zon;
if (zig_source_file) |name| {
if (mem.endsWith(u8, name, ".zon")) {
break :mode .zon;
}
}
break :mode .zig;
};
file.mod = try Package.Module.createLimited(arena, .{
.root = Path.cwd(),
.root_src_path = file.sub_file_path,
.fully_qualified_name = "root",
});
file.tree = try Ast.parse(gpa, file.source, .zig);
file.tree = try Ast.parse(gpa, file.source, mode);
file.tree_loaded = true;
defer file.tree.deinit(gpa);
file.zir = try AstGen.generate(gpa, file.tree);
file.zir_loaded = true;
defer file.zir.deinit(gpa);
switch (mode) {
.zig => {
file.zir = try AstGen.generate(gpa, file.tree);
file.zir_loaded = true;
defer file.zir.deinit(gpa);
if (file.zir.hasCompileErrors()) {
var wip_errors: std.zig.ErrorBundle.Wip = undefined;
try wip_errors.init(gpa);
defer wip_errors.deinit();
try Compilation.addZirErrorMessages(&wip_errors, &file);
var error_bundle = try wip_errors.toOwnedBundle("");
defer error_bundle.deinit(gpa);
error_bundle.renderToStdErr(color.renderOptions());
if (file.zir.hasCompileErrors()) {
var wip_errors: std.zig.ErrorBundle.Wip = undefined;
try wip_errors.init(gpa);
defer wip_errors.deinit();
try Compilation.addZirErrorMessages(&wip_errors, &file);
var error_bundle = try wip_errors.toOwnedBundle("");
defer error_bundle.deinit(gpa);
error_bundle.renderToStdErr(color.renderOptions());
if (file.zir.loweringFailed()) {
process.exit(1);
}
}
if (file.zir.loweringFailed()) {
process.exit(1);
}
}
if (!want_output_text) {
if (file.zir.hasCompileErrors()) {
process.exit(1);
} else {
if (!want_output_text) {
if (file.zir.hasCompileErrors()) {
process.exit(1);
} else {
return cleanExit();
}
}
if (!build_options.enable_debug_extensions) {
fatal("-t option only available in builds of zig with debug extensions", .{});
}
{
const token_bytes = @sizeOf(Ast.TokenList) +
file.tree.tokens.len * (@sizeOf(std.zig.Token.Tag) + @sizeOf(Ast.ByteOffset));
const tree_bytes = @sizeOf(Ast) + file.tree.nodes.len *
(@sizeOf(Ast.Node.Tag) +
@sizeOf(Ast.Node.Data) +
@sizeOf(Ast.TokenIndex));
const instruction_bytes = file.zir.instructions.len *
// Here we don't use @sizeOf(Zir.Inst.Data) because it would include
// the debug safety tag but we want to measure release size.
(@sizeOf(Zir.Inst.Tag) + 8);
const extra_bytes = file.zir.extra.len * @sizeOf(u32);
const total_bytes = @sizeOf(Zir) + instruction_bytes + extra_bytes +
file.zir.string_bytes.len * @sizeOf(u8);
const stdout = io.getStdOut();
const fmtIntSizeBin = std.fmt.fmtIntSizeBin;
// zig fmt: off
try stdout.writer().print(
\\# Source bytes: {}
\\# Tokens: {} ({})
\\# AST Nodes: {} ({})
\\# Total ZIR bytes: {}
\\# Instructions: {d} ({})
\\# String Table Bytes: {}
\\# Extra Data Items: {d} ({})
\\
, .{
fmtIntSizeBin(file.source.len),
file.tree.tokens.len, fmtIntSizeBin(token_bytes),
file.tree.nodes.len, fmtIntSizeBin(tree_bytes),
fmtIntSizeBin(total_bytes),
file.zir.instructions.len, fmtIntSizeBin(instruction_bytes),
fmtIntSizeBin(file.zir.string_bytes.len),
file.zir.extra.len, fmtIntSizeBin(extra_bytes),
});
// zig fmt: on
}
try @import("print_zir.zig").renderAsTextToFile(gpa, &file, io.getStdOut());
if (file.zir.hasCompileErrors()) {
process.exit(1);
} else {
return cleanExit();
}
},
.zon => {
const zoir = try ZonGen.generate(gpa, file.tree);
defer zoir.deinit(gpa);
if (zoir.hasCompileErrors()) {
var wip_errors: std.zig.ErrorBundle.Wip = undefined;
try wip_errors.init(gpa);
defer wip_errors.deinit();
{
const src_path = try file.fullPath(gpa);
defer gpa.free(src_path);
try wip_errors.addZoirErrorMessages(zoir, file.tree, file.source, src_path);
}
var error_bundle = try wip_errors.toOwnedBundle("");
defer error_bundle.deinit(gpa);
error_bundle.renderToStdErr(color.renderOptions());
process.exit(1);
}
if (!want_output_text) {
return cleanExit();
}
if (!build_options.enable_debug_extensions) {
fatal("-t option only available in builds of zig with debug extensions", .{});
}
try @import("print_zoir.zig").renderToFile(zoir, arena, io.getStdOut());
return cleanExit();
}
}
if (!build_options.enable_debug_extensions) {
fatal("-t option only available in builds of zig with debug extensions", .{});
}
{
const token_bytes = @sizeOf(Ast.TokenList) +
file.tree.tokens.len * (@sizeOf(std.zig.Token.Tag) + @sizeOf(Ast.ByteOffset));
const tree_bytes = @sizeOf(Ast) + file.tree.nodes.len *
(@sizeOf(Ast.Node.Tag) +
@sizeOf(Ast.Node.Data) +
@sizeOf(Ast.TokenIndex));
const instruction_bytes = file.zir.instructions.len *
// Here we don't use @sizeOf(Zir.Inst.Data) because it would include
// the debug safety tag but we want to measure release size.
(@sizeOf(Zir.Inst.Tag) + 8);
const extra_bytes = file.zir.extra.len * @sizeOf(u32);
const total_bytes = @sizeOf(Zir) + instruction_bytes + extra_bytes +
file.zir.string_bytes.len * @sizeOf(u8);
const stdout = io.getStdOut();
const fmtIntSizeBin = std.fmt.fmtIntSizeBin;
// zig fmt: off
try stdout.writer().print(
\\# Source bytes: {}
\\# Tokens: {} ({})
\\# AST Nodes: {} ({})
\\# Total ZIR bytes: {}
\\# Instructions: {d} ({})
\\# String Table Bytes: {}
\\# Extra Data Items: {d} ({})
\\
, .{
fmtIntSizeBin(file.source.len),
file.tree.tokens.len, fmtIntSizeBin(token_bytes),
file.tree.nodes.len, fmtIntSizeBin(tree_bytes),
fmtIntSizeBin(total_bytes),
file.zir.instructions.len, fmtIntSizeBin(instruction_bytes),
fmtIntSizeBin(file.zir.string_bytes.len),
file.zir.extra.len, fmtIntSizeBin(extra_bytes),
});
// zig fmt: on
}
try @import("print_zir.zig").renderAsTextToFile(gpa, &file, io.getStdOut());
if (file.zir.hasCompileErrors()) {
process.exit(1);
} else {
return cleanExit();
},
}
}

122
src/print_zoir.zig Normal file
View File

@ -0,0 +1,122 @@
/// Renders `zoir` as text to the file `f`, going through a buffered writer which is flushed
/// before returning. See `renderToWriter` for the meaning of `zoir` and `arena`.
pub fn renderToFile(zoir: Zoir, arena: Allocator, f: std.fs.File) (std.fs.File.WriteError || Allocator.Error)!void {
    var buffered = std.io.bufferedWriter(f.writer());
    const out = buffered.writer();
    try renderToWriter(zoir, arena, out);
    try buffered.flush();
}
/// Writes a header of size statistics for `zoir` to `w`, followed by a textual dump of its
/// node tree starting at the root node.
/// Asserts that `zoir` has no compile errors. `arena` is handed to the printer for its
/// temporary allocations.
pub fn renderToWriter(zoir: Zoir, arena: Allocator, w: anytype) (@TypeOf(w).Error || Allocator.Error)!void {
    assert(!zoir.hasCompileErrors());

    // Per-node size, computed as the sum of the sizes of the individual fields of a node.
    const bytes_per_node = comptime total: {
        var sum: usize = 0;
        for (@typeInfo(Zoir.Node.Repr).@"struct".fields) |field| {
            sum += @sizeOf(field.type);
        }
        break :total sum;
    };

    const node_bytes = zoir.nodes.len * bytes_per_node;
    const extra_bytes = zoir.extra.len * @sizeOf(u32);
    const limb_bytes = zoir.limbs.len * @sizeOf(std.math.big.Limb);
    const string_bytes = zoir.string_bytes.len;
    const total_bytes = node_bytes + extra_bytes + limb_bytes + string_bytes;

    const fmtSize = std.fmt.fmtIntSizeBin;

    // zig fmt: off
    try w.print(
        \\# Nodes: {} ({})
        \\# Extra Data Items: {} ({})
        \\# BigInt Limbs: {} ({})
        \\# String Table Bytes: {}
        \\# Total ZON Bytes: {}
        \\
    , .{
        zoir.nodes.len, fmtSize(node_bytes),
        zoir.extra.len, fmtSize(extra_bytes),
        zoir.limbs.len, fmtSize(limb_bytes),
        fmtSize(string_bytes),
        fmtSize(total_bytes),
    });
    // zig fmt: on

    var printer: PrintZon = .{
        .w = w.any(),
        .arena = arena,
        .zoir = zoir,
        .indent = 0,
    };
    // The printer works on a type-erased writer and so returns `anyerror`; narrow it back down.
    return @errorCast(printer.renderRoot());
}
/// Printer state for dumping a `Zoir` node tree as indented text.
const PrintZon = struct {
    /// Destination for all output.
    w: std.io.AnyWriter,
    /// Used for temporary allocations (the decimal strings of big integers).
    arena: Allocator,
    /// The tree being printed.
    zoir: Zoir,
    /// Current nesting depth; each level is printed as two spaces.
    indent: u32,

    /// Prints the root node and everything below it, followed by a final newline.
    fn renderRoot(pz: *PrintZon) anyerror!void {
        try pz.renderNode(.root);
        try pz.w.writeByte('\n');
    }

    /// Prints `node` as `%<index> = <value>`, recursing into the elements of aggregates.
    fn renderNode(pz: *PrintZon, node: Zoir.Node.Index) anyerror!void {
        const out = pz.w;
        const zoir = pz.zoir;
        try out.print("%{d} = ", .{@intFromEnum(node)});
        switch (node.get(zoir)) {
            .true => try out.writeAll("true"),
            .false => try out.writeAll("false"),
            .null => try out.writeAll("null"),
            .pos_inf => try out.writeAll("inf"),
            .neg_inf => try out.writeAll("-inf"),
            .nan => try out.writeAll("nan"),
            .int_literal => |int| switch (int) {
                .small => |val| try out.print("int({d})", .{val}),
                .big => |val| {
                    const digits = try val.toStringAlloc(pz.arena, 10, .lower);
                    try out.print("int(big {s})", .{digits});
                },
            },
            .float_literal => |val| try out.print("float({d})", .{val}),
            .char_literal => |val| try out.print("char({d})", .{val}),
            .enum_literal => |val| try out.print("enum_literal({p})", .{std.zig.fmtId(val.get(zoir))}),
            .string_literal => |val| try out.print("str(\"{}\")", .{std.zig.fmtEscapes(val)}),
            .empty_literal => try out.writeAll("empty_literal(.{})"),
            .array_literal => |elems| {
                try pz.openAggregate("array_literal({");
                for (0..elems.len) |i| {
                    try pz.newline();
                    try pz.renderNode(elems.at(@intCast(i)));
                    try out.writeByte(',');
                }
                try pz.closeAggregate();
            },
            .struct_literal => |fields| {
                try pz.openAggregate("struct_literal({");
                for (fields.names, 0..fields.vals.len) |field_name, i| {
                    try pz.newline();
                    try out.print("[{p}] ", .{std.zig.fmtId(field_name.get(zoir))});
                    try pz.renderNode(fields.vals.at(@intCast(i)));
                    try out.writeByte(',');
                }
                try pz.closeAggregate();
            },
        }
    }

    /// Writes the opening text of an aggregate and indents everything which follows by one level.
    fn openAggregate(pz: *PrintZon, opener: []const u8) anyerror!void {
        try pz.w.writeAll(opener);
        pz.indent += 1;
    }

    /// Undoes the indentation of `openAggregate` and writes the closing `})` on its own line.
    fn closeAggregate(pz: *PrintZon) anyerror!void {
        pz.indent -= 1;
        try pz.newline();
        try pz.w.writeAll("})");
    }

    /// Starts a new line and indents it by two spaces per nesting level.
    fn newline(pz: *PrintZon) !void {
        try pz.w.writeByte('\n');
        var remaining = pz.indent;
        while (remaining > 0) : (remaining -= 1) {
            try pz.w.writeAll("  ");
        }
    }
};
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const Zoir = std.zig.Zoir;