AstGen: improve error for invalid bytes in strings and comments

This commit is contained in:
Will Lillis 2025-02-05 04:10:11 -05:00 committed by GitHub
parent d72f3d353f
commit cf059ee087
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 93 additions and 4 deletions

View File

@ -458,6 +458,19 @@ pub fn renderError(tree: Ast, parse_error: Error, stream: anytype) !void {
return stream.writeAll("for input is not captured"); return stream.writeAll("for input is not captured");
}, },
.invalid_byte => {
const tok_slice = tree.source[tree.tokens.items(.start)[parse_error.token]..];
return stream.print("{s} contains invalid byte: '{'}'", .{
switch (tok_slice[0]) {
'\'' => "character literal",
'"', '\\' => "string literal",
'/' => "comment",
else => unreachable,
},
std.zig.fmtEscapes(tok_slice[parse_error.extra.offset..][0..1]),
});
},
.expected_token => { .expected_token => {
const found_tag = token_tags[parse_error.token + @intFromBool(parse_error.token_is_prev)]; const found_tag = token_tags[parse_error.token + @intFromBool(parse_error.token_is_prev)];
const expected_symbol = parse_error.extra.expected_tag.symbol(); const expected_symbol = parse_error.extra.expected_tag.symbol();
@ -2926,6 +2939,7 @@ pub const Error = struct {
extra: union { extra: union {
none: void, none: void,
expected_tag: Token.Tag, expected_tag: Token.Tag,
offset: usize,
} = .{ .none = {} }, } = .{ .none = {} },
pub const Tag = enum { pub const Tag = enum {
@ -2996,6 +3010,9 @@ pub const Error = struct {
/// `expected_tag` is populated. /// `expected_tag` is populated.
expected_token, expected_token,
/// `offset` is populated with the offending byte's offset from the start of the token.
invalid_byte,
}; };
}; };

View File

@ -14017,6 +14017,39 @@ fn lowerAstErrors(astgen: *AstGen) !void {
var notes: std.ArrayListUnmanaged(u32) = .empty; var notes: std.ArrayListUnmanaged(u32) = .empty;
defer notes.deinit(gpa); defer notes.deinit(gpa);
const token_starts = tree.tokens.items(.start);
const token_tags = tree.tokens.items(.tag);
const parse_err = tree.errors[0];
const tok = parse_err.token + @intFromBool(parse_err.token_is_prev);
const tok_start = token_starts[tok];
const start_char = tree.source[tok_start];
if (token_tags[tok] == .invalid and
(start_char == '\"' or start_char == '\'' or start_char == '/' or mem.startsWith(u8, tree.source[tok_start..], "\\\\")))
{
const tok_len: u32 = @intCast(tree.tokenSlice(tok).len);
const tok_end = tok_start + tok_len;
const bad_off = blk: {
var idx = tok_start;
while (idx < tok_end) : (idx += 1) {
switch (tree.source[idx]) {
0x00...0x09, 0x0b...0x1f, 0x7f => break,
else => {},
}
}
break :blk idx - tok_start;
};
const err: Ast.Error = .{
.tag = Ast.Error.Tag.invalid_byte,
.token = tok,
.extra = .{ .offset = bad_off },
};
msg.clearRetainingCapacity();
try tree.renderError(err, msg.writer(gpa));
return try astgen.appendErrorTokNotesOff(tok, bad_off, "{s}", .{msg.items}, notes.items);
}
var cur_err = tree.errors[0]; var cur_err = tree.errors[0];
for (tree.errors[1..]) |err| { for (tree.errors[1..]) |err| {
if (err.is_note) { if (err.is_note) {

View File

@ -5,4 +5,4 @@ b";
// backend=stage2 // backend=stage2
// target=native // target=native
// //
// :1:13: error: expected expression, found 'invalid token' // :1:15: error: string literal contains invalid byte: '\n'

View File

@ -0,0 +1,8 @@
// Some comment
export fn entry() void {}
// error
// backend=stage2
// target=native
//
// :1:8: error: comment contains invalid byte: '\t'

View File

@ -0,0 +1,8 @@
/// Some doc comment
export fn entry() void {}
// error
// backend=stage2
// target=native
//
// :1:13: error: comment contains invalid byte: '\t'

View File

@ -0,0 +1,13 @@
export fn entry() void {
const foo =
\\const S = struct {
\\ // hello
\\}
;
_ = foo;
}
// error
// backend=stage2
// target=native
//
// :4:11: error: string literal contains invalid byte: '\t'

View File

@ -0,0 +1,10 @@
export fn entry() void {
const foo = " hello";
_ = foo;
}
// error
// backend=stage2
// target=native
//
// :2:18: error: string literal contains invalid byte: '\t'

View File

@ -217,7 +217,7 @@ pub fn addCases(ctx: *Cases, b: *std.Build) !void {
const case = ctx.obj("invalid byte in string", b.graph.host); const case = ctx.obj("invalid byte in string", b.graph.host);
case.addError("_ = \"\x01Q\";", &[_][]const u8{ case.addError("_ = \"\x01Q\";", &[_][]const u8{
":1:5: error: expected expression, found 'invalid token'", ":1:6: error: string literal contains invalid byte: '\\x01'",
}); });
} }
@ -225,7 +225,7 @@ pub fn addCases(ctx: *Cases, b: *std.Build) !void {
const case = ctx.obj("invalid byte in comment", b.graph.host); const case = ctx.obj("invalid byte in comment", b.graph.host);
case.addError("//\x01Q", &[_][]const u8{ case.addError("//\x01Q", &[_][]const u8{
":1:1: error: expected type expression, found 'invalid token'", ":1:3: error: comment contains invalid byte: '\\x01'",
}); });
} }
@ -233,7 +233,7 @@ pub fn addCases(ctx: *Cases, b: *std.Build) !void {
const case = ctx.obj("control character in character literal", b.graph.host); const case = ctx.obj("control character in character literal", b.graph.host);
case.addError("const c = '\x01';", &[_][]const u8{ case.addError("const c = '\x01';", &[_][]const u8{
":1:11: error: expected expression, found 'invalid token'", ":1:12: error: character literal contains invalid byte: '\\x01'",
}); });
} }