diff --git a/lib/std/zig/Ast.zig b/lib/std/zig/Ast.zig index 3f69ce5aeb..d4503b95ca 100644 --- a/lib/std/zig/Ast.zig +++ b/lib/std/zig/Ast.zig @@ -458,6 +458,19 @@ pub fn renderError(tree: Ast, parse_error: Error, stream: anytype) !void { return stream.writeAll("for input is not captured"); }, + .invalid_byte => { + const tok_slice = tree.source[tree.tokens.items(.start)[parse_error.token]..]; + return stream.print("{s} contains invalid byte: '{'}'", .{ + switch (tok_slice[0]) { + '\'' => "character literal", + '"', '\\' => "string literal", + '/' => "comment", + else => unreachable, + }, + std.zig.fmtEscapes(tok_slice[parse_error.extra.offset..][0..1]), + }); + }, + .expected_token => { const found_tag = token_tags[parse_error.token + @intFromBool(parse_error.token_is_prev)]; const expected_symbol = parse_error.extra.expected_tag.symbol(); @@ -2926,6 +2939,7 @@ pub const Error = struct { extra: union { none: void, expected_tag: Token.Tag, + offset: usize, } = .{ .none = {} }, pub const Tag = enum { @@ -2996,6 +3010,9 @@ pub const Error = struct { /// `expected_tag` is populated. expected_token, + + /// `offset` is populated + invalid_byte, }; }; diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index b2cc85b98b..c105a371ef 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -14017,6 +14017,39 @@ fn lowerAstErrors(astgen: *AstGen) !void { var notes: std.ArrayListUnmanaged(u32) = .empty; defer notes.deinit(gpa); + const token_starts = tree.tokens.items(.start); + const token_tags = tree.tokens.items(.tag); + const parse_err = tree.errors[0]; + const tok = parse_err.token + @intFromBool(parse_err.token_is_prev); + const tok_start = token_starts[tok]; + const start_char = tree.source[tok_start]; + + if (token_tags[tok] == .invalid and + (start_char == '\"' or start_char == '\'' or start_char == '/' or mem.startsWith(u8, tree.source[tok_start..], "\\\\"))) + { + const tok_len: u32 = @intCast(tree.tokenSlice(tok).len); + const tok_end = tok_start + tok_len; + const bad_off = blk: { + var idx = tok_start; + while (idx < tok_end) : (idx += 1) { + switch (tree.source[idx]) { + 0x00...0x09, 0x0b...0x1f, 0x7f => break, + else => {}, + } + } + break :blk idx - tok_start; + }; + + const err: Ast.Error = .{ + .tag = Ast.Error.Tag.invalid_byte, + .token = tok, + .extra = .{ .offset = bad_off }, + }; + msg.clearRetainingCapacity(); + try tree.renderError(err, msg.writer(gpa)); + return try astgen.appendErrorTokNotesOff(tok, bad_off, "{s}", .{msg.items}, notes.items); + } + var cur_err = tree.errors[0]; for (tree.errors[1..]) |err| { if (err.is_note) { diff --git a/test/cases/compile_errors/normal_string_with_newline.zig b/test/cases/compile_errors/normal_string_with_newline.zig index f19ce59ec8..71fc6352f1 100644 --- a/test/cases/compile_errors/normal_string_with_newline.zig +++ b/test/cases/compile_errors/normal_string_with_newline.zig @@ -5,4 +5,4 @@ b"; // backend=stage2 // target=native // -// :1:13: error: expected expression, found 'invalid token' +// :1:15: error: string literal contains invalid byte: '\n' diff --git a/test/cases/compile_errors/tab_inside_comment.zig b/test/cases/compile_errors/tab_inside_comment.zig new file mode 100644 index 0000000000..75ed7e24cd --- /dev/null +++ b/test/cases/compile_errors/tab_inside_comment.zig @@ -0,0 +1,8 @@ +// Some comment +export fn entry() void {} + +// error +// backend=stage2 +// target=native +// +// :1:8: error: comment contains invalid byte: '\t' diff --git a/test/cases/compile_errors/tab_inside_doc_comment.zig b/test/cases/compile_errors/tab_inside_doc_comment.zig new file mode 100644 index 0000000000..7c3b0dcf33 --- /dev/null +++ b/test/cases/compile_errors/tab_inside_doc_comment.zig @@ -0,0 +1,8 @@ +/// Some doc comment +export fn entry() void {} + +// error +// backend=stage2 +// target=native +// +// :1:13: error: comment contains invalid byte: '\t' diff --git a/test/cases/compile_errors/tab_inside_multiline_string.zig b/test/cases/compile_errors/tab_inside_multiline_string.zig new file mode 100644 index 0000000000..49ff26adde --- /dev/null +++ b/test/cases/compile_errors/tab_inside_multiline_string.zig @@ -0,0 +1,13 @@ +export fn entry() void { + const foo = + \\const S = struct { + \\ // hello + \\} + ; + _ = foo; +} +// error +// backend=stage2 +// target=native +// +// :4:11: error: string literal contains invalid byte: '\t' diff --git a/test/cases/compile_errors/tab_inside_string.zig b/test/cases/compile_errors/tab_inside_string.zig new file mode 100644 index 0000000000..017dadc461 --- /dev/null +++ b/test/cases/compile_errors/tab_inside_string.zig @@ -0,0 +1,10 @@ +export fn entry() void { + const foo = " hello"; + _ = foo; +} + +// error +// backend=stage2 +// target=native +// +// :2:18: error: string literal contains invalid byte: '\t' diff --git a/test/compile_errors.zig b/test/compile_errors.zig index 13d97ba256..8472cfbf7e 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -217,7 +217,7 @@ pub fn addCases(ctx: *Cases, b: *std.Build) !void { const case = ctx.obj("invalid byte in string", b.graph.host); case.addError("_ = \"\x01Q\";", &[_][]const u8{ - ":1:5: error: expected expression, found 'invalid token'", + ":1:6: error: string literal contains invalid byte: '\\x01'", }); } @@ -225,7 +225,7 @@ pub fn addCases(ctx: *Cases, b: *std.Build) !void { const case = ctx.obj("invalid byte in comment", b.graph.host); case.addError("//\x01Q", &[_][]const u8{ - ":1:1: error: expected type expression, found 'invalid token'", + ":1:3: error: comment contains invalid byte: '\\x01'", }); } @@ -233,7 +233,7 @@ pub fn addCases(ctx: *Cases, b: *std.Build) !void { const case = ctx.obj("control character in character literal", b.graph.host); case.addError("const c = '\x01';", &[_][]const u8{ - ":1:11: error: expected expression, found 'invalid token'", + ":1:12: error: character literal contains invalid byte: '\\x01'", }); }