diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index d277fb2502..77d74c52ee 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -416,6 +416,51 @@ static void handle_string_escape(Tokenize *t, uint8_t c) {
     }
 }
 
+// Maps a control character to its backslash escape spelled out as text
+// (e.g. a tab byte yields the two characters \t); nullptr if there is none.
+static const char* get_escape_shorthand(uint8_t c) {
+    switch (c) {
+        case '\0':
+            return "\\0";
+        case '\a':
+            return "\\a";
+        case '\b':
+            return "\\b";
+        case '\t':
+            return "\\t";
+        case '\n':
+            return "\\n";
+        case '\v':
+            return "\\v";
+        case '\f':
+            return "\\f";
+        case '\r':
+            return "\\r";
+        default:
+            return nullptr;
+    }
+}
+
+// Reports an invalid-character error for byte c through tokenize_error.
+// A carriage return gets a dedicated message; printable bytes are shown
+// as-is; other bytes are shown as an escape shorthand when one exists,
+// otherwise as a two-digit hex escape.
+static void invalid_char_error(Tokenize *t, uint8_t c) {
+    if (c == '\r') {
+        tokenize_error(t, "invalid carriage return, only '\\n' line endings are supported");
+    } else if (isprint(c)) {
+        tokenize_error(t, "invalid character: '%c'", c);
+    } else {
+        const char *sh = get_escape_shorthand(c);
+        if (sh) {
+            tokenize_error(t, "invalid character: '%s'", sh);
+        } else {
+            // Zero-pad so every byte renders as exactly two hex digits.
+            tokenize_error(t, "invalid character: '\\x%02x'", c);
+        }
+    }
+}
+
 void tokenize(Buf *buf, Tokenization *out) {
     Tokenize t = {0};
     t.out = out;
@@ -580,7 +625,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         t.state = TokenizeStateSawQuestionMark;
                         break;
                     default:
-                        tokenize_error(&t, "invalid character: '%c'", c);
+                        invalid_char_error(&t, c);
                 }
                 break;
             case TokenizeStateSawQuestionMark:
@@ -890,7 +935,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         t.state = TokenizeStateLineString;
                         break;
                     default:
-                        tokenize_error(&t, "invalid character: '%c'", c);
+                        invalid_char_error(&t, c);
                         break;
                 }
                 break;
@@ -919,7 +964,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         break;
                     case '\\':
                         if (t.cur_tok->data.str_lit.is_c_str) {
-                            tokenize_error(&t, "invalid character: '%c'", c);
+                            invalid_char_error(&t, c);
                         }
                         t.state = TokenizeStateLineStringContinue;
                         break;
@@ -949,7 +994,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         buf_append_char(&t.cur_tok->data.str_lit.str, '\n');
                         break;
                     default:
-                        tokenize_error(&t, "invalid character: '%c'", c);
+                        invalid_char_error(&t, c);
                         break;
                 }
                 break;
@@ -1073,7 +1118,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         handle_string_escape(&t, '\"');
                         break;
                     default:
-                        tokenize_error(&t, "invalid character: '%c'", c);
+                        invalid_char_error(&t, c);
                 }
                 break;
             case TokenizeStateCharCode:
@@ -1147,7 +1192,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         t.state = TokenizeStateStart;
                         break;
                     default:
-                        tokenize_error(&t, "invalid character: '%c'", c);
+                        invalid_char_error(&t, c);
                 }
                 break;
             case TokenizeStateZero:
@@ -1189,7 +1234,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         uint32_t digit_value = get_digit_value(c);
                         if (digit_value >= t.radix) {
                             if (is_symbol_char(c)) {
-                                tokenize_error(&t, "invalid character: '%c'", c);
+                                invalid_char_error(&t, c);
                             }
                             // not my char
                             t.pos -= 1;
@@ -1233,7 +1278,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         uint32_t digit_value = get_digit_value(c);
                         if (digit_value >= t.radix) {
                             if (is_symbol_char(c)) {
-                                tokenize_error(&t, "invalid character: '%c'", c);
+                                invalid_char_error(&t, c);
                             }
                             // not my char
                             t.pos -= 1;
@@ -1282,7 +1327,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         uint32_t digit_value = get_digit_value(c);
                         if (digit_value >= t.radix) {
                             if (is_symbol_char(c)) {
-                                tokenize_error(&t, "invalid character: '%c'", c);
+                                invalid_char_error(&t, c);
                             }
                             // not my char
                             t.pos -= 1;
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index f3201aea9a..f8e08d599f 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -2252,4 +2252,27 @@ pub fn addCases(cases: &tests.CompileErrorContext) {
         \\}
     ,
         ".tmp_source.zig:9:13: error: type '&MyType' does not support field access");
+
+    cases.add("carriage return special case",
+        "fn test() -> bool {\r\n" ++
+        " true\r\n" ++
+        "}\r\n"
+    ,
+        ".tmp_source.zig:1:20: error: invalid carriage return, only '\\n' line endings are supported");
+
+    cases.add("non-printable invalid character",
+        "\xff\xfe" ++
+        \\fn test() -> bool {\r
+        \\ true\r
+        \\}
+    ,
+        ".tmp_source.zig:1:1: error: invalid character: '\\xff'");
+
+    cases.add("non-printable invalid character with escape alternative",
+        "fn test() -> bool {\n" ++
+        "\ttrue\n" ++
+        "}\n"
+    ,
+        ".tmp_source.zig:2:1: error: invalid character: '\\t'");
+
 }