mirror of
https://github.com/ziglang/zig.git
synced 2026-02-04 13:43:46 +00:00
Part of #19063. Primarily, this moves Aro from deps/ to lib/compiler/ so that it can be lazily compiled from source. src/aro_translate_c.zig is moved to lib/compiler/aro_translate_c.zig and some of the Zig CLI logic is moved to a main() function there. aro_translate_c.zig becomes the "common" import for clang-based translate-c. Not all of the compiler could be disentangled from Aro, however, so, for now, it is still compiled with the main compiler sources because the clang-based translate-c depends on it. Once aro-based translate-c achieves feature parity with the clang-based translate-c implementation, the clang-based one can be removed from Zig. Aro was unnecessarily difficult to depend on because of these .def files and all these Zig module requirements. I looked at the .def files and made these observations: - The canonical source is the llvm .def files. - Therefore there is an update process to sync with llvm that involves regenerating the .def files in Aro. - Therefore you might as well just regenerate the .zig files directly and check those into Aro. - Also, with a small amount of tinkering, the on-disk size of these generated .zig files can be made many times smaller, without compromising type safety in the usage of the data. This would make things much easier on Zig as a downstream project; in particular, we could remove those pesky stubs when bootstrapping. I have gone ahead with these changes since they unblock me, and I will have a chat with Vexu to see what he thinks.
2175 lines
68 KiB
Zig
Vendored
2175 lines
68 KiB
Zig
Vendored
const std = @import("std");
|
||
const assert = std.debug.assert;
|
||
const Compilation = @import("Compilation.zig");
|
||
const Source = @import("Source.zig");
|
||
const LangOpts = @import("LangOpts.zig");
|
||
|
||
pub const Token = struct {
|
||
id: Id,
|
||
source: Source.Id,
|
||
start: u32 = 0,
|
||
end: u32 = 0,
|
||
line: u32 = 0,
|
||
|
||
pub const Id = enum(u8) {
|
||
invalid,
|
||
nl,
|
||
whitespace,
|
||
eof,
|
||
/// identifier containing solely basic character set characters
|
||
identifier,
|
||
/// identifier with at least one extended character
|
||
extended_identifier,
|
||
|
||
// string literals with prefixes
|
||
string_literal,
|
||
string_literal_utf_16,
|
||
string_literal_utf_8,
|
||
string_literal_utf_32,
|
||
string_literal_wide,
|
||
|
||
/// Any string literal with an embedded newline or EOF
|
||
/// Always a parser error; by default just a warning from preprocessor
|
||
unterminated_string_literal,
|
||
|
||
// <foobar> only generated by preprocessor
|
||
macro_string,
|
||
|
||
// char literals with prefixes
|
||
char_literal,
|
||
char_literal_utf_8,
|
||
char_literal_utf_16,
|
||
char_literal_utf_32,
|
||
char_literal_wide,
|
||
|
||
/// Any character literal with nothing inside the quotes
|
||
/// Always a parser error; by default just a warning from preprocessor
|
||
empty_char_literal,
|
||
|
||
/// Any character literal with an embedded newline or EOF
|
||
/// Always a parser error; by default just a warning from preprocessor
|
||
unterminated_char_literal,
|
||
|
||
/// `/* */` style comment without a closing `*/` before EOF
|
||
unterminated_comment,
|
||
|
||
/// Integer literal tokens generated by preprocessor.
|
||
one,
|
||
zero,
|
||
|
||
bang,
|
||
bang_equal,
|
||
pipe,
|
||
pipe_pipe,
|
||
pipe_equal,
|
||
equal,
|
||
equal_equal,
|
||
l_paren,
|
||
r_paren,
|
||
l_brace,
|
||
r_brace,
|
||
l_bracket,
|
||
r_bracket,
|
||
period,
|
||
ellipsis,
|
||
caret,
|
||
caret_equal,
|
||
plus,
|
||
plus_plus,
|
||
plus_equal,
|
||
minus,
|
||
minus_minus,
|
||
minus_equal,
|
||
asterisk,
|
||
asterisk_equal,
|
||
percent,
|
||
percent_equal,
|
||
arrow,
|
||
colon,
|
||
colon_colon,
|
||
semicolon,
|
||
slash,
|
||
slash_equal,
|
||
comma,
|
||
ampersand,
|
||
ampersand_ampersand,
|
||
ampersand_equal,
|
||
question_mark,
|
||
angle_bracket_left,
|
||
angle_bracket_left_equal,
|
||
angle_bracket_angle_bracket_left,
|
||
angle_bracket_angle_bracket_left_equal,
|
||
angle_bracket_right,
|
||
angle_bracket_right_equal,
|
||
angle_bracket_angle_bracket_right,
|
||
angle_bracket_angle_bracket_right_equal,
|
||
tilde,
|
||
hash,
|
||
hash_hash,
|
||
|
||
/// Special token to speed up preprocessing, `loc.end` will be an index to the param list.
|
||
macro_param,
|
||
/// Special token to signal that the argument must be replaced without expansion (e.g. in concatenation)
|
||
macro_param_no_expand,
|
||
/// Special token to speed up preprocessing, `loc.end` will be an index to the param list.
|
||
stringify_param,
|
||
/// Same as stringify_param, but for var args
|
||
stringify_va_args,
|
||
/// Special macro whitespace, always equal to a single space
|
||
macro_ws,
|
||
/// Special token for implementing __has_attribute
|
||
macro_param_has_attribute,
|
||
/// Special token for implementing __has_c_attribute
|
||
macro_param_has_c_attribute,
|
||
/// Special token for implementing __has_declspec_attribute
|
||
macro_param_has_declspec_attribute,
|
||
/// Special token for implementing __has_warning
|
||
macro_param_has_warning,
|
||
/// Special token for implementing __has_feature
|
||
macro_param_has_feature,
|
||
/// Special token for implementing __has_extension
|
||
macro_param_has_extension,
|
||
/// Special token for implementing __has_builtin
|
||
macro_param_has_builtin,
|
||
/// Special token for implementing __has_include
|
||
macro_param_has_include,
|
||
/// Special token for implementing __has_include_next
|
||
macro_param_has_include_next,
|
||
/// Special token for implementing __has_embed
|
||
macro_param_has_embed,
|
||
/// Special token for implementing __is_identifier
|
||
macro_param_is_identifier,
|
||
/// Special token for implementing __FILE__
|
||
macro_file,
|
||
/// Special token for implementing __LINE__
|
||
macro_line,
|
||
/// Special token for implementing __COUNTER__
|
||
macro_counter,
|
||
/// Special token for implementing _Pragma
|
||
macro_param_pragma_operator,
|
||
|
||
/// Special identifier for implementing __func__
|
||
macro_func,
|
||
/// Special identifier for implementing __FUNCTION__
|
||
macro_function,
|
||
/// Special identifier for implementing __PRETTY_FUNCTION__
|
||
macro_pretty_func,
|
||
|
||
keyword_auto,
|
||
keyword_auto_type,
|
||
keyword_break,
|
||
keyword_case,
|
||
keyword_char,
|
||
keyword_const,
|
||
keyword_continue,
|
||
keyword_default,
|
||
keyword_do,
|
||
keyword_double,
|
||
keyword_else,
|
||
keyword_enum,
|
||
keyword_extern,
|
||
keyword_float,
|
||
keyword_for,
|
||
keyword_goto,
|
||
keyword_if,
|
||
keyword_int,
|
||
keyword_long,
|
||
keyword_register,
|
||
keyword_return,
|
||
keyword_short,
|
||
keyword_signed,
|
||
keyword_sizeof,
|
||
keyword_static,
|
||
keyword_struct,
|
||
keyword_switch,
|
||
keyword_typedef,
|
||
keyword_typeof1,
|
||
keyword_typeof2,
|
||
keyword_union,
|
||
keyword_unsigned,
|
||
keyword_void,
|
||
keyword_volatile,
|
||
keyword_while,
|
||
|
||
// ISO C99
|
||
keyword_bool,
|
||
keyword_complex,
|
||
keyword_imaginary,
|
||
keyword_inline,
|
||
keyword_restrict,
|
||
|
||
// ISO C11
|
||
keyword_alignas,
|
||
keyword_alignof,
|
||
keyword_atomic,
|
||
keyword_generic,
|
||
keyword_noreturn,
|
||
keyword_static_assert,
|
||
keyword_thread_local,
|
||
|
||
// ISO C23
|
||
keyword_bit_int,
|
||
keyword_c23_alignas,
|
||
keyword_c23_alignof,
|
||
keyword_c23_bool,
|
||
keyword_c23_static_assert,
|
||
keyword_c23_thread_local,
|
||
keyword_constexpr,
|
||
keyword_true,
|
||
keyword_false,
|
||
keyword_nullptr,
|
||
keyword_typeof_unqual,
|
||
|
||
// Preprocessor directives
|
||
keyword_include,
|
||
keyword_include_next,
|
||
keyword_embed,
|
||
keyword_define,
|
||
keyword_defined,
|
||
keyword_undef,
|
||
keyword_ifdef,
|
||
keyword_ifndef,
|
||
keyword_elif,
|
||
keyword_elifdef,
|
||
keyword_elifndef,
|
||
keyword_endif,
|
||
keyword_error,
|
||
keyword_warning,
|
||
keyword_pragma,
|
||
keyword_line,
|
||
keyword_va_args,
|
||
keyword_va_opt,
|
||
|
||
// gcc keywords
|
||
keyword_const1,
|
||
keyword_const2,
|
||
keyword_inline1,
|
||
keyword_inline2,
|
||
keyword_volatile1,
|
||
keyword_volatile2,
|
||
keyword_restrict1,
|
||
keyword_restrict2,
|
||
keyword_alignof1,
|
||
keyword_alignof2,
|
||
keyword_typeof,
|
||
keyword_attribute1,
|
||
keyword_attribute2,
|
||
keyword_extension,
|
||
keyword_asm,
|
||
keyword_asm1,
|
||
keyword_asm2,
|
||
keyword_float80,
|
||
/// _Float128
|
||
keyword_float128_1,
|
||
/// __float128
|
||
keyword_float128_2,
|
||
keyword_int128,
|
||
keyword_imag1,
|
||
keyword_imag2,
|
||
keyword_real1,
|
||
keyword_real2,
|
||
keyword_float16,
|
||
|
||
// clang keywords
|
||
keyword_fp16,
|
||
|
||
// ms keywords
|
||
keyword_declspec,
|
||
keyword_int64,
|
||
keyword_int64_2,
|
||
keyword_int32,
|
||
keyword_int32_2,
|
||
keyword_int16,
|
||
keyword_int16_2,
|
||
keyword_int8,
|
||
keyword_int8_2,
|
||
keyword_stdcall,
|
||
keyword_stdcall2,
|
||
keyword_thiscall,
|
||
keyword_thiscall2,
|
||
keyword_vectorcall,
|
||
keyword_vectorcall2,
|
||
|
||
// builtins that require special parsing
|
||
builtin_choose_expr,
|
||
builtin_va_arg,
|
||
builtin_offsetof,
|
||
builtin_bitoffsetof,
|
||
builtin_types_compatible_p,
|
||
|
||
/// Generated by #embed directive
|
||
/// Decimal value with no prefix or suffix
|
||
embed_byte,
|
||
|
||
/// preprocessor number
|
||
/// An optional period, followed by a digit 0-9, followed by any number of letters
|
||
/// digits, underscores, periods, and exponents (e+, e-, E+, E-, p+, p-, P+, P-)
|
||
pp_num,
|
||
|
||
/// preprocessor placemarker token
|
||
/// generated if `##` is used with a zero-token argument
|
||
/// removed after substitution, so the parser should never see this
|
||
/// See C99 6.10.3.3.2
|
||
placemarker,
|
||
|
||
/// Virtual linemarker token output from preprocessor to indicate start of a new include
|
||
include_start,
|
||
|
||
/// Virtual linemarker token output from preprocessor to indicate resuming a file after
|
||
/// completion of the preceding #include
|
||
include_resume,
|
||
|
||
/// A comment token if asked to preserve comments.
|
||
comment,
|
||
|
||
/// Reports whether `id` is an identifier or one of the keyword-like tags
/// (language keywords, preprocessor directive names, `__func__`-style
/// names and the specially parsed builtins).
pub fn isMacroIdentifier(id: Id) bool {
    return switch (id) {
        // plain identifiers
        .identifier,
        .extended_identifier,
        => true,

        // preprocessor directive names
        .keyword_include,
        .keyword_include_next,
        .keyword_embed,
        .keyword_define,
        .keyword_defined,
        .keyword_undef,
        .keyword_ifdef,
        .keyword_ifndef,
        .keyword_elif,
        .keyword_elifdef,
        .keyword_elifndef,
        .keyword_endif,
        .keyword_error,
        .keyword_warning,
        .keyword_pragma,
        .keyword_line,
        .keyword_va_args,
        .keyword_va_opt,
        => true,

        // __func__, __FUNCTION__, __PRETTY_FUNCTION__
        .macro_func,
        .macro_function,
        .macro_pretty_func,
        => true,

        // base language keywords
        .keyword_auto,
        .keyword_auto_type,
        .keyword_break,
        .keyword_case,
        .keyword_char,
        .keyword_const,
        .keyword_continue,
        .keyword_default,
        .keyword_do,
        .keyword_double,
        .keyword_else,
        .keyword_enum,
        .keyword_extern,
        .keyword_float,
        .keyword_for,
        .keyword_goto,
        .keyword_if,
        .keyword_int,
        .keyword_long,
        .keyword_register,
        .keyword_return,
        .keyword_short,
        .keyword_signed,
        .keyword_sizeof,
        .keyword_static,
        .keyword_struct,
        .keyword_switch,
        .keyword_typedef,
        .keyword_union,
        .keyword_unsigned,
        .keyword_void,
        .keyword_volatile,
        .keyword_while,
        => true,

        // ISO C99
        .keyword_bool,
        .keyword_complex,
        .keyword_imaginary,
        .keyword_inline,
        .keyword_restrict,
        => true,

        // ISO C11
        .keyword_alignas,
        .keyword_alignof,
        .keyword_atomic,
        .keyword_generic,
        .keyword_noreturn,
        .keyword_static_assert,
        .keyword_thread_local,
        => true,

        // ISO C23
        .keyword_bit_int,
        .keyword_c23_alignas,
        .keyword_c23_alignof,
        .keyword_c23_bool,
        .keyword_c23_static_assert,
        .keyword_c23_thread_local,
        .keyword_constexpr,
        .keyword_true,
        .keyword_false,
        .keyword_nullptr,
        .keyword_typeof_unqual,
        => true,

        // gcc keywords
        .keyword_typeof,
        .keyword_typeof1,
        .keyword_typeof2,
        .keyword_const1,
        .keyword_const2,
        .keyword_inline1,
        .keyword_inline2,
        .keyword_volatile1,
        .keyword_volatile2,
        .keyword_restrict1,
        .keyword_restrict2,
        .keyword_alignof1,
        .keyword_alignof2,
        .keyword_attribute1,
        .keyword_attribute2,
        .keyword_extension,
        .keyword_asm,
        .keyword_asm1,
        .keyword_asm2,
        .keyword_float80,
        .keyword_float128_1,
        .keyword_float128_2,
        .keyword_int128,
        .keyword_imag1,
        .keyword_imag2,
        .keyword_real1,
        .keyword_real2,
        .keyword_float16,
        => true,

        // clang keywords
        .keyword_fp16 => true,

        // ms keywords
        .keyword_declspec,
        .keyword_int64,
        .keyword_int64_2,
        .keyword_int32,
        .keyword_int32_2,
        .keyword_int16,
        .keyword_int16_2,
        .keyword_int8,
        .keyword_int8_2,
        .keyword_stdcall,
        .keyword_stdcall2,
        .keyword_thiscall,
        .keyword_thiscall2,
        .keyword_vectorcall,
        .keyword_vectorcall2,
        => true,

        // builtins that require special parsing
        .builtin_choose_expr,
        .builtin_va_arg,
        .builtin_offsetof,
        .builtin_bitoffsetof,
        .builtin_types_compatible_p,
        => true,

        else => false,
    };
}
|
||
|
||
/// Rewrites `id.*` to `.identifier` when it currently holds a preprocessor
/// directive keyword tag.
/// `keyword_defined` is only rewritten when `defined_to_identifier` is true,
/// i.e. when we are *not* in an #if or #elif expression.
/// Every other tag is left unchanged.
pub fn simplifyMacroKeywordExtra(id: *Id, defined_to_identifier: bool) void {
    const becomes_identifier: bool = switch (id.*) {
        .keyword_include,
        .keyword_include_next,
        .keyword_embed,
        .keyword_define,
        .keyword_undef,
        .keyword_ifdef,
        .keyword_ifndef,
        .keyword_elif,
        .keyword_elifdef,
        .keyword_elifndef,
        .keyword_endif,
        .keyword_error,
        .keyword_warning,
        .keyword_pragma,
        .keyword_line,
        .keyword_va_args,
        .keyword_va_opt,
        => true,
        // `defined` keeps its keyword tag unless the caller opts in.
        .keyword_defined => defined_to_identifier,
        else => false,
    };
    if (becomes_identifier) id.* = .identifier;
}
|
||
|
||
/// Shorthand for `simplifyMacroKeywordExtra` with `defined_to_identifier`
/// set to false, so `keyword_defined` is never rewritten.
pub fn simplifyMacroKeyword(id: *Id) void {
    const defined_to_identifier = false;
    simplifyMacroKeywordExtra(id, defined_to_identifier);
}
|
||
|
||
/// Returns the fixed text associated with a token kind.
///
/// * `null` for kinds with no single fixed text in this table
///   (identifiers, literals, whitespace, numbers, comments, `invalid`).
/// * `""` for newline, EOF, the internal macro-parameter tokens and the
///   placemarker.
/// * Must not be called with `include_start` or `include_resume`; those
///   arms are `unreachable`.
pub fn lexeme(id: Id) ?[]const u8 {
    return switch (id) {
        .include_start,
        .include_resume,
        => unreachable,

        // no fixed text
        .unterminated_comment,
        .invalid,
        .identifier,
        .extended_identifier,
        .string_literal,
        .string_literal_utf_16,
        .string_literal_utf_8,
        .string_literal_utf_32,
        .string_literal_wide,
        .unterminated_string_literal,
        .unterminated_char_literal,
        .empty_char_literal,
        .char_literal,
        .char_literal_utf_8,
        .char_literal_utf_16,
        .char_literal_utf_32,
        .char_literal_wide,
        .macro_string,
        .whitespace,
        .pp_num,
        .embed_byte,
        .comment,
        => null,

        // empty text
        .nl,
        .eof,
        .macro_param,
        .macro_param_no_expand,
        .stringify_param,
        .stringify_va_args,
        .macro_param_has_attribute,
        .macro_param_has_c_attribute,
        .macro_param_has_declspec_attribute,
        .macro_param_has_warning,
        .macro_param_has_feature,
        .macro_param_has_extension,
        .macro_param_has_builtin,
        .macro_param_has_include,
        .macro_param_has_include_next,
        .macro_param_has_embed,
        .macro_param_is_identifier,
        .macro_file,
        .macro_line,
        .macro_counter,
        .macro_param_pragma_operator,
        .placemarker,
        => "",
        .macro_ws => " ",

        // integer literal tokens generated by the preprocessor
        .zero => "0",
        .one => "1",

        // function-name identifiers
        .macro_func => "__func__",
        .macro_function => "__FUNCTION__",
        .macro_pretty_func => "__PRETTY_FUNCTION__",

        // punctuators
        .bang => "!",
        .bang_equal => "!=",
        .pipe => "|",
        .pipe_pipe => "||",
        .pipe_equal => "|=",
        .equal => "=",
        .equal_equal => "==",
        .l_paren => "(",
        .r_paren => ")",
        .l_brace => "{",
        .r_brace => "}",
        .l_bracket => "[",
        .r_bracket => "]",
        .period => ".",
        .ellipsis => "...",
        .caret => "^",
        .caret_equal => "^=",
        .plus => "+",
        .plus_plus => "++",
        .plus_equal => "+=",
        .minus => "-",
        .minus_minus => "--",
        .minus_equal => "-=",
        .asterisk => "*",
        .asterisk_equal => "*=",
        .percent => "%",
        .percent_equal => "%=",
        .arrow => "->",
        .colon => ":",
        .colon_colon => "::",
        .semicolon => ";",
        .slash => "/",
        .slash_equal => "/=",
        .comma => ",",
        .ampersand => "&",
        .ampersand_ampersand => "&&",
        .ampersand_equal => "&=",
        .question_mark => "?",
        .angle_bracket_left => "<",
        .angle_bracket_left_equal => "<=",
        .angle_bracket_angle_bracket_left => "<<",
        .angle_bracket_angle_bracket_left_equal => "<<=",
        .angle_bracket_right => ">",
        .angle_bracket_right_equal => ">=",
        .angle_bracket_angle_bracket_right => ">>",
        .angle_bracket_angle_bracket_right_equal => ">>=",
        .tilde => "~",
        .hash => "#",
        .hash_hash => "##",

        // base language keywords
        .keyword_auto => "auto",
        .keyword_break => "break",
        .keyword_case => "case",
        .keyword_char => "char",
        .keyword_const => "const",
        .keyword_continue => "continue",
        .keyword_default => "default",
        .keyword_do => "do",
        .keyword_double => "double",
        .keyword_else => "else",
        .keyword_enum => "enum",
        .keyword_extern => "extern",
        .keyword_float => "float",
        .keyword_for => "for",
        .keyword_goto => "goto",
        .keyword_if => "if",
        .keyword_int => "int",
        .keyword_long => "long",
        .keyword_register => "register",
        .keyword_return => "return",
        .keyword_short => "short",
        .keyword_signed => "signed",
        .keyword_sizeof => "sizeof",
        .keyword_static => "static",
        .keyword_struct => "struct",
        .keyword_switch => "switch",
        .keyword_typedef => "typedef",
        .keyword_union => "union",
        .keyword_unsigned => "unsigned",
        .keyword_void => "void",
        .keyword_volatile => "volatile",
        .keyword_while => "while",

        // ISO C99
        .keyword_bool => "_Bool",
        .keyword_complex => "_Complex",
        .keyword_imaginary => "_Imaginary",
        .keyword_inline => "inline",
        .keyword_restrict => "restrict",

        // ISO C11
        .keyword_alignas => "_Alignas",
        .keyword_alignof => "_Alignof",
        .keyword_atomic => "_Atomic",
        .keyword_generic => "_Generic",
        .keyword_noreturn => "_Noreturn",
        .keyword_static_assert => "_Static_assert",
        .keyword_thread_local => "_Thread_local",

        // ISO C23
        .keyword_bit_int => "_BitInt",
        .keyword_c23_alignas => "alignas",
        .keyword_c23_alignof => "alignof",
        .keyword_c23_bool => "bool",
        .keyword_c23_static_assert => "static_assert",
        .keyword_c23_thread_local => "thread_local",
        .keyword_constexpr => "constexpr",
        .keyword_true => "true",
        .keyword_false => "false",
        .keyword_nullptr => "nullptr",
        .keyword_typeof_unqual => "typeof_unqual",

        // preprocessor directives
        .keyword_include => "include",
        .keyword_include_next => "include_next",
        .keyword_embed => "embed",
        .keyword_define => "define",
        .keyword_defined => "defined",
        .keyword_undef => "undef",
        .keyword_ifdef => "ifdef",
        .keyword_ifndef => "ifndef",
        .keyword_elif => "elif",
        .keyword_elifdef => "elifdef",
        .keyword_elifndef => "elifndef",
        .keyword_endif => "endif",
        .keyword_error => "error",
        .keyword_warning => "warning",
        .keyword_pragma => "pragma",
        .keyword_line => "line",
        .keyword_va_args => "__VA_ARGS__",
        .keyword_va_opt => "__VA_OPT__",

        // gcc keywords
        .keyword_auto_type => "__auto_type",
        .keyword_const1 => "__const",
        .keyword_const2 => "__const__",
        .keyword_inline1 => "__inline",
        .keyword_inline2 => "__inline__",
        .keyword_volatile1 => "__volatile",
        .keyword_volatile2 => "__volatile__",
        .keyword_restrict1 => "__restrict",
        .keyword_restrict2 => "__restrict__",
        .keyword_alignof1 => "__alignof",
        .keyword_alignof2 => "__alignof__",
        .keyword_typeof => "typeof",
        .keyword_typeof1 => "__typeof",
        .keyword_typeof2 => "__typeof__",
        .keyword_attribute1 => "__attribute",
        .keyword_attribute2 => "__attribute__",
        .keyword_extension => "__extension__",
        .keyword_asm => "asm",
        .keyword_asm1 => "__asm",
        .keyword_asm2 => "__asm__",
        .keyword_float80 => "__float80",
        .keyword_float128_1 => "_Float128",
        .keyword_float128_2 => "__float128",
        .keyword_int128 => "__int128",
        .keyword_imag1 => "__imag",
        .keyword_imag2 => "__imag__",
        .keyword_real1 => "__real",
        .keyword_real2 => "__real__",
        .keyword_float16 => "_Float16",

        // clang keywords
        .keyword_fp16 => "__fp16",

        // ms keywords
        .keyword_declspec => "__declspec",
        .keyword_int64 => "__int64",
        .keyword_int64_2 => "_int64",
        .keyword_int32 => "__int32",
        .keyword_int32_2 => "_int32",
        .keyword_int16 => "__int16",
        .keyword_int16_2 => "_int16",
        .keyword_int8 => "__int8",
        .keyword_int8_2 => "_int8",
        .keyword_stdcall => "__stdcall",
        .keyword_stdcall2 => "_stdcall",
        .keyword_thiscall => "__thiscall",
        .keyword_thiscall2 => "_thiscall",
        .keyword_vectorcall => "__vectorcall",
        .keyword_vectorcall2 => "_vectorcall",

        // builtins that require special parsing
        .builtin_choose_expr => "__builtin_choose_expr",
        .builtin_va_arg => "__builtin_va_arg",
        .builtin_offsetof => "__builtin_offsetof",
        .builtin_bitoffsetof => "__builtin_bitoffsetof",
        .builtin_types_compatible_p => "__builtin_types_compatible_p",
    };
}
|
||
|
||
/// Returns a short description of a token kind.
///
/// Identifier-like, string, character and number kinds map to a generic
/// lower-case phrase ("an identifier", "a string literal",
/// "a character literal", "a number"); every other kind maps to its fixed
/// text from `lexeme`.
///
/// Must not be called with `macro_string` or `invalid` (`unreachable`).
/// The `else` arm unwraps `lexeme()` with `.?`, so kinds for which `lexeme`
/// returns null and that are not listed below (e.g. `whitespace`, `comment`,
/// `unterminated_comment`) must not be passed either.
pub fn symbol(id: Id) []const u8 {
    return switch (id) {
        .macro_string, .invalid => unreachable,
        .identifier,
        .extended_identifier,
        .macro_func,
        .macro_function,
        .macro_pretty_func,
        .builtin_choose_expr,
        .builtin_va_arg,
        .builtin_offsetof,
        .builtin_bitoffsetof,
        .builtin_types_compatible_p,
        => "an identifier",
        .string_literal,
        .string_literal_utf_16,
        .string_literal_utf_8,
        .string_literal_utf_32,
        .string_literal_wide,
        .unterminated_string_literal,
        => "a string literal",
        .char_literal,
        .char_literal_utf_8,
        .char_literal_utf_16,
        .char_literal_utf_32,
        .char_literal_wide,
        .unterminated_char_literal,
        .empty_char_literal,
        => "a character literal",
        // Lower-case article, consistent with the other descriptions.
        .pp_num, .embed_byte => "a number",
        else => id.lexeme().?,
    };
}
|
||
|
||
/// Reports whether `id` may begin an expression parsed by Preprocessor.expr.
/// eof, r_paren and string literals cannot really start a preprocessor
/// expression; they are accepted here so that the parser can produce a
/// nicer error message.
pub fn validPreprocessorExprStart(id: Id) bool {
    switch (id) {
        // accepted only for the sake of better diagnostics
        .eof,
        .r_paren,
        .string_literal,
        .string_literal_utf_16,
        .string_literal_utf_8,
        .string_literal_utf_32,
        .string_literal_wide,
        => return true,

        // character literals
        .char_literal,
        .char_literal_utf_8,
        .char_literal_utf_16,
        .char_literal_utf_32,
        .char_literal_wide,
        => return true,

        // parenthesis and prefix operators
        .l_paren,
        .plus,
        .minus,
        .tilde,
        .bang,
        => return true,

        // names, `defined`, numbers and boolean keywords
        .identifier,
        .extended_identifier,
        .keyword_defined,
        .one,
        .zero,
        .pp_num,
        .keyword_true,
        .keyword_false,
        => return true,

        else => return false,
    }
}
|
||
|
||
/// Returns `langopts.hasDigraphs()` for the bracket, brace and hash tokens,
/// and false for every other token kind.
pub fn allowsDigraphs(id: Id, langopts: LangOpts) bool {
    const is_candidate = switch (id) {
        .l_bracket,
        .r_bracket,
        .l_brace,
        .r_brace,
        .hash,
        .hash_hash,
        => true,
        else => false,
    };
    // `and` short-circuits, so the language options are only consulted for
    // the six candidate tokens.
    return is_candidate and langopts.hasDigraphs();
}
|
||
|
||
/// Returns true for every spelling of `volatile` and `inline`, for `goto`,
/// and for `(`.
/// NOTE(review): judging by the name, these are the tokens that may follow
/// the `asm` keyword in a GCC-style asm statement; confirm against the parser.
pub fn canOpenGCCAsmStmt(id: Id) bool {
    switch (id) {
        .keyword_volatile,
        .keyword_volatile1,
        .keyword_volatile2,
        .keyword_inline,
        .keyword_inline1,
        .keyword_inline2,
        .keyword_goto,
        .l_paren,
        => return true,
        else => return false,
    }
}
|
||
|
||
/// Reports whether `id` is one of the five terminated string literal kinds
/// (plain, UTF-16, UTF-8, UTF-32 or wide). `unterminated_string_literal`
/// and `macro_string` are not included.
pub fn isStringLiteral(id: Id) bool {
    switch (id) {
        .string_literal,
        .string_literal_utf_16,
        .string_literal_utf_8,
        .string_literal_utf_32,
        .string_literal_wide,
        => return true,
        else => return false,
    }
}
|
||
};
|
||
|
||
/// Classifies the identifier text `str`.
///
/// Returns `.identifier` when `str` is not in `all_kws`, or when it is a
/// keyword that the given language options do not enable; otherwise returns
/// the keyword's token id.
///
/// Double underscore and underscore + capital letter identifiers belong to
/// the implementation namespace, so those are always converted to keywords.
pub fn getTokenId(langopts: LangOpts, str: []const u8) Token.Id {
    const keyword = all_kws.get(str) orelse return .identifier;
    const standard = langopts.standard;

    const enabled: bool = switch (keyword) {
        .keyword_inline => standard.isGNU() or standard.atLeast(.c99),
        .keyword_restrict => standard.atLeast(.c99),
        .keyword_typeof => standard.isGNU() or standard.atLeast(.c23),
        .keyword_asm => standard.isGNU(),
        .keyword_declspec => langopts.declspec_attrs,

        // spellings that only become keywords from C23 on
        .keyword_c23_alignas,
        .keyword_c23_alignof,
        .keyword_c23_bool,
        .keyword_c23_static_assert,
        .keyword_c23_thread_local,
        .keyword_constexpr,
        .keyword_true,
        .keyword_false,
        .keyword_nullptr,
        .keyword_typeof_unqual,
        .keyword_elifdef,
        .keyword_elifndef,
        => standard.atLeast(.c23),

        // spellings gated on `langopts.ms_extensions`
        .keyword_int64,
        .keyword_int64_2,
        .keyword_int32,
        .keyword_int32_2,
        .keyword_int16,
        .keyword_int16_2,
        .keyword_int8,
        .keyword_int8_2,
        .keyword_stdcall2,
        .keyword_thiscall2,
        .keyword_vectorcall2,
        => langopts.ms_extensions,

        else => true,
    };
    return if (enabled) keyword else .identifier;
}
|
||
|
||
/// Compile-time map from keyword spelling to token id, consulted by
/// `getTokenId`. It holds every spelling regardless of language options.
const all_kws = std.ComptimeStringMap(Id, .{
    // The labeled block exists only to call @setEvalBranchQuota from inside
    // the initializer expression; its value is plain `.keyword_auto`.
    .{ "auto", raise_quota: {
        @setEvalBranchQuota(3000);
        break :raise_quota .keyword_auto;
    } },
    .{ "break", .keyword_break },
    .{ "case", .keyword_case },
    .{ "char", .keyword_char },
    .{ "const", .keyword_const },
    .{ "continue", .keyword_continue },
    .{ "default", .keyword_default },
    .{ "do", .keyword_do },
    .{ "double", .keyword_double },
    .{ "else", .keyword_else },
    .{ "enum", .keyword_enum },
    .{ "extern", .keyword_extern },
    .{ "float", .keyword_float },
    .{ "for", .keyword_for },
    .{ "goto", .keyword_goto },
    .{ "if", .keyword_if },
    .{ "int", .keyword_int },
    .{ "long", .keyword_long },
    .{ "register", .keyword_register },
    .{ "return", .keyword_return },
    .{ "short", .keyword_short },
    .{ "signed", .keyword_signed },
    .{ "sizeof", .keyword_sizeof },
    .{ "static", .keyword_static },
    .{ "struct", .keyword_struct },
    .{ "switch", .keyword_switch },
    .{ "typedef", .keyword_typedef },
    .{ "union", .keyword_union },
    .{ "unsigned", .keyword_unsigned },
    .{ "void", .keyword_void },
    .{ "volatile", .keyword_volatile },
    .{ "while", .keyword_while },
    .{ "__typeof__", .keyword_typeof2 },
    .{ "__typeof", .keyword_typeof1 },

    // ISO C99
    .{ "_Bool", .keyword_bool },
    .{ "_Complex", .keyword_complex },
    .{ "_Imaginary", .keyword_imaginary },
    .{ "inline", .keyword_inline },
    .{ "restrict", .keyword_restrict },

    // ISO C11
    .{ "_Alignas", .keyword_alignas },
    .{ "_Alignof", .keyword_alignof },
    .{ "_Atomic", .keyword_atomic },
    .{ "_Generic", .keyword_generic },
    .{ "_Noreturn", .keyword_noreturn },
    .{ "_Static_assert", .keyword_static_assert },
    .{ "_Thread_local", .keyword_thread_local },

    // ISO C23
    .{ "_BitInt", .keyword_bit_int },
    .{ "alignas", .keyword_c23_alignas },
    .{ "alignof", .keyword_c23_alignof },
    .{ "bool", .keyword_c23_bool },
    .{ "static_assert", .keyword_c23_static_assert },
    .{ "thread_local", .keyword_c23_thread_local },
    .{ "constexpr", .keyword_constexpr },
    .{ "true", .keyword_true },
    .{ "false", .keyword_false },
    .{ "nullptr", .keyword_nullptr },
    .{ "typeof_unqual", .keyword_typeof_unqual },

    // Preprocessor directives
    .{ "include", .keyword_include },
    .{ "include_next", .keyword_include_next },
    .{ "embed", .keyword_embed },
    .{ "define", .keyword_define },
    .{ "defined", .keyword_defined },
    .{ "undef", .keyword_undef },
    .{ "ifdef", .keyword_ifdef },
    .{ "ifndef", .keyword_ifndef },
    .{ "elif", .keyword_elif },
    .{ "elifdef", .keyword_elifdef },
    .{ "elifndef", .keyword_elifndef },
    .{ "endif", .keyword_endif },
    .{ "error", .keyword_error },
    .{ "warning", .keyword_warning },
    .{ "pragma", .keyword_pragma },
    .{ "line", .keyword_line },
    .{ "__VA_ARGS__", .keyword_va_args },
    .{ "__VA_OPT__", .keyword_va_opt },
    .{ "__func__", .macro_func },
    .{ "__FUNCTION__", .macro_function },
    .{ "__PRETTY_FUNCTION__", .macro_pretty_func },

    // gcc keywords
    .{ "__auto_type", .keyword_auto_type },
    .{ "__const", .keyword_const1 },
    .{ "__const__", .keyword_const2 },
    .{ "__inline", .keyword_inline1 },
    .{ "__inline__", .keyword_inline2 },
    .{ "__volatile", .keyword_volatile1 },
    .{ "__volatile__", .keyword_volatile2 },
    .{ "__restrict", .keyword_restrict1 },
    .{ "__restrict__", .keyword_restrict2 },
    .{ "__alignof", .keyword_alignof1 },
    .{ "__alignof__", .keyword_alignof2 },
    .{ "typeof", .keyword_typeof },
    .{ "__attribute", .keyword_attribute1 },
    .{ "__attribute__", .keyword_attribute2 },
    .{ "__extension__", .keyword_extension },
    .{ "asm", .keyword_asm },
    .{ "__asm", .keyword_asm1 },
    .{ "__asm__", .keyword_asm2 },
    .{ "__float80", .keyword_float80 },
    .{ "_Float128", .keyword_float128_1 },
    .{ "__float128", .keyword_float128_2 },
    .{ "__int128", .keyword_int128 },
    .{ "__imag", .keyword_imag1 },
    .{ "__imag__", .keyword_imag2 },
    .{ "__real", .keyword_real1 },
    .{ "__real__", .keyword_real2 },
    .{ "_Float16", .keyword_float16 },

    // clang keywords
    .{ "__fp16", .keyword_fp16 },

    // ms keywords
    .{ "__declspec", .keyword_declspec },
    .{ "__int64", .keyword_int64 },
    .{ "_int64", .keyword_int64_2 },
    .{ "__int32", .keyword_int32 },
    .{ "_int32", .keyword_int32_2 },
    .{ "__int16", .keyword_int16 },
    .{ "_int16", .keyword_int16_2 },
    .{ "__int8", .keyword_int8 },
    .{ "_int8", .keyword_int8_2 },
    .{ "__stdcall", .keyword_stdcall },
    .{ "_stdcall", .keyword_stdcall2 },
    .{ "__thiscall", .keyword_thiscall },
    .{ "_thiscall", .keyword_thiscall2 },
    .{ "__vectorcall", .keyword_vectorcall },
    .{ "_vectorcall", .keyword_vectorcall2 },

    // builtins that require special parsing
    .{ "__builtin_choose_expr", .builtin_choose_expr },
    .{ "__builtin_va_arg", .builtin_va_arg },
    .{ "__builtin_offsetof", .builtin_offsetof },
    .{ "__builtin_bitoffsetof", .builtin_bitoffsetof },
    .{ "__builtin_types_compatible_p", .builtin_types_compatible_p },
});
|
||
};
|
||
|
||
const Tokenizer = @This();

/// Bytes being tokenized; `next` reads `buf[index]`.
buf: []const u8,
/// Offset into `buf` of the next byte `next` will look at.
index: u32 = 0,
/// Id of the source being tokenized.
source: Source.Id,
/// Language options consulted while tokenizing (e.g. `dollars_in_identifiers`).
langopts: LangOpts,
/// Line counter; starts at 1 and is incremented by `next`, e.g. when it
/// emits an `nl` token.
line: u32 = 1,
|
||
|
||
/// Scans one token starting at `self.index` and returns it.
///
/// The scanner is a byte-driven state machine. Each iteration looks at one
/// byte of `self.buf`; a `break` ends the token, and branches that have fully
/// consumed the current byte bump `self.index` before breaking so that the
/// byte is not scanned again by the following call. When the buffer runs out
/// mid-token, the `else` branch of the loop resolves whatever state was
/// pending.
///
/// The returned token carries the id, the `[start, end)` byte range, the
/// tokenizer's `source` and the value of `self.line` *after* the token was
/// scanned. Once the buffer is exhausted every call returns `.eof`.
///
/// Carriage returns and backslash-newline pairs are marked `unreachable`
/// below; presumably the source buffer has been normalised before it reaches
/// the tokenizer - verify against the code that builds `buf`.
pub fn next(self: *Tokenizer) Token {
    var state: enum {
        start,
        whitespace,
        u,
        u8,
        U,
        L,
        string_literal,
        char_literal_start,
        char_literal,
        char_escape_sequence,
        string_escape_sequence,
        identifier,
        extended_identifier,
        equal,
        bang,
        pipe,
        colon,
        percent,
        asterisk,
        plus,
        angle_bracket_left,
        angle_bracket_angle_bracket_left,
        angle_bracket_right,
        angle_bracket_angle_bracket_right,
        caret,
        period,
        period2,
        minus,
        slash,
        ampersand,
        hash,
        hash_digraph,
        hash_hash_digraph_partial,
        line_comment,
        multi_line_comment,
        multi_line_comment_asterisk,
        multi_line_comment_done,
        pp_num,
        pp_num_exponent,
        pp_num_digit_separator,
    } = .start;

    var start = self.index;
    var id: Token.Id = .eof;

    while (self.index < self.buf.len) : (self.index += 1) {
        const c = self.buf[self.index];
        switch (state) {
            .start => switch (c) {
                '\n' => {
                    id = .nl;
                    self.index += 1;
                    self.line += 1;
                    break;
                },
                '"' => {
                    id = .string_literal;
                    state = .string_literal;
                },
                '\'' => {
                    id = .char_literal;
                    state = .char_literal_start;
                },
                // `u`, `U` and `L` may introduce a prefixed literal, so they
                // get their own states instead of going straight to
                // `.identifier`.
                'u' => state = .u,
                'U' => state = .U,
                'L' => state = .L,
                'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => state = .identifier,
                '=' => state = .equal,
                '!' => state = .bang,
                '|' => state = .pipe,
                '(' => {
                    id = .l_paren;
                    self.index += 1;
                    break;
                },
                ')' => {
                    id = .r_paren;
                    self.index += 1;
                    break;
                },
                '[' => {
                    id = .l_bracket;
                    self.index += 1;
                    break;
                },
                ']' => {
                    id = .r_bracket;
                    self.index += 1;
                    break;
                },
                ';' => {
                    id = .semicolon;
                    self.index += 1;
                    break;
                },
                ',' => {
                    id = .comma;
                    self.index += 1;
                    break;
                },
                '?' => {
                    id = .question_mark;
                    self.index += 1;
                    break;
                },
                ':' => state = .colon,
                '%' => state = .percent,
                '*' => state = .asterisk,
                '+' => state = .plus,
                '<' => state = .angle_bracket_left,
                '>' => state = .angle_bracket_right,
                '^' => state = .caret,
                '{' => {
                    id = .l_brace;
                    self.index += 1;
                    break;
                },
                '}' => {
                    id = .r_brace;
                    self.index += 1;
                    break;
                },
                '~' => {
                    id = .tilde;
                    self.index += 1;
                    break;
                },
                '.' => state = .period,
                '-' => state = .minus,
                '/' => state = .slash,
                '&' => state = .ampersand,
                '#' => state = .hash,
                '0'...'9' => state = .pp_num,
                '\t', '\x0B', '\x0C', ' ' => state = .whitespace,
                '$' => if (self.langopts.dollars_in_identifiers) {
                    state = .extended_identifier;
                } else {
                    id = .invalid;
                    self.index += 1;
                    break;
                },
                // With MS extensions enabled, byte 0x1A ends the input. The
                // index is deliberately not advanced, so every later call
                // lands here again and keeps returning `.eof`.
                0x1A => if (self.langopts.ms_extensions) {
                    id = .eof;
                    break;
                } else {
                    id = .invalid;
                    self.index += 1;
                    break;
                },
                0x80...0xFF => state = .extended_identifier,
                else => {
                    id = .invalid;
                    self.index += 1;
                    break;
                },
            },
            .whitespace => switch (c) {
                '\t', '\x0B', '\x0C', ' ' => {},
                else => {
                    id = .whitespace;
                    break;
                },
            },
            .u => switch (c) {
                '8' => {
                    state = .u8;
                },
                '\'' => {
                    id = .char_literal_utf_16;
                    state = .char_literal_start;
                },
                '\"' => {
                    id = .string_literal_utf_16;
                    state = .string_literal;
                },
                else => {
                    // Not a literal prefix: step back so this byte is
                    // re-examined as part of an ordinary identifier.
                    self.index -= 1;
                    state = .identifier;
                },
            },
            .u8 => switch (c) {
                '\"' => {
                    id = .string_literal_utf_8;
                    state = .string_literal;
                },
                '\'' => {
                    id = .char_literal_utf_8;
                    state = .char_literal_start;
                },
                else => {
                    self.index -= 1;
                    state = .identifier;
                },
            },
            .U => switch (c) {
                '\'' => {
                    id = .char_literal_utf_32;
                    state = .char_literal_start;
                },
                '\"' => {
                    id = .string_literal_utf_32;
                    state = .string_literal;
                },
                else => {
                    self.index -= 1;
                    state = .identifier;
                },
            },
            .L => switch (c) {
                '\'' => {
                    id = .char_literal_wide;
                    state = .char_literal_start;
                },
                '\"' => {
                    id = .string_literal_wide;
                    state = .string_literal;
                },
                else => {
                    self.index -= 1;
                    state = .identifier;
                },
            },
            .string_literal => switch (c) {
                '\\' => {
                    state = .string_escape_sequence;
                },
                '"' => {
                    // `id` was already set when the literal was opened.
                    self.index += 1;
                    break;
                },
                '\n' => {
                    id = .unterminated_string_literal;
                    break;
                },
                '\r' => unreachable,
                else => {},
            },
            .char_literal_start => switch (c) {
                '\\' => {
                    state = .char_escape_sequence;
                },
                '\'' => {
                    id = .empty_char_literal;
                    self.index += 1;
                    break;
                },
                '\n' => {
                    id = .unterminated_char_literal;
                    break;
                },
                else => {
                    state = .char_literal;
                },
            },
            .char_literal => switch (c) {
                '\\' => {
                    state = .char_escape_sequence;
                },
                '\'' => {
                    self.index += 1;
                    break;
                },
                '\n' => {
                    id = .unterminated_char_literal;
                    break;
                },
                else => {},
            },
            .char_escape_sequence => switch (c) {
                '\r', '\n' => unreachable, // removed by line splicing
                else => state = .char_literal,
            },
            .string_escape_sequence => switch (c) {
                '\r', '\n' => unreachable, // removed by line splicing
                else => state = .string_literal,
            },
            .identifier, .extended_identifier => switch (c) {
                'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
                '$' => if (self.langopts.dollars_in_identifiers) {
                    state = .extended_identifier;
                } else {
                    id = if (state == .identifier) Token.getTokenId(self.langopts, self.buf[start..self.index]) else .extended_identifier;
                    break;
                },
                0x80...0xFF => state = .extended_identifier,
                else => {
                    // Only plain identifiers are looked up as keywords.
                    id = if (state == .identifier) Token.getTokenId(self.langopts, self.buf[start..self.index]) else .extended_identifier;
                    break;
                },
            },
            .equal => switch (c) {
                '=' => {
                    id = .equal_equal;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .equal;
                    break;
                },
            },
            .bang => switch (c) {
                '=' => {
                    id = .bang_equal;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .bang;
                    break;
                },
            },
            .pipe => switch (c) {
                '=' => {
                    id = .pipe_equal;
                    self.index += 1;
                    break;
                },
                '|' => {
                    id = .pipe_pipe;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .pipe;
                    break;
                },
            },
            .colon => switch (c) {
                '>' => {
                    // `:>` is the digraph spelling of `]`.
                    if (self.langopts.hasDigraphs()) {
                        id = .r_bracket;
                        self.index += 1;
                    } else {
                        id = .colon;
                    }
                    break;
                },
                ':' => {
                    if (self.langopts.standard.atLeast(.c23)) {
                        id = .colon_colon;
                        self.index += 1;
                        break;
                    } else {
                        id = .colon;
                        break;
                    }
                },
                else => {
                    id = .colon;
                    break;
                },
            },
            .percent => switch (c) {
                '=' => {
                    id = .percent_equal;
                    self.index += 1;
                    break;
                },
                '>' => {
                    // `%>` is the digraph spelling of `}`.
                    if (self.langopts.hasDigraphs()) {
                        id = .r_brace;
                        self.index += 1;
                    } else {
                        id = .percent;
                    }
                    break;
                },
                ':' => {
                    // `%:` is the digraph spelling of `#`; it may still grow
                    // into `%:%:` (`##`).
                    if (self.langopts.hasDigraphs()) {
                        state = .hash_digraph;
                    } else {
                        id = .percent;
                        break;
                    }
                },
                else => {
                    id = .percent;
                    break;
                },
            },
            .asterisk => switch (c) {
                '=' => {
                    id = .asterisk_equal;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .asterisk;
                    break;
                },
            },
            .plus => switch (c) {
                '=' => {
                    id = .plus_equal;
                    self.index += 1;
                    break;
                },
                '+' => {
                    id = .plus_plus;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .plus;
                    break;
                },
            },
            .angle_bracket_left => switch (c) {
                '<' => state = .angle_bracket_angle_bracket_left,
                '=' => {
                    id = .angle_bracket_left_equal;
                    self.index += 1;
                    break;
                },
                ':' => {
                    // `<:` is the digraph spelling of `[`.
                    if (self.langopts.hasDigraphs()) {
                        id = .l_bracket;
                        self.index += 1;
                    } else {
                        id = .angle_bracket_left;
                    }
                    break;
                },
                '%' => {
                    // `<%` is the digraph spelling of `{`.
                    if (self.langopts.hasDigraphs()) {
                        id = .l_brace;
                        self.index += 1;
                    } else {
                        id = .angle_bracket_left;
                    }
                    break;
                },
                else => {
                    id = .angle_bracket_left;
                    break;
                },
            },
            .angle_bracket_angle_bracket_left => switch (c) {
                '=' => {
                    id = .angle_bracket_angle_bracket_left_equal;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .angle_bracket_angle_bracket_left;
                    break;
                },
            },
            .angle_bracket_right => switch (c) {
                '>' => state = .angle_bracket_angle_bracket_right,
                '=' => {
                    id = .angle_bracket_right_equal;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .angle_bracket_right;
                    break;
                },
            },
            .angle_bracket_angle_bracket_right => switch (c) {
                '=' => {
                    id = .angle_bracket_angle_bracket_right_equal;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .angle_bracket_angle_bracket_right;
                    break;
                },
            },
            .caret => switch (c) {
                '=' => {
                    id = .caret_equal;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .caret;
                    break;
                },
            },
            .period => switch (c) {
                '.' => state = .period2,
                '0'...'9' => state = .pp_num,
                else => {
                    id = .period;
                    break;
                },
            },
            .period2 => switch (c) {
                '.' => {
                    id = .ellipsis;
                    self.index += 1;
                    break;
                },
                else => {
                    // Only two dots: emit the first and step back so the
                    // second is scanned again by the next call.
                    id = .period;
                    self.index -= 1;
                    break;
                },
            },
            .minus => switch (c) {
                '>' => {
                    id = .arrow;
                    self.index += 1;
                    break;
                },
                '=' => {
                    id = .minus_equal;
                    self.index += 1;
                    break;
                },
                '-' => {
                    id = .minus_minus;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .minus;
                    break;
                },
            },
            .ampersand => switch (c) {
                '&' => {
                    id = .ampersand_ampersand;
                    self.index += 1;
                    break;
                },
                '=' => {
                    id = .ampersand_equal;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .ampersand;
                    break;
                },
            },
            .hash => switch (c) {
                '#' => {
                    id = .hash_hash;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .hash;
                    break;
                },
            },
            .hash_digraph => switch (c) {
                '%' => state = .hash_hash_digraph_partial,
                else => {
                    id = .hash;
                    break;
                },
            },
            .hash_hash_digraph_partial => switch (c) {
                ':' => {
                    id = .hash_hash;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .hash;
                    self.index -= 1; // re-tokenize the percent
                    break;
                },
            },
            .slash => switch (c) {
                '/' => state = .line_comment,
                '*' => state = .multi_line_comment,
                '=' => {
                    id = .slash_equal;
                    self.index += 1;
                    break;
                },
                else => {
                    id = .slash;
                    break;
                },
            },
            .line_comment => switch (c) {
                '\n' => {
                    if (self.langopts.preserve_comments) {
                        id = .comment;
                        break;
                    }
                    // Step back so the loop increment lands on the newline
                    // again and the `.start` state turns it into `.nl`.
                    self.index -= 1;
                    state = .start;
                },
                else => {},
            },
            .multi_line_comment => switch (c) {
                '*' => state = .multi_line_comment_asterisk,
                '\n' => self.line += 1,
                else => {},
            },
            .multi_line_comment_asterisk => switch (c) {
                '/' => {
                    if (self.langopts.preserve_comments) {
                        self.index += 1;
                        id = .comment;
                        break;
                    }
                    state = .multi_line_comment_done;
                },
                '\n' => {
                    self.line += 1;
                    state = .multi_line_comment;
                },
                '*' => {},
                else => state = .multi_line_comment,
            },
            // A discarded block comment is reported as whitespace, or merged
            // into the whitespace / newline that directly follows it.
            .multi_line_comment_done => switch (c) {
                '\n' => {
                    start = self.index;
                    id = .nl;
                    self.index += 1;
                    self.line += 1;
                    break;
                },
                '\r' => unreachable,
                '\t', '\x0B', '\x0C', ' ' => {
                    start = self.index;
                    state = .whitespace;
                },
                else => {
                    id = .whitespace;
                    break;
                },
            },
            .pp_num => switch (c) {
                'a'...'d',
                'A'...'D',
                'f'...'o',
                'F'...'O',
                'q'...'z',
                'Q'...'Z',
                '0'...'9',
                '_',
                '.',
                => {},
                'e', 'E', 'p', 'P' => state = .pp_num_exponent,
                '\'' => if (self.langopts.standard.atLeast(.c23)) {
                    state = .pp_num_digit_separator;
                } else {
                    id = .pp_num;
                    break;
                },
                else => {
                    id = .pp_num;
                    break;
                },
            },
            .pp_num_digit_separator => switch (c) {
                'a'...'d',
                'A'...'D',
                'f'...'o',
                'F'...'O',
                'q'...'z',
                'Q'...'Z',
                '0'...'9',
                '_',
                => state = .pp_num,
                else => {
                    // The separator is not followed by a digit or letter, so
                    // it does not belong to the number: step back onto it.
                    self.index -= 1;
                    id = .pp_num;
                    break;
                },
            },
            .pp_num_exponent => switch (c) {
                'a'...'d',
                'f'...'o',
                'q'...'z',
                'A'...'D',
                'F'...'O',
                'Q'...'Z',
                '0'...'9',
                '_',
                '.',
                '+',
                '-',
                => state = .pp_num,
                // Any of these letters may itself be followed by a sign, so
                // remain in the exponent state. Previously only `p`/`P` did,
                // which split inputs such as `1pe+1` before the `+`.
                'e', 'E', 'p', 'P' => {},
                else => {
                    id = .pp_num;
                    break;
                },
            },
        }
    } else if (self.index == self.buf.len) {
        // The buffer ended without a `break`: resolve the pending state.
        switch (state) {
            .start => {},
            // A `//` comment that runs to the end of the buffer never sees
            // the newline that normally produces the `.comment` token, so
            // emit it here when comments are preserved. Otherwise `id` stays
            // `.eof`.
            .line_comment => if (self.langopts.preserve_comments) {
                id = .comment;
            },
            .u, .u8, .U, .L, .identifier => id = Token.getTokenId(self.langopts, self.buf[start..self.index]),
            .extended_identifier => id = .extended_identifier,

            .period2 => {
                self.index -= 1;
                id = .period;
            },

            .multi_line_comment,
            .multi_line_comment_asterisk,
            => id = .unterminated_comment,

            .char_escape_sequence, .char_literal, .char_literal_start => id = .unterminated_char_literal,
            .string_escape_sequence, .string_literal => id = .unterminated_string_literal,

            .whitespace => id = .whitespace,
            .multi_line_comment_done => id = .whitespace,

            .equal => id = .equal,
            .bang => id = .bang,
            .minus => id = .minus,
            .slash => id = .slash,
            .ampersand => id = .ampersand,
            .hash => id = .hash,
            .period => id = .period,
            .pipe => id = .pipe,
            .angle_bracket_angle_bracket_right => id = .angle_bracket_angle_bracket_right,
            .angle_bracket_right => id = .angle_bracket_right,
            .angle_bracket_angle_bracket_left => id = .angle_bracket_angle_bracket_left,
            .angle_bracket_left => id = .angle_bracket_left,
            .plus => id = .plus,
            .colon => id = .colon,
            .percent => id = .percent,
            .caret => id = .caret,
            .asterisk => id = .asterisk,
            .hash_digraph => id = .hash,
            .hash_hash_digraph_partial => {
                id = .hash;
                self.index -= 1; // re-tokenize the percent
            },
            .pp_num, .pp_num_exponent, .pp_num_digit_separator => id = .pp_num,
        }
    }

    return .{
        .id = id,
        .start = start,
        .end = self.index,
        .line = self.line,
        .source = self.source,
    };
}
|
||
|
||
/// Returns the next token that is neither `.whitespace` nor `.comment`.
pub fn nextNoWS(self: *Tokenizer) Token {
    while (true) {
        const tok = self.next();
        switch (tok.id) {
            .whitespace, .comment => continue,
            else => return tok,
        }
    }
}
|
||
|
||
/// Returns the next token that is not `.whitespace`. Unlike `nextNoWS`,
/// `.comment` tokens are *not* skipped and are handed back to the caller.
pub fn nextNoWSComments(self: *Tokenizer) Token {
    while (true) {
        const tok = self.next();
        if (tok.id != .whitespace) return tok;
    }
}
|
||
|
||
/// Try to tokenize a '::' even if not supported by the current language standard.
///
/// Fetches the next token that is neither whitespace nor a comment. If it is
/// a lone `:` directly followed by another `:`, the second colon is consumed
/// as well and the token is returned as `.colon_colon` spanning both bytes.
/// Any other token is returned unchanged.
pub fn colonColon(self: *Tokenizer) Token {
    var tok = self.nextNoWS();
    // The `:` may be the last byte of the buffer, so check the bound before
    // peeking at the following byte.
    if (tok.id == .colon and self.index < self.buf.len and self.buf[self.index] == ':') {
        self.index += 1;
        tok.id = .colon_colon;
        // Keep the reported range in step with the bytes actually consumed.
        tok.end = self.index;
    }
    return tok;
}
|
||
|
||
// Each row of expected tokens corresponds to one line of the input; every
// input line ends in a newline, hence the trailing `.nl` per row.
test "operators" {
    try expectTokens(
        \\ ! != | || |= = ==
        \\ ( ) { } [ ] . .. ...
        \\ ^ ^= + ++ += - -- -=
        \\ * *= % %= -> : ; / /=
        \\ , & && &= ? < <= <<
        \\ <<= > >= >> >>= ~ # ##
        \\
    , &.{
        // line 1
        .bang, .bang_equal, .pipe, .pipe_pipe, .pipe_equal, .equal, .equal_equal, .nl,
        // line 2: two adjacent dots are scanned as two separate periods
        .l_paren, .r_paren, .l_brace, .r_brace, .l_bracket, .r_bracket, .period, .period, .period, .ellipsis, .nl,
        // line 3
        .caret, .caret_equal, .plus, .plus_plus, .plus_equal, .minus, .minus_minus, .minus_equal, .nl,
        // line 4
        .asterisk, .asterisk_equal, .percent, .percent_equal, .arrow, .colon, .semicolon, .slash, .slash_equal, .nl,
        // line 5
        .comma, .ampersand, .ampersand_ampersand, .ampersand_equal, .question_mark, .angle_bracket_left, .angle_bracket_left_equal, .angle_bracket_angle_bracket_left, .nl,
        // line 6
        .angle_bracket_angle_bracket_left_equal, .angle_bracket_right, .angle_bracket_right_equal, .angle_bracket_angle_bracket_right, .angle_bracket_angle_bracket_right_equal, .tilde, .hash, .hash_hash, .nl,
    });
}
|
||
|
||
// Each row of expected tokens corresponds to one line of the input.
test "keywords" {
    try expectTokens(
        \\auto __auto_type break case char const continue default do
        \\double else enum extern float for goto if int
        \\long register return short signed sizeof static
        \\struct switch typedef union unsigned void volatile
        \\while _Bool _Complex _Imaginary inline restrict _Alignas
        \\_Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local
        \\__attribute __attribute__
        \\
    , &.{
        // line 1
        .keyword_auto, .keyword_auto_type, .keyword_break, .keyword_case, .keyword_char, .keyword_const, .keyword_continue, .keyword_default, .keyword_do, .nl,
        // line 2
        .keyword_double, .keyword_else, .keyword_enum, .keyword_extern, .keyword_float, .keyword_for, .keyword_goto, .keyword_if, .keyword_int, .nl,
        // line 3
        .keyword_long, .keyword_register, .keyword_return, .keyword_short, .keyword_signed, .keyword_sizeof, .keyword_static, .nl,
        // line 4
        .keyword_struct, .keyword_switch, .keyword_typedef, .keyword_union, .keyword_unsigned, .keyword_void, .keyword_volatile, .nl,
        // line 5
        .keyword_while, .keyword_bool, .keyword_complex, .keyword_imaginary, .keyword_inline, .keyword_restrict, .keyword_alignas, .nl,
        // line 6
        .keyword_alignof, .keyword_atomic, .keyword_generic, .keyword_noreturn, .keyword_static_assert, .keyword_thread_local, .nl,
        // line 7
        .keyword_attribute1, .keyword_attribute2, .nl,
    });
}
|
||
|
||
// Every directive line is expected to scan as `#`, the directive keyword and
// the terminating newline.
test "preprocessor keywords" {
    try expectTokens(
        \\#include
        \\#include_next
        \\#embed
        \\#define
        \\#ifdef
        \\#ifndef
        \\#error
        \\#pragma
        \\
    , &.{
        // #include
        .hash, .keyword_include, .nl,
        // #include_next
        .hash, .keyword_include_next, .nl,
        // #embed
        .hash, .keyword_embed, .nl,
        // #define
        .hash, .keyword_define, .nl,
        // #ifdef
        .hash, .keyword_ifdef, .nl,
        // #ifndef
        .hash, .keyword_ifndef, .nl,
        // #error
        .hash, .keyword_error, .nl,
        // #pragma
        .hash, .keyword_pragma, .nl,
    });
}
|
||
|
||
// Expected tokens are grouped per logical line: a physical line ending in a
// backslash is expected to continue into the next one without a `.nl`.
test "line continuation" {
    try expectTokens(
        \\#define foo \
        \\ bar
        \\"foo\
        \\ bar"
        \\#define "foo"
        \\ "bar"
        \\#define "foo" \
        \\ "bar"
    , &.{
        // define of foo continued onto the bar line
        .hash, .keyword_define, .identifier, .identifier, .nl,
        // string literal containing a continuation
        .string_literal, .nl,
        // define without continuation
        .hash, .keyword_define, .string_literal, .nl,
        // the separate line holding only a string
        .string_literal, .nl,
        // define continued onto the final line (input has no trailing newline)
        .hash, .keyword_define, .string_literal, .string_literal,
    });
}
|
||
|
||
// One literal per input line, so every expected literal is followed by `.nl`.
test "string prefix" {
    try expectTokens(
        \\"foo"
        \\u"foo"
        \\u8"foo"
        \\U"foo"
        \\L"foo"
        \\'foo'
        \\u8'A'
        \\u'foo'
        \\U'foo'
        \\L'foo'
        \\
    , &.{
        // string literals: unprefixed, u, u8, U, L
        .string_literal, .nl,
        .string_literal_utf_16, .nl,
        .string_literal_utf_8, .nl,
        .string_literal_utf_32, .nl,
        .string_literal_wide, .nl,
        // character literals: unprefixed, u8, u, U, L
        .char_literal, .nl,
        .char_literal_utf_8, .nl,
        .char_literal_utf_16, .nl,
        .char_literal_utf_32, .nl,
        .char_literal_wide, .nl,
    });
}
|
||
|
||
// Every number, whatever its suffix, is expected to scan as a single
// `.pp_num`. Each row of expected tokens corresponds to one input line.
test "num suffixes" {
    try expectTokens(
        \\ 1.0f 1.0L 1.0 .0 1. 0x1p0f 0X1p0
        \\ 0l 0lu 0ll 0llu 0
        \\ 1u 1ul 1ull 1
        \\ 1.0i 1.0I
        \\ 1.0if 1.0If 1.0fi 1.0fI
        \\ 1.0il 1.0Il 1.0li 1.0lI
        \\
    , &.{
        // line 1
        .pp_num, .pp_num, .pp_num, .pp_num, .pp_num, .pp_num, .pp_num, .nl,
        // line 2
        .pp_num, .pp_num, .pp_num, .pp_num, .pp_num, .nl,
        // line 3
        .pp_num, .pp_num, .pp_num, .pp_num, .nl,
        // line 4
        .pp_num, .pp_num, .nl,
        // line 5
        .pp_num, .pp_num, .pp_num, .pp_num, .nl,
        // line 6
        .pp_num, .pp_num, .pp_num, .pp_num, .nl,
    });
}
|
||
|
||
// With default options the line comment yields no token of its own; only
// the newline that ends it shows up.
test "comments" {
    try expectTokens(
        \\//foo
        \\#foo
    , &.{
        // comment line
        .nl,
        // directive-looking line
        .hash, .identifier,
    });
}
|
||
|
||
// Table of inputs containing non-ASCII bytes and the token ids each is
// expected to produce. Cases run in order and the first failure aborts.
test "extended identifiers" {
    const Case = struct {
        input: []const u8,
        expected: []const Token.Id,
    };
    const cases = [_]Case{
        // non-ASCII bytes directly, or after a would-be literal prefix
        .{ .input = "𝓪𝓻𝓸𝓬𝓬", .expected = &.{.extended_identifier} },
        .{ .input = "u𝓪𝓻𝓸𝓬𝓬", .expected = &.{.extended_identifier} },
        .{ .input = "u8𝓪𝓻𝓸𝓬𝓬", .expected = &.{.extended_identifier} },
        .{ .input = "U𝓪𝓻𝓸𝓬𝓬", .expected = &.{.extended_identifier} },
        .{ .input = "L𝓪𝓻𝓸𝓬𝓬", .expected = &.{.extended_identifier} },
        // non-ASCII bytes end a number or punctuator and start a new token
        .{ .input = "1™", .expected = &.{ .pp_num, .extended_identifier } },
        .{ .input = "1.™", .expected = &.{ .pp_num, .extended_identifier } },
        .{ .input = "..™", .expected = &.{ .period, .period, .extended_identifier } },
        .{ .input = "0™", .expected = &.{ .pp_num, .extended_identifier } },
        .{ .input = "0b\u{E0000}", .expected = &.{ .pp_num, .extended_identifier } },
        .{ .input = "0b0\u{E0000}", .expected = &.{ .pp_num, .extended_identifier } },
        .{ .input = "01\u{E0000}", .expected = &.{ .pp_num, .extended_identifier } },
        .{ .input = "010\u{E0000}", .expected = &.{ .pp_num, .extended_identifier } },
        .{ .input = "0x\u{E0000}", .expected = &.{ .pp_num, .extended_identifier } },
        .{ .input = "0x0\u{E0000}", .expected = &.{ .pp_num, .extended_identifier } },
        // inside a string literal they are just content
        .{ .input = "\"\\0\u{E0000}\"", .expected = &.{.string_literal} },
        .{ .input = "\"\\x\u{E0000}\"", .expected = &.{.string_literal} },
        .{ .input = "\"\\u\u{E0000}\"", .expected = &.{.string_literal} },
        // after an exponent marker
        .{ .input = "1e\u{E0000}", .expected = &.{ .pp_num, .extended_identifier } },
        .{ .input = "1e1\u{E0000}", .expected = &.{ .pp_num, .extended_identifier } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
|
||
|
||
test "digraphs" {
    // Each digraph spelling is expected to yield the token of the punctuator
    // it stands for.
    const all_digraphs = "%:<::><%%>%:%:";
    try expectTokens(all_digraphs, &.{
        .hash, .l_bracket, .r_bracket, .l_brace, .r_brace, .hash_hash,
    });

    // Inside a string literal the same bytes are plain content.
    const quoted_digraphs = "\"%:<::><%%>%:%:\"";
    try expectTokens(quoted_digraphs, &.{.string_literal});

    // An incomplete `%:%:` falls back to `#` followed by `%`.
    const partial_digraphs = "%:%42 %:%";
    try expectTokens(partial_digraphs, &.{
        .hash, .percent, .pp_num, .hash, .percent,
    });
}
|
||
|
||
// With the standard set to C23, these spellings are expected to scan as
// keywords.
test "C23 keywords" {
    const input = "true false alignas alignof bool static_assert thread_local nullptr typeof_unqual";
    const expected: []const Token.Id = &.{
        .keyword_true,             .keyword_false,            .keyword_c23_alignas,
        .keyword_c23_alignof,      .keyword_c23_bool,         .keyword_c23_static_assert,
        .keyword_c23_thread_local, .keyword_nullptr,          .keyword_typeof_unqual,
    };
    try expectTokensExtra(input, expected, .c23);
}
|
||
|
||
/// Test helper: tokenizes `contents` and checks that the token ids, ignoring
/// `.whitespace`, match `expected_tokens` in order and are then followed by
/// `.eof`.
///
/// When `standard` is non-null it replaces the compilation's default language
/// standard before tokenizing. On the first mismatch the expected and actual
/// tag names are printed and `error.TokensDoNotEqual` is returned.
fn expectTokensExtra(contents: []const u8, expected_tokens: []const Token.Id, standard: ?LangOpts.Standard) !void {
    var comp = Compilation.init(std.testing.allocator);
    defer comp.deinit();
    if (standard) |provided| comp.langopts.standard = provided;

    const source = try comp.addSourceFromBuffer("path", contents);
    var tokenizer: Tokenizer = .{
        .buf = source.buf,
        .source = source.id,
        .langopts = comp.langopts,
    };

    for (expected_tokens) |want| {
        // Whitespace tokens are not part of the expectation; skip them.
        var got = tokenizer.next();
        while (got.id == .whitespace) got = tokenizer.next();

        if (!std.meta.eql(got.id, want)) {
            std.debug.print("expected {s}, found {s}\n", .{ @tagName(want), @tagName(got.id) });
            return error.TokensDoNotEqual;
        }
    }

    // Nothing may remain after the expected tokens.
    const trailing = tokenizer.next();
    try std.testing.expect(trailing.id == .eof);
}
|
||
|
||
/// Test helper: same as `expectTokensExtra`, leaving the language standard at
/// the compilation's default.
fn expectTokens(contents: []const u8, expected_tokens: []const Token.Id) !void {
    try expectTokensExtra(contents, expected_tokens, null);
}
|