diff --git a/src/c_tokenizer.cpp b/src/c_tokenizer.cpp index 3532e7db91..920bcacf90 100644 --- a/src/c_tokenizer.cpp +++ b/src/c_tokenizer.cpp @@ -107,8 +107,11 @@ static void begin_token(CTokenize *ctok, CTokId id) { memset(&ctok->cur_tok->data.symbol, 0, sizeof(Buf)); buf_resize(&ctok->cur_tok->data.symbol, 0); break; - case CTokIdCharLit: case CTokIdNumLitInt: + ctok->cur_tok->data.num_lit_int.x = 0; + ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixNone; + break; + case CTokIdCharLit: case CTokIdNumLitFloat: case CTokIdMinus: break; @@ -138,9 +141,9 @@ static void add_char(CTokenize *ctok, uint8_t c) { static void hex_digit(CTokenize *ctok, uint8_t value) { // TODO @mul_with_overflow - ctok->cur_tok->data.num_lit_int *= 16; + ctok->cur_tok->data.num_lit_int.x *= 16; // TODO @add_with_overflow - ctok->cur_tok->data.num_lit_int += value; + ctok->cur_tok->data.num_lit_int.x += value; static const uint8_t hex_digit[] = "0123456789abcdef"; buf_append_char(&ctok->buf, hex_digit[value]); @@ -194,19 +197,15 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { break; case DIGIT_NON_ZERO: ctok->state = CTokStateDecimal; - ctok->unsigned_suffix = false; - ctok->long_suffix = false; begin_token(ctok, CTokIdNumLitInt); - ctok->cur_tok->data.num_lit_int = *c - '0'; + ctok->cur_tok->data.num_lit_int.x = *c - '0'; buf_resize(&ctok->buf, 0); buf_append_char(&ctok->buf, *c); break; case '0': ctok->state = CTokStateGotZero; - ctok->unsigned_suffix = false; - ctok->long_suffix = false; begin_token(ctok, CTokIdNumLitInt); - ctok->cur_tok->data.num_lit_int = 0; + ctok->cur_tok->data.num_lit_int.x = 0; buf_resize(&ctok->buf, 0); buf_append_char(&ctok->buf, '0'); break; @@ -289,21 +288,21 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { buf_append_char(&ctok->buf, *c); // TODO @mul_with_overflow - ctok->cur_tok->data.num_lit_int *= 10; + ctok->cur_tok->data.num_lit_int.x *= 10; // TODO @add_with_overflow - ctok->cur_tok->data.num_lit_int += *c - '0'; + ctok->cur_tok->data.num_lit_int.x += *c - '0'; break; case '\'': break; case 'u': case 'U': - ctok->unsigned_suffix = true; - ctok->state = CTokStateIntSuffix; + ctok->state = CTokStateNumLitIntSuffixU; + ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixU; break; case 'l': case 'L': - ctok->long_suffix = true; - ctok->state = CTokStateIntSuffixLong; + ctok->state = CTokStateNumLitIntSuffixL; + ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixL; break; case '.': buf_append_char(&ctok->buf, '.'); @@ -317,50 +316,6 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { continue; } break; - case CTokStateIntSuffix: - switch (*c) { - case 'l': - case 'L': - if (ctok->long_suffix) { - return mark_error(ctok); - } - ctok->long_suffix = true; - ctok->state = CTokStateIntSuffixLong; - break; - case 'u': - case 'U': - if (ctok->unsigned_suffix) { - return mark_error(ctok); - } - ctok->unsigned_suffix = true; - break; - default: - c -= 1; - end_token(ctok); - ctok->state = CTokStateStart; - continue; - } - break; - case CTokStateIntSuffixLong: - switch (*c) { - case 'l': - case 'L': - ctok->state = CTokStateIntSuffix; - break; - case 'u': - case 'U': - if (ctok->unsigned_suffix) { - return mark_error(ctok); - } - ctok->unsigned_suffix = true; - break; - default: - c -= 1; - end_token(ctok); - ctok->state = CTokStateStart; - continue; - } - break; case CTokStateGotZero: switch (*c) { case 'x': @@ -389,9 +344,9 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { case '6': case '7': // TODO @mul_with_overflow - ctok->cur_tok->data.num_lit_int *= 8; + ctok->cur_tok->data.num_lit_int.x *= 8; // TODO @add_with_overflow - ctok->cur_tok->data.num_lit_int += *c - '0'; + ctok->cur_tok->data.num_lit_int.x += *c - '0'; break; case '8': case '9': @@ -466,6 +421,82 @@ void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { ctok->cur_tok->id = CTokIdNumLitFloat; ctok->state = CTokStateExpSign; break; + case 'u': + case 'U': + // marks the number literal as unsigned + ctok->state = CTokStateNumLitIntSuffixU; + ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixU; + break; + case 'l': + case 'L': + // marks the number literal as long + ctok->state = CTokStateNumLitIntSuffixL; + ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixL; + break; + default: + c -= 1; + end_token(ctok); + ctok->state = CTokStateStart; + continue; + } + break; + case CTokStateNumLitIntSuffixU: + switch (*c) { + case 'l': + case 'L': + ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLU; + ctok->state = CTokStateNumLitIntSuffixUL; + break; + default: + c -= 1; + end_token(ctok); + ctok->state = CTokStateStart; + continue; + } + break; + case CTokStateNumLitIntSuffixL: + switch (*c) { + case 'l': + case 'L': + ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLL; + ctok->state = CTokStateNumLitIntSuffixLL; + break; + case 'u': + case 'U': + ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLU; + end_token(ctok); + ctok->state = CTokStateStart; + break; + default: + c -= 1; + end_token(ctok); + ctok->state = CTokStateStart; + continue; + } + break; + case CTokStateNumLitIntSuffixLL: + switch (*c) { + case 'u': + case 'U': + ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLLU; + end_token(ctok); + ctok->state = CTokStateStart; + break; + default: + c -= 1; + end_token(ctok); + ctok->state = CTokStateStart; + continue; + } + break; + case CTokStateNumLitIntSuffixUL: + switch (*c) { + case 'l': + case 'L': + ctok->cur_tok->data.num_lit_int.suffix = CNumLitSuffixLLU; + end_token(ctok); + ctok->state = CTokStateStart; + break; default: c -= 1; end_token(ctok); @@ -681,8 +712,10 @@ found_end_of_macro: case CTokStateHex: case CTokStateOctal: case CTokStateGotZero: - case CTokStateIntSuffix: - case CTokStateIntSuffixLong: + case CTokStateNumLitIntSuffixU: + case CTokStateNumLitIntSuffixL: + case CTokStateNumLitIntSuffixUL: + case CTokStateNumLitIntSuffixLL: end_token(ctok); break; case CTokStateFloat: diff --git a/src/c_tokenizer.hpp b/src/c_tokenizer.hpp index 06b2437c88..573391239f 100644 --- a/src/c_tokenizer.hpp +++ b/src/c_tokenizer.hpp @@ -20,12 +20,26 @@ enum CTokId { CTokIdMinus, }; +enum CNumLitSuffix { + CNumLitSuffixNone, + CNumLitSuffixL, + CNumLitSuffixU, + CNumLitSuffixLU, + CNumLitSuffixLL, + CNumLitSuffixLLU, +}; + +struct CNumLitInt { + uint64_t x; + CNumLitSuffix suffix; +}; + struct CTok { enum CTokId id; union { uint8_t char_lit; Buf str_lit; - uint64_t num_lit_int; + CNumLitInt num_lit_int; double num_lit_float; Buf symbol; } data; @@ -47,13 +61,15 @@ enum CTokState { CTokStateOctal, CTokStateGotZero, CTokStateHex, - CTokStateIntSuffix, - CTokStateIntSuffixLong, CTokStateFloat, CTokStateExpSign, CTokStateFloatExp, CTokStateFloatExpFirst, CTokStateStrOctal, + CTokStateNumLitIntSuffixU, + CTokStateNumLitIntSuffixL, + CTokStateNumLitIntSuffixLL, + CTokStateNumLitIntSuffixUL, }; struct CTokenize { @@ -62,8 +78,6 @@ struct CTokenize { bool error; CTok *cur_tok; Buf buf; - bool unsigned_suffix; - bool long_suffix; uint8_t cur_char; int octal_index; }; diff --git a/src/parseh.cpp b/src/parseh.cpp index ddf8cb11e8..fa474850b2 100644 --- a/src/parseh.cpp +++ b/src/parseh.cpp @@ -162,12 +162,16 @@ static Tld *create_global_str_lit_var(Context *c, Buf *name, Buf *value) { return &tld_var->base; } -static Tld *create_global_num_lit_unsigned_negative(Context *c, Buf *name, uint64_t x, bool negative) { - ConstExprValue *var_val = create_const_unsigned_negative(c->codegen->builtin_types.entry_num_lit_int, x, negative); +static Tld *create_global_num_lit_unsigned_negative_type(Context *c, Buf *name, uint64_t x, bool negative, TypeTableEntry *type_entry) { + ConstExprValue *var_val = create_const_unsigned_negative(type_entry, x, negative); TldVar *tld_var = create_global_var(c, name, var_val, true); return &tld_var->base; } +static Tld *create_global_num_lit_unsigned_negative(Context *c, Buf *name, uint64_t x, bool negative) { + return create_global_num_lit_unsigned_negative_type(c, name, x, negative, c->codegen->builtin_types.entry_num_lit_int); +} + static Tld *create_global_num_lit_float(Context *c, Buf *name, double value) { ConstExprValue *var_val = create_const_float(c->codegen->builtin_types.entry_num_lit_float, value); TldVar *tld_var = create_global_var(c, name, var_val, true); @@ -1149,7 +1153,32 @@ static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *ch return; case CTokIdNumLitInt: if (is_last) { - Tld *tld = create_global_num_lit_unsigned_negative(c, name, tok->data.num_lit_int, negate); + Tld *tld; + switch (tok->data.num_lit_int.suffix) { + case CNumLitSuffixNone: + tld = create_global_num_lit_unsigned_negative(c, name, tok->data.num_lit_int.x, negate); + break; + case CNumLitSuffixL: + tld = create_global_num_lit_unsigned_negative_type(c, name, tok->data.num_lit_int.x, negate, + c->codegen->builtin_types.entry_c_int[CIntTypeLong]); + break; + case CNumLitSuffixU: + tld = create_global_num_lit_unsigned_negative_type(c, name, tok->data.num_lit_int.x, negate, + c->codegen->builtin_types.entry_c_int[CIntTypeUInt]); + break; + case CNumLitSuffixLU: + tld = create_global_num_lit_unsigned_negative_type(c, name, tok->data.num_lit_int.x, negate, + c->codegen->builtin_types.entry_c_int[CIntTypeULong]); + break; + case CNumLitSuffixLL: + tld = create_global_num_lit_unsigned_negative_type(c, name, tok->data.num_lit_int.x, negate, + c->codegen->builtin_types.entry_c_int[CIntTypeLongLong]); + break; + case CNumLitSuffixLLU: + tld = create_global_num_lit_unsigned_negative_type(c, name, tok->data.num_lit_int.x, negate, + c->codegen->builtin_types.entry_c_int[CIntTypeULongLong]); + break; + } c->macro_table.put(name, tld); } return; diff --git a/test/parseh.zig b/test/parseh.zig index 08740a0acd..08889c1f2b 100644 --- a/test/parseh.zig +++ b/test/parseh.zig @@ -217,6 +217,48 @@ pub fn addCases(cases: &tests.ParseHContext) { \\pub const SDL_INIT_VIDEO = 32; ); + cases.add("u integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020u /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , + \\pub const SDL_INIT_VIDEO: c_uint = 32; + ); + + cases.add("l integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020l /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , + \\pub const SDL_INIT_VIDEO: c_long = 32; + ); + + cases.add("ul integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020ul /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , + \\pub const SDL_INIT_VIDEO: c_ulong = 32; + ); + + cases.add("lu integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020lu /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , + \\pub const SDL_INIT_VIDEO: c_ulong = 32; + ); + + cases.add("ll integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020ll /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , + \\pub const SDL_INIT_VIDEO: c_longlong = 32; + ); + + cases.add("ull integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020ull /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , + \\pub const SDL_INIT_VIDEO: c_ulonglong = 32; + ); + + cases.add("llu integer suffix after hex literal", + \\#define SDL_INIT_VIDEO 0x00000020llu /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */ + , + \\pub const SDL_INIT_VIDEO: c_ulonglong = 32; + ); + cases.add("zig keywords in C code", \\struct comptime { \\ int defer;