diff --git a/src/analyze.cpp b/src/analyze.cpp index 5ba0ae16a7..5e6b0a3558 100644 --- a/src/analyze.cpp +++ b/src/analyze.cpp @@ -640,13 +640,7 @@ static bool num_lit_fits_in_other_type(CodeGen *g, TypeTableEntry *literal_type, case TypeTableEntryIdStruct: return false; case TypeTableEntryIdInt: - if (is_num_lit_signed(num_lit)) { - if (!other_type->data.integral.is_signed) { - return false; - } - - return lit_size_in_bits <= other_type->size_in_bits; - } else if (is_num_lit_unsigned(num_lit)) { + if (is_num_lit_unsigned(num_lit)) { return lit_size_in_bits <= other_type->size_in_bits; } else { @@ -883,12 +877,6 @@ static TypeTableEntry * resolve_number_literals(CodeGen *g, AstNode *node1, AstN codegen_num_lit_1->resolved_type = g->builtin_types.entry_f64; codegen_num_lit_2->resolved_type = g->builtin_types.entry_f64; return g->builtin_types.entry_f64; - } else if (is_num_lit_signed(type1->data.num_lit.kind) && - is_num_lit_signed(type2->data.num_lit.kind)) - { - codegen_num_lit_1->resolved_type = g->builtin_types.entry_i64; - codegen_num_lit_2->resolved_type = g->builtin_types.entry_i64; - return g->builtin_types.entry_i64; } else if (is_num_lit_unsigned(type1->data.num_lit.kind) && is_num_lit_unsigned(type2->data.num_lit.kind)) { diff --git a/src/codegen.cpp b/src/codegen.cpp index 254e9f4d23..699769f513 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1254,13 +1254,9 @@ static const NumLit num_lit_kinds[] = { NumLitF32, NumLitF64, NumLitF128, - NumLitI8, NumLitU8, - NumLitI16, NumLitU16, - NumLitI32, NumLitU32, - NumLitI64, NumLitU64, }; @@ -1388,7 +1384,7 @@ static void define_builtin_types(CodeGen *g) { } { TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdFloat); - entry->type_ref = LLVMFloatType(); + entry->type_ref = LLVMDoubleType(); buf_init_from_str(&entry->name, "f64"); entry->size_in_bits = 64; entry->align_in_bits = 64; diff --git a/src/parser.cpp b/src/parser.cpp index 853263cd2e..a0a635da7c 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -11,6 +11,7 @@ #include #include +#include static const char *bin_op_str(BinOpType bin_op) { @@ -278,9 +279,7 @@ void ast_print(AstNode *node, int indent) { NumLit num_lit = node->data.number_literal.kind; const char *name = node_type_str(node->type); const char *kind_str = num_lit_str(num_lit); - if (is_num_lit_signed(num_lit)) { - fprintf(stderr, "%s %s %" PRId64 "\n", name, kind_str, node->data.number_literal.data.x_int); - } else if (is_num_lit_unsigned(num_lit)) { + if (is_num_lit_unsigned(num_lit)) { fprintf(stderr, "%s %s %" PRIu64 "\n", name, kind_str, node->data.number_literal.data.x_uint); } else { fprintf(stderr, "%s %s %f\n", name, kind_str, node->data.number_literal.data.x_float); @@ -585,187 +584,152 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool if (offset_map) offset_map->append(pos); } -enum ParseNumLitState { - ParseNumLitStateStart, - ParseNumLitStateBase, - ParseNumLitStateDigits, - ParseNumLitStateExpectFirstDigit, - ParseNumLitStateDecimal, - ParseNumLitStateESign, - ParseNumLitStateEDigit, -}; +static unsigned long long parse_int_digits(ParseContext *pc, int digits_start, int digits_end, int radix, + unsigned long long initial_value, bool *overflow) { + unsigned long long x = initial_value; + + for (int i = digits_start; i < digits_end; i++) { + uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i); + unsigned long long digit = get_digit_value(c); + + // x *= radix; + if (__builtin_umulll_overflow(x, radix, &x)) { + *overflow = true; + return 0; + } + + // x += digit + if (__builtin_uaddll_overflow(x, digit, &x)) { + *overflow = true; + return 0; + } + } + return x; +} static void parse_number_literal(ParseContext *pc, Token *token, AstNodeNumberLiteral *num_lit) { - ParseNumLitState state = ParseNumLitStateStart; - unsigned long long base = 10; - bool negative = false; - int digits_start; - int digits_end; - int decimal_start = -1; - int decimal_end; - bool e_present = false; - bool e_positive; - int e_digit_start; - int e_digit_end; + assert(token->id == TokenIdNumberLiteral); - for (int i = token->start_pos; i < token->end_pos; i += 1) { - uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i); - switch (state) { - case ParseNumLitStateStart: - if (c == '-') { - negative = true; - } else if (c == '0') { - state = ParseNumLitStateBase; - } else if (c >= '1' && c <= '9') { - digits_start = i; - state = ParseNumLitStateDigits; - } else { - zig_unreachable(); - } - break; - case ParseNumLitStateBase: - if (c == 'x') { - base = 16; - state = ParseNumLitStateExpectFirstDigit; - } else if (c == 'o') { - base = 8; - state = ParseNumLitStateExpectFirstDigit; - } else if (c == 'b') { - base = 2; - state = ParseNumLitStateExpectFirstDigit; - } else { - zig_unreachable(); - } - break; - - case ParseNumLitStateExpectFirstDigit: - state = ParseNumLitStateDigits; - break; - - case ParseNumLitStateDigits: - if (c == '.') { - assert(base == 10); - digits_end = i; - decimal_start = i + 1; - state = ParseNumLitStateDecimal; - } - break; - case ParseNumLitStateDecimal: - if (c == 'E') { - e_present = false; - decimal_end = i; - state = ParseNumLitStateESign; - } - break; - case ParseNumLitStateESign: - if (c == '+') { - e_positive = true; - e_digit_start = i + 1; - state = ParseNumLitStateEDigit; - } else if (c == '-') { - e_positive = false; - e_digit_start = i + 1; - state = ParseNumLitStateEDigit; - } else { - zig_unreachable(); - } - break; - case ParseNumLitStateEDigit: - assert(c >= '0' && c <= '9'); - break; - } + int whole_number_start = token->start_pos; + if (token->radix != 10) { + // skip the "0x" + whole_number_start += 2; } - switch (state) { - case ParseNumLitStateDigits: - digits_end = token->end_pos; - break; - case ParseNumLitStateDecimal: - decimal_end = token->end_pos; - break; - case ParseNumLitStateEDigit: - e_digit_end = token->end_pos; - break; - case ParseNumLitStateBase: - num_lit->kind = NumLitU8; - num_lit->data.x_uint = 0; - return; - case ParseNumLitStateESign: - case ParseNumLitStateExpectFirstDigit: - case ParseNumLitStateStart: - zig_unreachable(); + int whole_number_end = token->decimal_point_pos; + if (whole_number_end <= whole_number_start) { + // TODO: error for empty whole number part + return; } - if (decimal_start >= 0) { - // float - double x; - - (void)x; - (void)decimal_end; - (void)e_present; - (void)e_positive; - (void)e_digit_start; - (void)e_digit_end; - zig_panic("TODO parse float"); - } else { + if (token->decimal_point_pos == token->end_pos) { // integer - unsigned long long x = 0; + unsigned long long whole_number = parse_int_digits(pc, whole_number_start, whole_number_end, + token->radix, 0, &num_lit->overflow); + if (num_lit->overflow) return; - unsigned long long mult = 1; - for (int i = digits_end - 1; ; i -= 1) { - uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i); - unsigned long long digit = (c - '0'); + num_lit->data.x_uint = whole_number; - // digit *= mult - if (__builtin_umulll_overflow(digit, mult, &digit)) { - num_lit->overflow = true; - return; - } - - // x += digit - if (__builtin_uaddll_overflow(x, digit, &x)) { - num_lit->overflow = true; - return; - } - - if (i == digits_start) - break; - - // mult *= base - if (__builtin_umulll_overflow(mult, base, &mult)) { - num_lit->overflow = true; - return; - } - } - - if (negative) { - if (x <= 128ull) { - num_lit->kind = NumLitI8; - } else if (x <= 32768ull) { - num_lit->kind = NumLitI16; - } else if (x <= 2147483648ull) { - num_lit->kind = NumLitI32; - } else if (x <= 9223372036854775808ull) { - num_lit->kind = NumLitI64; - } else { - num_lit->overflow = true; - return; - } - - num_lit->data.x_int = -((int64_t)x); + if (whole_number <= UINT8_MAX) { + num_lit->kind = NumLitU8; + } else if (whole_number <= UINT16_MAX) { + num_lit->kind = NumLitU16; + } else if (whole_number <= UINT32_MAX) { + num_lit->kind = NumLitU32; } else { - num_lit->data.x_uint = x; + num_lit->kind = NumLitU64; + } + } else { + // float + // TODO: trim leading and trailing zeros in the significand digit sequence + unsigned long long significand_as_int = parse_int_digits(pc, whole_number_start, whole_number_end, + token->radix, 0, &num_lit->overflow); + if (num_lit->overflow) return; - if (x <= UINT8_MAX) { - num_lit->kind = NumLitU8; - } else if (x <= UINT16_MAX) { - num_lit->kind = NumLitU16; - } else if (x <= UINT32_MAX) { - num_lit->kind = NumLitU32; + int exponent = 0; + if (token->decimal_point_pos < token->exponent_marker_pos) { + // fraction + int fraction_start = token->decimal_point_pos + 1; + int fraction_end = token->exponent_marker_pos; + if (fraction_end <= fraction_start) { + // TODO: error for empty fraction part + return; + } + + // TODO: check for where the fraction got too precise instead of just saying overflow + significand_as_int = parse_int_digits(pc, fraction_start, fraction_end, + token->radix, significand_as_int, &num_lit->overflow); + if (num_lit->overflow) return; + + // adjust the exponent to compensate for us effectively moving + // the decimal point all the way to the right + exponent = -(fraction_end - fraction_start); + } + + if (token->exponent_marker_pos < token->end_pos) { + // exponent + int exponent_start = token->exponent_marker_pos + 1; + int exponent_end = token->end_pos; + if (exponent_end <= exponent_start) { + // TODO: error for empty exponent part + return; + } + + bool is_exponent_negative = false; + uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + exponent_start); + if (c == '+') { + exponent_start += 1; + } else if (c == '-') { + exponent_start += 1; + is_exponent_negative = true; + } + + if (exponent_end <= exponent_start) { + // TODO: error for empty exponent part + return; + } + + unsigned long long specified_exponent = parse_int_digits(pc, exponent_start, exponent_end, + 10, 0, &num_lit->overflow); + // TODO: this check is a little silly + if (specified_exponent >= LONG_LONG_MAX) { + num_lit->overflow = true; + return; + } + if (is_exponent_negative) { + exponent -= specified_exponent; } else { - num_lit->kind = NumLitU64; + exponent += specified_exponent; } } + + uint64_t significand_bits; + uint64_t exponent_bits; + if (significand_as_int != 0) { + // normalize the significand + int significand_magnitude = __builtin_clzll(1) - __builtin_clzll(significand_as_int); + exponent += significand_magnitude; + if (!(-1023 <= exponent && exponent < 1023)) { + num_lit->overflow = true; + return; + } + + // this should chop off exactly one 1 bit from the top. + significand_bits = ((uint64_t)significand_as_int << (52 - significand_magnitude)) & 0xfffffffffffffULL; + exponent_bits = exponent + 1023; + } else { + // 0 is all 0's + significand_bits = 0; + exponent_bits = 0; + } + + uint64_t double_bits = (exponent_bits << 52) | significand_bits; + // TODO: check and swap endian + double x = *(double *)&double_bits; + + num_lit->data.x_float = x; + // TODO: see if we can store it in f32 + num_lit->kind = NumLitF64; } } @@ -2366,14 +2330,6 @@ const char *num_lit_str(NumLit num_lit) { return "f64"; case NumLitF128: return "f128"; - case NumLitI8: - return "i8"; - case NumLitI16: - return "i16"; - case NumLitI32: - return "i32"; - case NumLitI64: - return "i64"; case NumLitU8: return "u8"; case NumLitU16: @@ -2388,37 +2344,11 @@ const char *num_lit_str(NumLit num_lit) { zig_unreachable(); } -bool is_num_lit_signed(NumLit num_lit) { - switch (num_lit) { - case NumLitI8: - case NumLitI16: - case NumLitI32: - case NumLitI64: - return true; - - case NumLitF32: - case NumLitF64: - case NumLitF128: - case NumLitU8: - case NumLitU16: - case NumLitU32: - case NumLitU64: - return false; - case NumLitCount: - zig_unreachable(); - } - zig_unreachable(); -} - bool is_num_lit_unsigned(NumLit num_lit) { switch (num_lit) { case NumLitF32: case NumLitF64: case NumLitF128: - case NumLitI8: - case NumLitI16: - case NumLitI32: - case NumLitI64: return false; case NumLitU8: case NumLitU16: @@ -2437,10 +2367,6 @@ bool is_num_lit_float(NumLit num_lit) { case NumLitF64: case NumLitF128: return true; - case NumLitI8: - case NumLitI16: - case NumLitI32: - case NumLitI64: case NumLitU8: case NumLitU16: case NumLitU32: @@ -2454,17 +2380,13 @@ bool is_num_lit_float(NumLit num_lit) { uint64_t num_lit_bit_count(NumLit num_lit) { switch (num_lit) { - case NumLitI8: case NumLitU8: return 8; - case NumLitI16: case NumLitU16: return 16; - case NumLitI32: case NumLitU32: case NumLitF32: return 32; - case NumLitI64: case NumLitU64: case NumLitF64: return 64; diff --git a/src/parser.hpp b/src/parser.hpp index d3a29bd7a3..45b4b029e5 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -273,13 +273,9 @@ enum NumLit { NumLitF32, NumLitF64, NumLitF128, - NumLitI8, NumLitU8, - NumLitI16, NumLitU16, - NumLitI32, NumLitU32, - NumLitI64, NumLitU64, NumLitCount @@ -294,7 +290,6 @@ struct AstNodeNumberLiteral { union { uint64_t x_uint; - int64_t x_int; double x_float; } data; }; @@ -362,7 +357,6 @@ const char *node_type_str(NodeType node_type); void ast_print(AstNode *node, int indent); const char *num_lit_str(NumLit num_lit); -bool is_num_lit_signed(NumLit num_lit); bool is_num_lit_unsigned(NumLit num_lit); bool is_num_lit_float(NumLit num_lit); uint64_t num_lit_bit_count(NumLit num_lit); diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 00334dde9e..5678fa9df5 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -167,6 +167,9 @@ static void begin_token(Tokenize *t, TokenId id) { token->start_column = t->column; token->id = id; token->start_pos = t->pos; + token->radix = 0; + token->decimal_point_pos = 0; + token->exponent_marker_pos = 0; t->cur_tok = token; } @@ -242,7 +245,8 @@ static bool is_exponent_signifier(uint8_t c, int radix) { return c == 'e' || c == 'E'; } } -static int get_digit_value(uint8_t c) { + +int get_digit_value(uint8_t c) { if ('0' <= c && c <= '9') { return c - '0'; } diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp index 713a264031..e96a51256c 100644 --- a/src/tokenizer.hpp +++ b/src/tokenizer.hpp @@ -110,5 +110,6 @@ void tokenize(Buf *buf, Tokenization *out_tokenization); void print_tokens(Buf *buf, ZigList *tokens); bool is_printable(uint8_t c); +int get_digit_value(uint8_t c); #endif diff --git a/test/run_tests.cpp b/test/run_tests.cpp index 9696e0395f..8c64a54cea 100644 --- a/test/run_tests.cpp +++ b/test/run_tests.cpp @@ -477,6 +477,19 @@ export fn main(argc : isize, argv : &&u8, env : &&u8) -> i32 { } )SOURCE", "OK\n"); + add_simple_case("number literals", R"SOURCE( +#link("c") +extern { + fn printf(__format: &const u8, ...) -> i32; + fn exit(__status: i32) -> unreachable; +} + +export fn _start() -> unreachable { + printf(c"0=%d\n", 0 as i32); // TODO: more tests + exit(0); +} + )SOURCE", "0=0\n"); + add_simple_case("structs", R"SOURCE( use "std.zig";