More number literal syntax is supported. Floats still need work.

2025-12-06 14:23:09 +00:00 · 2015-12-15 04:05:43 -07:00 · 2015-12-15 04:05:43 -07:00 · f2a9b40231
commit f2a9b40231
parent cf88fcb2ad
7 changed files with 154 additions and 236 deletions
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@ -640,13 +640,7 @@ static bool num_lit_fits_in_other_type(CodeGen *g, TypeTableEntry *literal_type,
        case TypeTableEntryIdStruct:
            return false;
        case TypeTableEntryIdInt:
-            if (is_num_lit_signed(num_lit)) {
+            if (is_num_lit_unsigned(num_lit)) {
                if (!other_type->data.integral.is_signed) {
                    return false;
                }
                return lit_size_in_bits <= other_type->size_in_bits;
            } else if (is_num_lit_unsigned(num_lit)) {
                return lit_size_in_bits <= other_type->size_in_bits;
            } else {
@ -883,12 +877,6 @@ static TypeTableEntry * resolve_number_literals(CodeGen *g, AstNode *node1, AstN
            codegen_num_lit_1->resolved_type = g->builtin_types.entry_f64;
            codegen_num_lit_2->resolved_type = g->builtin_types.entry_f64;
            return g->builtin_types.entry_f64;
        } else if (is_num_lit_signed(type1->data.num_lit.kind) &&
                   is_num_lit_signed(type2->data.num_lit.kind))
        {
            codegen_num_lit_1->resolved_type = g->builtin_types.entry_i64;
            codegen_num_lit_2->resolved_type = g->builtin_types.entry_i64;
            return g->builtin_types.entry_i64;
        } else if (is_num_lit_unsigned(type1->data.num_lit.kind) &&
                   is_num_lit_unsigned(type2->data.num_lit.kind))
        {
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@ -1254,13 +1254,9 @@ static const NumLit num_lit_kinds[] = {
    NumLitF32,
    NumLitF64,
    NumLitF128,
    NumLitI8,
    NumLitU8,
    NumLitI16,
    NumLitU16,
    NumLitI32,
    NumLitU32,
    NumLitI64,
    NumLitU64,
 };
@ -1388,7 +1384,7 @@ static void define_builtin_types(CodeGen *g) {
    }
    {
        TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdFloat);
-        entry->type_ref = LLVMFloatType();
+        entry->type_ref = LLVMDoubleType();
        buf_init_from_str(&entry->name, "f64");
        entry->size_in_bits = 64;
        entry->align_in_bits = 64;
--- a/src/parser.cpp
+++ b/src/parser.cpp
@ -11,6 +11,7 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <limits.h>
 static const char *bin_op_str(BinOpType bin_op) {
@ -278,9 +279,7 @@ void ast_print(AstNode *node, int indent) {
                NumLit num_lit = node->data.number_literal.kind;
                const char *name = node_type_str(node->type);
                const char *kind_str = num_lit_str(num_lit);
-                if (is_num_lit_signed(num_lit)) {
+                if (is_num_lit_unsigned(num_lit)) {
                    fprintf(stderr, "%s %s %" PRId64 "\n", name, kind_str, node->data.number_literal.data.x_int);
                } else if (is_num_lit_unsigned(num_lit)) {
                    fprintf(stderr, "%s %s %" PRIu64 "\n", name, kind_str, node->data.number_literal.data.x_uint);
                } else {
                    fprintf(stderr, "%s %s %f\n", name, kind_str, node->data.number_literal.data.x_float);
@ -585,187 +584,152 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool
    if (offset_map) offset_map->append(pos);
 }
-enum ParseNumLitState {
+static unsigned long long parse_int_digits(ParseContext *pc, int digits_start, int digits_end, int radix,
-    ParseNumLitStateStart,
+    unsigned long long initial_value, bool *overflow) {
-    ParseNumLitStateBase,
+    unsigned long long x = initial_value;
    ParseNumLitStateDigits,
    ParseNumLitStateExpectFirstDigit,
    ParseNumLitStateDecimal,
    ParseNumLitStateESign,
    ParseNumLitStateEDigit,
 };
-static void parse_number_literal(ParseContext *pc, Token *token, AstNodeNumberLiteral *num_lit) {
+    for (int i = digits_start; i < digits_end; i++) {
    ParseNumLitState state = ParseNumLitStateStart;
    unsigned long long base = 10;
    bool negative = false;
    int digits_start;
    int digits_end;
    int decimal_start = -1;
    int decimal_end;
    bool e_present = false;
    bool e_positive;
    int e_digit_start;
    int e_digit_end;
    for (int i = token->start_pos; i < token->end_pos; i += 1) {
        uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i);
-        switch (state) {
+        unsigned long long digit = get_digit_value(c);
            case ParseNumLitStateStart:
                if (c == '-') {
                    negative = true;
                } else if (c == '0') {
                    state = ParseNumLitStateBase;
                } else if (c >= '1' && c <= '9') {
                    digits_start = i;
                    state = ParseNumLitStateDigits;
                } else {
                    zig_unreachable();
                }
                break;
            case ParseNumLitStateBase:
                if (c == 'x') {
                    base = 16;
                    state = ParseNumLitStateExpectFirstDigit;
                } else if (c == 'o') {
                    base = 8;
                    state = ParseNumLitStateExpectFirstDigit;
                } else if (c == 'b') {
                    base = 2;
                    state = ParseNumLitStateExpectFirstDigit;
                } else {
                    zig_unreachable();
                }
                break;
-            case ParseNumLitStateExpectFirstDigit:
+        // x *= radix;
-                state = ParseNumLitStateDigits;
+        if (__builtin_umulll_overflow(x, radix, &x)) {
-                break;
+            *overflow = true;
-
+            return 0;
            case ParseNumLitStateDigits:
                if (c == '.') {
                    assert(base == 10);
                    digits_end = i;
                    decimal_start = i + 1;
                    state = ParseNumLitStateDecimal;
                }
                break;
            case ParseNumLitStateDecimal:
                if (c == 'E') {
                    e_present = false;
                    decimal_end = i;
                    state = ParseNumLitStateESign;
                }
                break;
            case ParseNumLitStateESign:
                if (c == '+') {
                    e_positive = true;
                    e_digit_start = i + 1;
                    state = ParseNumLitStateEDigit;
                } else if (c == '-') {
                    e_positive = false;
                    e_digit_start = i + 1;
                    state = ParseNumLitStateEDigit;
                } else {
                    zig_unreachable();
                }
                break;
            case ParseNumLitStateEDigit:
                assert(c >= '0' && c <= '9');
                break;
        }
    }
    switch (state) {
        case ParseNumLitStateDigits:
            digits_end = token->end_pos;
            break;
        case ParseNumLitStateDecimal:
            decimal_end = token->end_pos;
            break;
        case ParseNumLitStateEDigit:
            e_digit_end = token->end_pos;
            break;
        case ParseNumLitStateBase:
            num_lit->kind = NumLitU8;
            num_lit->data.x_uint = 0;
            return;
        case ParseNumLitStateESign:
        case ParseNumLitStateExpectFirstDigit:
        case ParseNumLitStateStart:
            zig_unreachable();
    }
    if (decimal_start >= 0) {
        // float
        double x;
        (void)x;
        (void)decimal_end;
        (void)e_present;
        (void)e_positive;
        (void)e_digit_start;
        (void)e_digit_end;
        zig_panic("TODO parse float");
    } else {
        // integer
        unsigned long long x = 0;
        unsigned long long mult = 1;
        for (int i = digits_end - 1; ; i -= 1) {
            uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i);
            unsigned long long digit = (c - '0');
            // digit *= mult
            if (__builtin_umulll_overflow(digit, mult, &digit)) {
                num_lit->overflow = true;
                return;
        }
        // x += digit
        if (__builtin_uaddll_overflow(x, digit, &x)) {
-                num_lit->overflow = true;
+            *overflow = true;
            return 0;
        }
    }
    return x;
 }
 static void parse_number_literal(ParseContext *pc, Token *token, AstNodeNumberLiteral *num_lit) {
    assert(token->id == TokenIdNumberLiteral);
    int whole_number_start = token->start_pos;
    if (token->radix != 10) {
        // skip the "0x"
        whole_number_start += 2;
    }
    int whole_number_end = token->decimal_point_pos;
    if (whole_number_end <= whole_number_start) {
        // TODO: error for empty whole number part
        return;
    }
-            if (i == digits_start)
+    if (token->decimal_point_pos == token->end_pos) {
-                break;
+        // integer
        unsigned long long whole_number = parse_int_digits(pc, whole_number_start, whole_number_end,
            token->radix, 0, &num_lit->overflow);
        if (num_lit->overflow) return;
-            // mult *= base
+        num_lit->data.x_uint = whole_number;
            if (__builtin_umulll_overflow(mult, base, &mult)) {
                num_lit->overflow = true;
                return;
            }
        }
-        if (negative) {
+        if (whole_number <= UINT8_MAX) {
            if (x <= 128ull) {
                num_lit->kind = NumLitI8;
            } else if (x <= 32768ull) {
                num_lit->kind = NumLitI16;
            } else if (x <= 2147483648ull) {
                num_lit->kind = NumLitI32;
            } else if (x <= 9223372036854775808ull) {
                num_lit->kind = NumLitI64;
            } else {
                num_lit->overflow = true;
                return;
            }
            num_lit->data.x_int = -((int64_t)x);
        } else {
            num_lit->data.x_uint = x;
            if (x <= UINT8_MAX) {
            num_lit->kind = NumLitU8;
-            } else if (x <= UINT16_MAX) {
+        } else if (whole_number <= UINT16_MAX) {
            num_lit->kind = NumLitU16;
-            } else if (x <= UINT32_MAX) {
+        } else if (whole_number <= UINT32_MAX) {
            num_lit->kind = NumLitU32;
        } else {
            num_lit->kind = NumLitU64;
        }
    } else {
        // float
        // TODO: trim leading and trailing zeros in the significand digit sequence
        unsigned long long significand_as_int = parse_int_digits(pc, whole_number_start, whole_number_end,
            token->radix, 0, &num_lit->overflow);
        if (num_lit->overflow) return;
        int exponent = 0;
        if (token->decimal_point_pos < token->exponent_marker_pos) {
            // fraction
            int fraction_start = token->decimal_point_pos + 1;
            int fraction_end = token->exponent_marker_pos;
            if (fraction_end <= fraction_start) {
                // TODO: error for empty fraction part
                return;
            }
            // TODO: check for where the fraction got too precise instead of just saying overflow
            significand_as_int = parse_int_digits(pc, fraction_start, fraction_end,
                    token->radix, significand_as_int, &num_lit->overflow);
            if (num_lit->overflow) return;
            // adjust the exponent to compensate for us effectively moving
            // the decimal point all the way to the right
            exponent = -(fraction_end - fraction_start);
        }
        if (token->exponent_marker_pos < token->end_pos) {
            // exponent
            int exponent_start = token->exponent_marker_pos + 1;
            int exponent_end = token->end_pos;
            if (exponent_end <= exponent_start) {
                // TODO: error for empty exponent part
                return;
            }
            bool is_exponent_negative = false;
            uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + exponent_start);
            if (c == '+') {
                exponent_start += 1;
            } else if (c == '-') {
                exponent_start += 1;
                is_exponent_negative = true;
            }
            if (exponent_end <= exponent_start) {
                // TODO: error for empty exponent part
                return;
            }
            unsigned long long specified_exponent = parse_int_digits(pc, exponent_start, exponent_end,
                10, 0, &num_lit->overflow);
            // TODO: this check is a little silly
            if (specified_exponent >= LONG_LONG_MAX) {
                num_lit->overflow = true;
                return;
            }
            if (is_exponent_negative) {
                exponent -= specified_exponent;
            } else {
                exponent += specified_exponent;
            }
        }
        uint64_t significand_bits;
        uint64_t exponent_bits;
        if (significand_as_int != 0) {
            // normalize the significand
            int significand_magnitude = __builtin_clzll(1) - __builtin_clzll(significand_as_int);
            exponent += significand_magnitude;
            if (!(-1023 <= exponent && exponent < 1023)) {
                num_lit->overflow = true;
                return;
            }
            // this should chop off exactly one 1 bit from the top.
            significand_bits = ((uint64_t)significand_as_int << (52 - significand_magnitude)) & 0xfffffffffffffULL;
            exponent_bits = exponent + 1023;
        } else {
            // 0 is all 0's
            significand_bits = 0;
            exponent_bits = 0;
        }
        uint64_t double_bits = (exponent_bits << 52) | significand_bits;
        // TODO: check and swap endian
        double x = *(double *)&double_bits;
        num_lit->data.x_float = x;
        // TODO: see if we can store it in f32
        num_lit->kind = NumLitF64;
    }
 }
@ -2366,14 +2330,6 @@ const char *num_lit_str(NumLit num_lit) {
            return "f64";
        case NumLitF128:
            return "f128";
        case NumLitI8:
            return "i8";
        case NumLitI16:
            return "i16";
        case NumLitI32:
            return "i32";
        case NumLitI64:
            return "i64";
        case NumLitU8:
            return "u8";
        case NumLitU16:
@ -2388,37 +2344,11 @@ const char *num_lit_str(NumLit num_lit) {
    zig_unreachable();
 }
 // Reports whether `num_lit` is one of the signed integer literal kinds.
 // Returns true for NumLitI8, NumLitI16, NumLitI32 and NumLitI64, and false
 // for every float kind (NumLitF32/F64/F128) and every unsigned kind
 // (NumLitU8/U16/U32/U64).
 bool is_num_lit_signed(NumLit num_lit) {
    switch (num_lit) {
        // Signed integer kinds.
        case NumLitI8:
        case NumLitI16:
        case NumLitI32:
        case NumLitI64:
            return true;
        // Float and unsigned integer kinds are both "not signed" here.
        case NumLitF32:
        case NumLitF64:
        case NumLitF128:
        case NumLitU8:
        case NumLitU16:
        case NumLitU32:
        case NumLitU64:
            return false;
        // NOTE(review): NumLitCount is presumably the enum's count sentinel
        // rather than a real literal kind; passing it is treated as a bug.
        case NumLitCount:
            zig_unreachable();
    }
    // Only reached when num_lit matches none of the enumerators listed above.
    zig_unreachable();
 }
 bool is_num_lit_unsigned(NumLit num_lit) {
    switch (num_lit) {
        case NumLitF32:
        case NumLitF64:
        case NumLitF128:
        case NumLitI8:
        case NumLitI16:
        case NumLitI32:
        case NumLitI64:
            return false;
        case NumLitU8:
        case NumLitU16:
@ -2437,10 +2367,6 @@ bool is_num_lit_float(NumLit num_lit) {
        case NumLitF64:
        case NumLitF128:
            return true;
        case NumLitI8:
        case NumLitI16:
        case NumLitI32:
        case NumLitI64:
        case NumLitU8:
        case NumLitU16:
        case NumLitU32:
@ -2454,17 +2380,13 @@ bool is_num_lit_float(NumLit num_lit) {
 uint64_t num_lit_bit_count(NumLit num_lit) {
    switch (num_lit) {
        case NumLitI8:
        case NumLitU8:
            return 8;
        case NumLitI16:
        case NumLitU16:
            return 16;
        case NumLitI32:
        case NumLitU32:
        case NumLitF32:
            return 32;
        case NumLitI64:
        case NumLitU64:
        case NumLitF64:
            return 64;
--- a/src/parser.hpp
+++ b/src/parser.hpp
@ -273,13 +273,9 @@ enum NumLit {
    NumLitF32,
    NumLitF64,
    NumLitF128,
    NumLitI8,
    NumLitU8,
    NumLitI16,
    NumLitU16,
    NumLitI32,
    NumLitU32,
    NumLitI64,
    NumLitU64,
    NumLitCount
@ -294,7 +290,6 @@ struct AstNodeNumberLiteral {
    union {
        uint64_t x_uint;
        int64_t x_int;
        double x_float;
    } data;
 };
@ -362,7 +357,6 @@ const char *node_type_str(NodeType node_type);
 void ast_print(AstNode *node, int indent);
 const char *num_lit_str(NumLit num_lit);
 bool is_num_lit_signed(NumLit num_lit);
 bool is_num_lit_unsigned(NumLit num_lit);
 bool is_num_lit_float(NumLit num_lit);
 uint64_t num_lit_bit_count(NumLit num_lit);
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@ -167,6 +167,9 @@ static void begin_token(Tokenize *t, TokenId id) {
    token->start_column = t->column;
    token->id = id;
    token->start_pos = t->pos;
    token->radix = 0;
    token->decimal_point_pos = 0;
    token->exponent_marker_pos = 0;
    t->cur_tok = token;
 }
@ -242,7 +245,8 @@ static bool is_exponent_signifier(uint8_t c, int radix) {
        return c == 'e' || c == 'E';
    }
 }
-static int get_digit_value(uint8_t c) {
+
 int get_digit_value(uint8_t c) {
    if ('0' <= c && c <= '9') {
        return c - '0';
    }
--- a/src/tokenizer.hpp
+++ b/src/tokenizer.hpp
@ -110,5 +110,6 @@ void tokenize(Buf *buf, Tokenization *out_tokenization);
 void print_tokens(Buf *buf, ZigList<Token> *tokens);
 bool is_printable(uint8_t c);
 int get_digit_value(uint8_t c);
 #endif
--- a/test/run_tests.cpp
+++ b/test/run_tests.cpp
@ -477,6 +477,19 @@ export fn main(argc : isize, argv : &&u8, env : &&u8) -> i32 {
 }
    )SOURCE", "OK\n");
    add_simple_case("number literals", R"SOURCE(
 #link("c")
 extern {
    fn printf(__format: &const u8, ...) -> i32;
    fn exit(__status: i32) -> unreachable;
 }
 export fn _start() -> unreachable {
    printf(c"0=%d\n", 0 as i32); // TODO: more tests
    exit(0);
 }
    )SOURCE", "0=0\n");
    add_simple_case("structs", R"SOURCE(
 use "std.zig";