From 4055e6055b5c0727f5e85a30760c9bccf525f4de Mon Sep 17 00:00:00 2001 From: zooster Date: Thu, 18 Aug 2022 18:54:51 +0200 Subject: [PATCH] AstGen: disallow leading zeroes in int literals and int types This makes `0123` and `u0123` etc. illegal. I'm now confident that this is a good change because I actually caught two C header translation mistakes in `haiku.zig` with this. Clearly, `0123` being octal in C (TIL) can cause confusion, and we make this easier to read by requiring `0o` as the prefix and now also disallowing leading zeroes in integers. For consistency and because it looks weird, we disallow it for integer types too (e.g. `u0123`). Fixes #11963 Fixes #12417 --- lib/std/c/haiku.zig | 4 +- lib/std/zig/parser_test.zig | 16 +++---- src/AstGen.zig | 43 ++++++++++++++----- test/behavior/math.zig | 7 ++- ..._underscore_placement_in_int_literal-1.zig | 4 +- .../leading_zero_in_integer.zig | 27 ++++++++++++ 6 files changed, 74 insertions(+), 27 deletions(-) create mode 100644 test/cases/compile_errors/leading_zero_in_integer.zig diff --git a/lib/std/c/haiku.zig b/lib/std/c/haiku.zig index 672f4fa4ba..28935ffa34 100644 --- a/lib/std/c/haiku.zig +++ b/lib/std/c/haiku.zig @@ -702,7 +702,7 @@ pub const T = struct { pub const CSETAF = 0x8002; pub const CSETAW = 0x8003; pub const CWAITEVENT = 0x8004; - pub const CSBRK = 08005; + pub const CSBRK = 0x8005; pub const CFLSH = 0x8006; pub const CXONC = 0x8007; pub const CQUERYCONNECTED = 0x8008; @@ -874,7 +874,7 @@ pub const S = struct { pub const IFDIR = 0o040000; pub const IFCHR = 0o020000; pub const IFIFO = 0o010000; - pub const INDEX_DIR = 04000000000; + pub const INDEX_DIR = 0o4000000000; pub const IUMSK = 0o7777; pub const ISUID = 0o4000; diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index bee9375b5a..2bb8c848bc 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -4254,10 +4254,10 @@ test "zig fmt: integer literals with underscore separators" { \\const \\ x = \\ 1_234_567 - \\ + (0b0_1-0o7_0+0xff_FF ) + 0_0; + \\ + (0b0_1-0o7_0+0xff_FF ) + 1_0; , \\const x = - \\ 1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 0_0; + \\ 1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 1_0; \\ ); } @@ -4266,7 +4266,7 @@ test "zig fmt: hex literals with underscore separators" { try testTransform( \\pub fn orMask(a: [ 1_000 ]u64, b: [ 1_000] u64) [1_000]u64 { \\ var c: [1_000]u64 = [1]u64{ 0xFFFF_FFFF_FFFF_FFFF}**1_000; - \\ for (c [ 0_0 .. ]) |_, i| { + \\ for (c [ 1_0 .. ]) |_, i| { \\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA; \\ } \\ return c; @@ -4276,7 +4276,7 @@ test "zig fmt: hex literals with underscore separators" { , \\pub fn orMask(a: [1_000]u64, b: [1_000]u64) [1_000]u64 { \\ var c: [1_000]u64 = [1]u64{0xFFFF_FFFF_FFFF_FFFF} ** 1_000; - \\ for (c[0_0..]) |_, i| { + \\ for (c[1_0..]) |_, i| { \\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA; \\ } \\ return c; @@ -4288,14 +4288,14 @@ test "zig fmt: hex literals with underscore separators" { test "zig fmt: decimal float literals with underscore separators" { try testTransform( \\pub fn main() void { - \\ const a:f64=(10.0e-0+(10.0e+0))+10_00.00_00e-2+00_00.00_10e+4; - \\ const b:f64=010.0--0_10.0+0_1_0.0_0+1e2; + \\ const a:f64=(10.0e-0+(10.0e+0))+10_00.00_00e-2+20_00.00_10e+4; + \\ const b:f64=1_0.0--10_10.0+1_0_0.0_0+1e2; \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b }); \\} , \\pub fn main() void { - \\ const a: f64 = (10.0e-0 + (10.0e+0)) + 10_00.00_00e-2 + 00_00.00_10e+4; - \\ const b: f64 = 010.0 - -0_10.0 + 0_1_0.0_0 + 1e2; + \\ const a: f64 = (10.0e-0 + (10.0e+0)) + 10_00.00_00e-2 + 20_00.00_10e+4; + \\ const b: f64 = 1_0.0 - -10_10.0 + 1_0_0.0_0 + 1e2; \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b }); \\} \\ diff --git a/src/AstGen.zig b/src/AstGen.zig index ee1dbeffa4..2001e6950a 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -4088,6 +4088,13 @@ fn testDecl( true => .signed, false => .unsigned, }; + if (ident_name_raw.len >= 3 and ident_name_raw[1] == '0') { + return astgen.failTok( + test_name_token, + "primitive integer type '{s}' has leading zero", + .{ident_name_raw}, + ); + } _ = parseBitCount(ident_name_raw[1..]) catch |err| switch (err) { error.Overflow => return astgen.failTok( test_name_token, @@ -6791,6 +6798,13 @@ fn identifier( true => .signed, false => .unsigned, }; + if (ident_name_raw.len >= 3 and ident_name_raw[1] == '0') { + return astgen.failNode( + ident, + "primitive integer type '{s}' has leading zero", + .{ident_name_raw}, + ); + } const bit_count = parseBitCount(ident_name_raw[1..]) catch |err| switch (err) { error.Overflow => return astgen.failNode( ident, @@ -7021,17 +7035,6 @@ fn integerLiteral(gz: *GenZir, rl: ResultLoc, node: Ast.Node.Index) InnerError!Z const main_tokens = tree.nodes.items(.main_token); const int_token = main_tokens[node]; const prefixed_bytes = tree.tokenSlice(int_token); - if (std.fmt.parseInt(u64, prefixed_bytes, 0)) |small_int| { - const result: Zir.Inst.Ref = switch (small_int) { - 0 => .zero, - 1 => .one, - else => try gz.addInt(small_int), - }; - return rvalue(gz, rl, result, node); - } else |err| switch (err) { - error.InvalidCharacter => unreachable, // Caught by the parser. - error.Overflow => {}, - } var base: u8 = 10; var non_prefixed: []const u8 = prefixed_bytes; @@ -7046,6 +7049,24 @@ fn integerLiteral(gz: *GenZir, rl: ResultLoc, node: Ast.Node.Index) InnerError!Z non_prefixed = prefixed_bytes[2..]; } + if (base == 10 and prefixed_bytes.len >= 2 and prefixed_bytes[0] == '0') { + return astgen.failNodeNotes(node, "integer literal '{s}' has leading zero", .{prefixed_bytes}, &.{ + try astgen.errNoteNode(node, "use '0o' prefix for octal literals", .{}), + }); + } + + if (std.fmt.parseUnsigned(u64, non_prefixed, base)) |small_int| { + const result: Zir.Inst.Ref = switch (small_int) { + 0 => .zero, + 1 => .one, + else => try gz.addInt(small_int), + }; + return rvalue(gz, rl, result, node); + } else |err| switch (err) { + error.InvalidCharacter => unreachable, // Caught by the parser. + error.Overflow => {}, + } + const gpa = astgen.gpa; var big_int = try std.math.big.int.Managed.init(gpa); defer big_int.deinit(); diff --git a/test/behavior/math.zig b/test/behavior/math.zig index ce476fea04..d3f9676f11 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -239,10 +239,9 @@ test "quad hex float literal parsing in range" { } test "underscore separator parsing" { - try expect(0_0_0_0 == 0); try expect(1_234_567 == 1234567); - try expect(001_234_567 == 1234567); - try expect(0_0_1_2_3_4_5_6_7 == 1234567); + try expect(1_234_567 == 1234567); + try expect(1_2_3_4_5_6_7 == 1234567); try expect(0b0_0_0_0 == 0); try expect(0b1010_1010 == 0b10101010); @@ -260,7 +259,7 @@ test "underscore separator parsing" { try expect(0x1_0_1_0_1_0_1_0 == 0x10101010); try expect(123_456.789_000e1_0 == 123456.789000e10); - try expect(0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10); + try expect(1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10); try expect(0x1234_5678.9ABC_DEF0p-1_0 == 0x12345678.9ABCDEF0p-10); try expect(0x1_2_3_4_5_6_7_8.9_A_B_C_D_E_F_0p-0_0_0_1_0 == 0x12345678.9ABCDEF0p-10); diff --git a/test/cases/compile_errors/invalid_underscore_placement_in_int_literal-1.zig b/test/cases/compile_errors/invalid_underscore_placement_in_int_literal-1.zig index 868ea8d42a..b90c733dea 100644 --- a/test/cases/compile_errors/invalid_underscore_placement_in_int_literal-1.zig +++ b/test/cases/compile_errors/invalid_underscore_placement_in_int_literal-1.zig @@ -1,5 +1,5 @@ fn main() void { - var bad: u128 = 0010_; + var bad: u128 = 10_; _ = bad; } @@ -8,4 +8,4 @@ fn main() void { // target=native // // :2:21: error: expected expression, found 'invalid bytes' -// :2:26: note: invalid byte: ';' +// :2:24: note: invalid byte: ';' diff --git a/test/cases/compile_errors/leading_zero_in_integer.zig b/test/cases/compile_errors/leading_zero_in_integer.zig new file mode 100644 index 0000000000..a818a3d75d --- /dev/null +++ b/test/cases/compile_errors/leading_zero_in_integer.zig @@ -0,0 +1,27 @@ +export fn entry1() void { + const T = u000123; + _ = T; +} +export fn entry2() void { + _ = i0; + _ = u0; + var x: i01 = 1; + _ = x; +} +export fn entry3() void { + _ = 000123; +} +export fn entry4() void { + _ = 01; +} + +// error +// backend=llvm +// target=native +// +// :2:15: error: primitive integer type 'u000123' has leading zero +// :8:12: error: primitive integer type 'i01' has leading zero +// :12:9: error: integer literal '000123' has leading zero +// :12:9: note: use '0o' prefix for octal literals +// :15:9: error: integer literal '01' has leading zero +// :15:9: note: use '0o' prefix for octal literals