diff --git a/lib/std/zig.zig b/lib/std/zig.zig
index b070fbdcd5..36dfe74086 100644
--- a/lib/std/zig.zig
+++ b/lib/std/zig.zig
@@ -80,6 +80,121 @@ pub fn binNameAlloc(
     }
 }
 
+/// Parses a character literal token and returns the codepoint it denotes.
+/// Only validates escape sequence characters.
+/// Slice must be valid utf8 starting and ending with "'" and exactly one codepoint in between.
+/// On error.InvalidCharacter, `bad_index` holds the index into `slice` of the offending byte.
+pub fn parseCharLiteral(
+    slice: []const u8,
+    bad_index: *usize, // populated if error.InvalidCharacter is returned
+) error{InvalidCharacter}!u32 {
+    std.debug.assert(slice.len >= 3 and slice[0] == '\'' and slice[slice.len - 1] == '\'');
+
+    if (slice[1] == '\\') {
+        switch (slice[2]) {
+            'n' => return '\n',
+            'r' => return '\r',
+            '\\' => return '\\',
+            't' => return '\t',
+            '\'' => return '\'',
+            '"' => return '"',
+            'x' => {
+                // A `\xNN` literal is exactly 6 bytes long including both quotes.
+                if (slice.len != 6) {
+                    bad_index.* = slice.len - 2;
+                    return error.InvalidCharacter;
+                }
+
+                var value: u32 = 0;
+                for (slice[3..5]) |c, i| {
+                    switch (c) {
+                        '0'...'9' => {
+                            value *= 16;
+                            value += c - '0';
+                        },
+                        'a'...'f' => {
+                            value *= 16;
+                            value += c - 'a' + 10;
+                        },
+                        'A'...'F' => {
+                            value *= 16;
+                            value += c - 'A' + 10;
+                        },
+                        else => {
+                            // `i` counts from slice[3]; report an index into `slice`.
+                            bad_index.* = i + 3;
+                            return error.InvalidCharacter;
+                        },
+                    }
+                }
+                return value;
+            },
+            'u' => {
+                if (slice.len < 6 or slice[3] != '{') {
+                    bad_index.* = 2;
+                    return error.InvalidCharacter;
+                }
+                var value: u32 = 0;
+                for (slice[4..]) |c, i| {
+                    // Checked before every byte so that `value * 16 + 15` always fits in a u32.
+                    if (value > 0x10ffff) {
+                        bad_index.* = i + 4;
+                        return error.InvalidCharacter;
+                    }
+                    switch (c) {
+                        '0'...'9' => {
+                            value *= 16;
+                            value += c - '0';
+                        },
+                        'a'...'f' => {
+                            value *= 16;
+                            value += c - 'a' + 10;
+                        },
+                        'A'...'F' => {
+                            value *= 16;
+                            value += c - 'A' + 10;
+                        },
+                        '}' => break,
+                        else => {
+                            bad_index.* = i + 4;
+                            return error.InvalidCharacter;
+                        },
+                    }
+                }
+                return value;
+            },
+            else => {
+                bad_index.* = 2;
+                return error.InvalidCharacter;
+            }
+        }
+    }
+    return std.unicode.utf8Decode(slice[1 .. slice.len - 1]) catch unreachable;
+}
+
+test "parseCharLiteral" {
+    var bad_index: usize = undefined;
+    std.testing.expectEqual(try parseCharLiteral("'a'", &bad_index), 'a');
+    std.testing.expectEqual(try parseCharLiteral("'ä'", &bad_index), 'ä');
+    std.testing.expectEqual(try parseCharLiteral("'\\x00'", &bad_index), 0);
+    std.testing.expectEqual(try parseCharLiteral("'\\x1f'", &bad_index), 0x1f);
+    std.testing.expectEqual(try parseCharLiteral("'\\xAb'", &bad_index), 0xab);
+    std.testing.expectEqual(try parseCharLiteral("'ぁ'", &bad_index), 0x3041);
+    std.testing.expectEqual(try parseCharLiteral("'\\u{3041}'", &bad_index), 0x3041);
+    std.testing.expectEqual(try parseCharLiteral("'\\u{1F4a9}'", &bad_index), 0x1f4a9);
+
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\x0'", &bad_index));
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\y'", &bad_index));
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u'", &bad_index));
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{FFFFFF}'", &bad_index));
+
+    // bad_index is an index into the whole literal, not into the digits.
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\x0g'", &bad_index));
+    std.testing.expectEqual(bad_index, 4);
+    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{12g4}'", &bad_index));
+    std.testing.expectEqual(bad_index, 6);
+}
+
 test "" {
     @import("std").meta.refAllDecls(@This());
 }
diff --git a/src-self-hosted/astgen.zig b/src-self-hosted/astgen.zig
index 87f69e053a..f05020f6df 100644
--- a/src-self-hosted/astgen.zig
+++ b/src-self-hosted/astgen.zig
@@ -131,6 +131,8 @@ pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerEr
         .ArrayType => return rlWrap(mod, scope, rl, try arrayType(mod, scope, node.castTag(.ArrayType).?)),
         .ArrayTypeSentinel => return rlWrap(mod, scope, rl, try arrayTypeSentinel(mod, scope, node.castTag(.ArrayTypeSentinel).?)),
         .EnumLiteral => return rlWrap(mod, scope, rl, try enumLiteral(mod, scope, node.castTag(.EnumLiteral).?)),
+        .MultilineStringLiteral => return rlWrap(mod, scope, rl, try multilineStrLiteral(mod, scope, node.castTag(.MultilineStringLiteral).?)),
+        .CharLiteral => return rlWrap(mod, scope, rl, try charLiteral(mod, scope, node.castTag(.CharLiteral).?)),
 
         .Defer => return mod.failNode(scope, node, "TODO implement astgen.expr for .Defer", .{}),
         .Catch => return mod.failNode(scope, node, "TODO implement astgen.expr for .Catch", .{}),
@@ -159,8 +161,6 @@ pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerEr
         .ErrorType => return mod.failNode(scope, node, "TODO implement astgen.expr for .ErrorType", .{}),
         .FnProto => return mod.failNode(scope, node, "TODO implement astgen.expr for .FnProto", .{}),
         .AnyFrameType => return mod.failNode(scope, node, "TODO implement astgen.expr for .AnyFrameType", .{}),
-        .MultilineStringLiteral => return mod.failNode(scope, node, "TODO implement astgen.expr for .MultilineStringLiteral", .{}),
-        .CharLiteral => return mod.failNode(scope, node, "TODO implement astgen.expr for .CharLiteral", .{}),
         .ErrorSetDecl => return mod.failNode(scope, node, "TODO implement astgen.expr for .ErrorSetDecl", .{}),
         .ContainerDecl => return mod.failNode(scope, node, "TODO implement astgen.expr for .ContainerDecl", .{}),
         .Comptime => return mod.failNode(scope, node, "TODO implement astgen.expr for .Comptime", .{}),
@@ -497,6 +497,7 @@ fn arrayType(mod: *Module, scope: *Scope, node: *ast.Node.ArrayType) !*zir.Inst
         .val = Value.initTag(.usize_type),
     });
 
+    // TODO check for [_]T
     const len = try expr(mod, scope, .{ .ty = usize_type }, node.len_expr);
     const child_type = try expr(mod, scope, .{ .ty = meta_type }, node.rhs);
 
@@ -515,6 +516,7 @@ fn arrayTypeSentinel(mod: *Module, scope: *Scope, node: *ast.Node.ArrayTypeSenti
         .val = Value.initTag(.usize_type),
     });
 
+    // TODO check for [_]T
     const len = try expr(mod, scope, .{ .ty = usize_type }, node.len_expr);
     const sentinel_uncasted = try expr(mod, scope, .none, node.sentinel);
     const elem_type = try expr(mod, scope, .{ .ty = meta_type }, node.rhs);
@@ -1120,6 +1122,53 @@ fn stringLiteral(mod: *Module, scope: *Scope, str_lit: *ast.Node.OneToken) Inner
     return addZIRInst(mod, scope, src, zir.Inst.Str, .{ .bytes = bytes }, .{});
 }
 
+fn multilineStrLiteral(mod: *Module, scope: *Scope, node: *ast.Node.MultilineStringLiteral) !*zir.Inst {
+    const tree = scope.tree();
+    const lines = node.linesConst();
+    const src = tree.token_locs[lines[0]].start;
+
+    // line lengths and new lines
+    var len = lines.len - 1;
+    for (lines) |line| {
+        len += tree.tokenSlice(line).len - 2;
+    }
+
+    const bytes = try scope.arena().alloc(u8, len);
+    var i: usize = 0;
+    for (lines) |line, line_i| {
+        if (line_i != 0) {
+            bytes[i] = '\n';
+            i += 1;
+        }
+        const slice = tree.tokenSlice(line)[2..];
+        mem.copy(u8, bytes[i..], slice);
+        i += slice.len;
+    }
+
+    return addZIRInst(mod, scope, src, zir.Inst.Str, .{ .bytes = bytes }, .{});
+}
+
+fn charLiteral(mod: *Module, scope: *Scope, node: *ast.Node.OneToken) !*zir.Inst {
+    const tree = scope.tree();
+    const src = tree.token_locs[node.token].start;
+    const slice = tree.tokenSlice(node.token);
+
+    var bad_index: usize = undefined;
+    const value = std.zig.parseCharLiteral(slice, &bad_index) catch |err| switch (err) {
+        error.InvalidCharacter => {
+            const bad_byte = slice[bad_index];
+            return mod.fail(scope, src + bad_index, "invalid character: '{c}'\n", .{bad_byte});
+        },
+    };
+
+    const int_payload = try scope.arena().create(Value.Payload.Int_u64);
+    int_payload.* = .{ .int = value };
+    return addZIRInstConst(mod, scope, src, .{
+        .ty = Type.initTag(.comptime_int),
+        .val = Value.initPayload(&int_payload.base),
+    });
+}
+
 fn integerLiteral(mod: *Module, scope: *Scope, int_lit: *ast.Node.OneToken) InnerError!*zir.Inst {
     const arena = scope.arena();
     const tree = scope.tree();
diff --git a/src-self-hosted/zir_sema.zig b/src-self-hosted/zir_sema.zig
index 5c473494fb..3a2a593539 100644
--- a/src-self-hosted/zir_sema.zig
+++ b/src-self-hosted/zir_sema.zig
@@ -365,6 +365,7 @@ fn analyzeInstEnsureResultNonError(mod: *Module, scope: *Scope, inst: *zir.Inst.
 
 fn analyzeInstAlloc(mod: *Module, scope: *Scope, inst: *zir.Inst.UnOp) InnerError!*Inst {
     const var_type = try resolveType(mod, scope, inst.positionals.operand);
+    // TODO this should happen only for var allocs
     if (!var_type.isValidVarType()) {
         return mod.fail(scope, inst.base.src, "variable of type '{}' must be const or comptime", .{var_type});
     }
diff --git a/test/stage2/compare_output.zig b/test/stage2/compare_output.zig
index 4208cc3911..8b92674e9c 100644
--- a/test/stage2/compare_output.zig
+++ b/test/stage2/compare_output.zig
@@ -543,6 +543,38 @@ pub fn addCases(ctx: *TestContext) !void {
             ,
             "",
         );
+
+        case.addCompareOutput(
+            \\export fn _start() noreturn {
+            \\    const ignore =
+            \\        \\ cool thx
+            \\        \\
+            \\    ;
+            \\    add('ぁ', '\x03');
+            \\
+            \\    exit();
+            \\}
+            \\
+            \\fn add(a: u32, b: u32) void {
+            \\    assert(a + b == 12356);
+            \\}
+            \\
+            \\pub fn assert(ok: bool) void {
+            \\    if (!ok) unreachable; // assertion failure
+            \\}
+            \\
+            \\fn exit() noreturn {
+            \\    asm volatile ("syscall"
+            \\        :
+            \\        : [number] "{rax}" (231),
+            \\          [arg1] "{rdi}" (0)
+            \\        : "rcx", "r11", "memory"
+            \\    );
+            \\    unreachable;
+            \\}
+        ,
+            "",
+        );
     }
 
     {