diff --git a/lib/std/zig.zig b/lib/std/zig.zig index 9b8e2294f2..0d3c94d37b 100644 --- a/lib/std/zig.zig +++ b/lib/std/zig.zig @@ -14,6 +14,10 @@ pub const Ast = @import("zig/Ast.zig"); pub const system = @import("zig/system.zig"); pub const CrossTarget = @import("zig/CrossTarget.zig"); +// Character literal parsing +pub const ParsedCharLiteral = string_literal.ParsedCharLiteral; +pub const parseCharLiteral = string_literal.parseCharLiteral; + // Files needed by translate-c. pub const c_builtins = @import("zig/c_builtins.zig"); pub const c_translation = @import("zig/c_translation.zig"); @@ -185,205 +189,6 @@ pub fn binNameAlloc(allocator: std.mem.Allocator, options: BinNameOptions) error } } -pub const ParsedCharLiteral = union(enum) { - success: u32, - /// The character after backslash is not recognized. - invalid_escape_character: usize, - /// Expected hex digit at this index. - expected_hex_digit: usize, - /// Unicode escape sequence had no digits with rbrace at this index. - empty_unicode_escape_sequence: usize, - /// Expected hex digit or '}' at this index. - expected_hex_digit_or_rbrace: usize, - /// The unicode point is outside the range of Unicode codepoints. - unicode_escape_overflow: usize, - /// Expected '{' at this index. - expected_lbrace: usize, - /// Expected the terminating single quote at this index. - expected_end: usize, - /// The character at this index cannot be represented without an escape sequence. - invalid_character: usize, -}; - -/// Only validates escape sequence characters. -/// Slice must be valid utf8 starting and ending with "'" and exactly one codepoint in between. 
-pub fn parseCharLiteral(slice: []const u8) ParsedCharLiteral { - assert(slice.len >= 3 and slice[0] == '\'' and slice[slice.len - 1] == '\''); - - switch (slice[1]) { - 0 => return .{ .invalid_character = 1 }, - '\\' => switch (slice[2]) { - 'n' => return .{ .success = '\n' }, - 'r' => return .{ .success = '\r' }, - '\\' => return .{ .success = '\\' }, - 't' => return .{ .success = '\t' }, - '\'' => return .{ .success = '\'' }, - '"' => return .{ .success = '"' }, - 'x' => { - if (slice.len < 4) { - return .{ .expected_hex_digit = 3 }; - } - var value: u32 = 0; - var i: usize = 3; - while (i < 5) : (i += 1) { - const c = slice[i]; - switch (c) { - '0'...'9' => { - value *= 16; - value += c - '0'; - }, - 'a'...'f' => { - value *= 16; - value += c - 'a' + 10; - }, - 'A'...'F' => { - value *= 16; - value += c - 'A' + 10; - }, - else => { - return .{ .expected_hex_digit = i }; - }, - } - } - if (slice[i] != '\'') { - return .{ .expected_end = i }; - } - return .{ .success = value }; - }, - 'u' => { - var i: usize = 3; - if (slice[i] != '{') { - return .{ .expected_lbrace = i }; - } - i += 1; - if (slice[i] == '}') { - return .{ .empty_unicode_escape_sequence = i }; - } - - var value: u32 = 0; - while (i < slice.len) : (i += 1) { - const c = slice[i]; - switch (c) { - '0'...'9' => { - value *= 16; - value += c - '0'; - }, - 'a'...'f' => { - value *= 16; - value += c - 'a' + 10; - }, - 'A'...'F' => { - value *= 16; - value += c - 'A' + 10; - }, - '}' => { - i += 1; - break; - }, - else => return .{ .expected_hex_digit_or_rbrace = i }, - } - if (value > 0x10ffff) { - return .{ .unicode_escape_overflow = i }; - } - } - if (slice[i] != '\'') { - return .{ .expected_end = i }; - } - return .{ .success = value }; - }, - else => return .{ .invalid_escape_character = 2 }, - }, - else => { - const codepoint = std.unicode.utf8Decode(slice[1 .. 
slice.len - 1]) catch unreachable; - return .{ .success = codepoint }; - }, - } -} - -test "parseCharLiteral" { - try std.testing.expectEqual( - ParsedCharLiteral{ .success = 'a' }, - parseCharLiteral("'a'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .success = 'ä' }, - parseCharLiteral("'ä'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .success = 0 }, - parseCharLiteral("'\\x00'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .success = 0x4f }, - parseCharLiteral("'\\x4f'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .success = 0x4f }, - parseCharLiteral("'\\x4F'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .success = 0x3041 }, - parseCharLiteral("'ぁ'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .success = 0 }, - parseCharLiteral("'\\u{0}'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .success = 0x3041 }, - parseCharLiteral("'\\u{3041}'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .success = 0x7f }, - parseCharLiteral("'\\u{7f}'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .success = 0x7fff }, - parseCharLiteral("'\\u{7FFF}'"), - ); - - try std.testing.expectEqual( - ParsedCharLiteral{ .expected_hex_digit = 4 }, - parseCharLiteral("'\\x0'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .expected_end = 5 }, - parseCharLiteral("'\\x000'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .invalid_escape_character = 2 }, - parseCharLiteral("'\\y'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .expected_lbrace = 3 }, - parseCharLiteral("'\\u'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .expected_lbrace = 3 }, - parseCharLiteral("'\\uFFFF'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .empty_unicode_escape_sequence = 4 }, - parseCharLiteral("'\\u{}'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .unicode_escape_overflow = 9 }, - parseCharLiteral("'\\u{FFFFFF}'"), - ); 
- try std.testing.expectEqual( - ParsedCharLiteral{ .expected_hex_digit_or_rbrace = 8 }, - parseCharLiteral("'\\u{FFFF'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .expected_end = 9 }, - parseCharLiteral("'\\u{FFFF}x'"), - ); - try std.testing.expectEqual( - ParsedCharLiteral{ .invalid_character = 1 }, - parseCharLiteral("'\x00'"), - ); -} - test { @import("std").testing.refAllDecls(@This()); } diff --git a/lib/std/zig/string_literal.zig b/lib/std/zig/string_literal.zig index 1eaab26e3a..07ce08f491 100644 --- a/lib/std/zig/string_literal.zig +++ b/lib/std/zig/string_literal.zig @@ -1,129 +1,268 @@ const std = @import("../std.zig"); const assert = std.debug.assert; +const utf8Decode = std.unicode.utf8Decode; +const utf8Encode = std.unicode.utf8Encode; pub const ParseError = error{ OutOfMemory, - InvalidStringLiteral, + InvalidLiteral, +}; + +pub const ParsedCharLiteral = union(enum) { + success: u21, + failure: Error, }; pub const Result = union(enum) { success, - /// Found an invalid character at this index. - invalid_character: usize, - /// Expected hex digits at this index. - expected_hex_digits: usize, - /// Invalid hex digits at this index. - invalid_hex_escape: usize, - /// Invalid unicode escape at this index. - invalid_unicode_escape: usize, - /// The left brace at this index is missing a matching right brace. - missing_matching_rbrace: usize, - /// Expected unicode digits at this index. - expected_unicode_digits: usize, + failure: Error, }; +pub const Error = union(enum) { + /// The character after backslash is missing or not recognized. + invalid_escape_character: usize, + /// Expected hex digit at this index. + expected_hex_digit: usize, + /// Unicode escape sequence had no digits with rbrace at this index. + empty_unicode_escape_sequence: usize, + /// Expected hex digit or '}' at this index. + expected_hex_digit_or_rbrace: usize, + /// Invalid unicode codepoint at this index. 
+ invalid_unicode_codepoint: usize, + /// Expected '{' at this index. + expected_lbrace: usize, + /// Expected '}' at this index. + expected_rbrace: usize, + /// Expected '\'' at this index. + expected_single_quote: usize, + /// The character at this index cannot be represented without an escape sequence. + invalid_character: usize, +}; + +/// Only validates escape sequence characters. +/// Slice must be valid utf8 starting and ending with "'" and exactly one codepoint in between. +pub fn parseCharLiteral(slice: []const u8) ParsedCharLiteral { + assert(slice.len >= 3 and slice[0] == '\'' and slice[slice.len - 1] == '\''); + + switch (slice[1]) { + '\\' => { + var offset: usize = 1; + const result = parseEscapeSequence(slice, &offset); + if (result == .success and (offset + 1 != slice.len or slice[offset] != '\'')) + return .{ .failure = .{ .expected_single_quote = offset } }; + + return result; + }, + 0 => return .{ .failure = .{ .invalid_character = 1 } }, + else => { + const codepoint = utf8Decode(slice[1 .. slice.len - 1]) catch unreachable; + return .{ .success = codepoint }; + }, + } +} + +/// Parse an escape sequence from `slice[offset..]`. If parsing is successful, +/// offset is updated to reflect the characters consumed. 
+fn parseEscapeSequence(slice: []const u8, offset: *usize) ParsedCharLiteral { + assert(slice.len > offset.*); + assert(slice[offset.*] == '\\'); + + if (slice.len == offset.* + 1) + return .{ .failure = .{ .invalid_escape_character = offset.* + 1 } }; + + offset.* += 2; + switch (slice[offset.* - 1]) { + 'n' => return .{ .success = '\n' }, + 'r' => return .{ .success = '\r' }, + '\\' => return .{ .success = '\\' }, + 't' => return .{ .success = '\t' }, + '\'' => return .{ .success = '\'' }, + '"' => return .{ .success = '"' }, + 'x' => { + var value: u8 = 0; + var i: usize = offset.*; + while (i < offset.* + 2) : (i += 1) { + if (i == slice.len) return .{ .failure = .{ .expected_hex_digit = i } }; + + const c = slice[i]; + switch (c) { + '0'...'9' => { + value *= 16; + value += c - '0'; + }, + 'a'...'f' => { + value *= 16; + value += c - 'a' + 10; + }, + 'A'...'F' => { + value *= 16; + value += c - 'A' + 10; + }, + else => { + return .{ .failure = .{ .expected_hex_digit = i } }; + }, + } + } + offset.* = i; + return .{ .success = value }; + }, + 'u' => { + var i: usize = offset.*; + if (i >= slice.len or slice[i] != '{') return .{ .failure = .{ .expected_lbrace = i } }; + i += 1; + if (i >= slice.len) return .{ .failure = .{ .expected_hex_digit_or_rbrace = i } }; + if (slice[i] == '}') return .{ .failure = .{ .empty_unicode_escape_sequence = i } }; + + var value: u32 = 0; + while (i < slice.len) : (i += 1) { + const c = slice[i]; + switch (c) { + '0'...'9' => { + value *= 16; + value += c - '0'; + }, + 'a'...'f' => { + value *= 16; + value += c - 'a' + 10; + }, + 'A'...'F' => { + value *= 16; + value += c - 'A' + 10; + }, + '}' => { + i += 1; + break; + }, + else => return .{ .failure = .{ .expected_hex_digit_or_rbrace = i } }, + } + if (value > 0x10ffff) { + return .{ .failure = .{ .invalid_unicode_codepoint = i } }; + } + } else { + return .{ .failure = .{ .expected_rbrace = i } }; + } + offset.* = i; + return .{ .success = @intCast(u21, value) }; + }, + else => 
return .{ .failure = .{ .invalid_escape_character = offset.* - 1 } }, + } +} + +test "parseCharLiteral" { + try std.testing.expectEqual( + ParsedCharLiteral{ .success = 'a' }, + parseCharLiteral("'a'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .success = 'ä' }, + parseCharLiteral("'ä'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .success = 0 }, + parseCharLiteral("'\\x00'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .success = 0x4f }, + parseCharLiteral("'\\x4f'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .success = 0x4f }, + parseCharLiteral("'\\x4F'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .success = 0x3041 }, + parseCharLiteral("'ぁ'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .success = 0 }, + parseCharLiteral("'\\u{0}'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .success = 0x3041 }, + parseCharLiteral("'\\u{3041}'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .success = 0x7f }, + parseCharLiteral("'\\u{7f}'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .success = 0x7fff }, + parseCharLiteral("'\\u{7FFF}'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .failure = .{ .expected_hex_digit = 4 } }, + parseCharLiteral("'\\x0'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .failure = .{ .expected_single_quote = 5 } }, + parseCharLiteral("'\\x000'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .failure = .{ .invalid_escape_character = 2 } }, + parseCharLiteral("'\\y'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .failure = .{ .expected_lbrace = 3 } }, + parseCharLiteral("'\\u'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .failure = .{ .expected_lbrace = 3 } }, + parseCharLiteral("'\\uFFFF'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .failure = .{ .empty_unicode_escape_sequence = 4 } }, + parseCharLiteral("'\\u{}'"), + ); + try 
std.testing.expectEqual( + ParsedCharLiteral{ .failure = .{ .invalid_unicode_codepoint = 9 } }, + parseCharLiteral("'\\u{FFFFFF}'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .failure = .{ .expected_hex_digit_or_rbrace = 8 } }, + parseCharLiteral("'\\u{FFFF'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .failure = .{ .expected_single_quote = 9 } }, + parseCharLiteral("'\\u{FFFF}x'"), + ); + try std.testing.expectEqual( + ParsedCharLiteral{ .failure = .{ .invalid_character = 1 } }, + parseCharLiteral("'\x00'"), + ); +} + /// Parses `bytes` as a Zig string literal and appends the result to `buf`. /// Asserts `bytes` has '"' at beginning and end. pub fn parseAppend(buf: *std.ArrayList(u8), bytes: []const u8) error{OutOfMemory}!Result { assert(bytes.len >= 2 and bytes[0] == '"' and bytes[bytes.len - 1] == '"'); - const slice = bytes[1..]; + try buf.ensureUnusedCapacity(bytes.len - 2); - const prev_len = buf.items.len; - try buf.ensureUnusedCapacity(slice.len - 1); - errdefer buf.shrinkRetainingCapacity(prev_len); + var index: usize = 1; + while (true) { + const b = bytes[index]; - const State = enum { - Start, - Backslash, - }; - - var state = State.Start; - var index: usize = 0; - while (true) : (index += 1) { - const b = slice[index]; - - switch (state) { - State.Start => switch (b) { - '\\' => state = State.Backslash, - '\n' => { - return Result{ .invalid_character = index }; - }, - '"' => return Result.success, - else => try buf.append(b), - }, - State.Backslash => switch (b) { - 'n' => { - try buf.append('\n'); - state = State.Start; - }, - 'r' => { - try buf.append('\r'); - state = State.Start; - }, - '\\' => { - try buf.append('\\'); - state = State.Start; - }, - 't' => { - try buf.append('\t'); - state = State.Start; - }, - '\'' => { - try buf.append('\''); - state = State.Start; - }, - '"' => { - try buf.append('"'); - state = State.Start; - }, - 'x' => { - // TODO: add more/better/broader tests for this. 
- const index_continue = index + 3; - if (slice.len < index_continue) { - return Result{ .expected_hex_digits = index }; - } - if (std.fmt.parseUnsigned(u8, slice[index + 1 .. index_continue], 16)) |byte| { - try buf.append(byte); - state = State.Start; - index = index_continue - 1; // loop-header increments again - } else |err| switch (err) { - error.Overflow => unreachable, // 2 digits base 16 fits in a u8. - error.InvalidCharacter => { - return Result{ .invalid_hex_escape = index + 1 }; - }, - } - }, - 'u' => { - // TODO: add more/better/broader tests for this. - // TODO: we are already inside a nice, clean state machine... use it - // instead of this hacky code. - if (slice.len > index + 2 and slice[index + 1] == '{') { - if (std.mem.indexOfScalarPos(u8, slice[0..std.math.min(index + 9, slice.len)], index + 3, '}')) |index_end| { - const hex_str = slice[index + 2 .. index_end]; - if (std.fmt.parseUnsigned(u32, hex_str, 16)) |uint| { - if (uint <= 0x10ffff) { - // TODO this incorrectly depends on endianness - try buf.appendSlice(std.mem.toBytes(uint)[0..]); - state = State.Start; - index = index_end; // loop-header increments - continue; - } - } else |err| switch (err) { - error.Overflow => unreachable, - error.InvalidCharacter => { - return Result{ .invalid_unicode_escape = index + 1 }; - }, - } + switch (b) { + '\\' => { + const escape_char_index = index + 1; + const result = parseEscapeSequence(bytes, &index); + switch (result) { + .success => |codepoint| { + if (bytes[escape_char_index] == 'u') { + buf.items.len += utf8Encode(codepoint, buf.unusedCapacitySlice()) catch { + return Result{ .failure = .{ .invalid_unicode_codepoint = escape_char_index + 1 } }; + }; } else { - return Result{ .missing_matching_rbrace = index + 1 }; + buf.appendAssumeCapacity(@intCast(u8, codepoint)); } - } else { - return Result{ .expected_unicode_digits = index }; - } - }, - else => { - return Result{ .invalid_character = index }; - }, + }, + .failure => |err| return Result{ 
.failure = err }, + } + }, + '\n' => return Result{ .failure = .{ .invalid_character = index } }, + '"' => return Result.success, + else => { + try buf.append(b); + index += 1; }, } } else unreachable; // TODO should not need else unreachable on while(true) @@ -137,18 +276,23 @@ pub fn parseAlloc(allocator: std.mem.Allocator, bytes: []const u8) ParseError![] switch (try parseAppend(&buf, bytes)) { .success => return buf.toOwnedSlice(), - else => return error.InvalidStringLiteral, + .failure => return error.InvalidLiteral, } } test "parse" { const expect = std.testing.expect; + const expectError = std.testing.expectError; const eql = std.mem.eql; - var fixed_buf_mem: [32]u8 = undefined; - var fixed_buf_alloc = std.heap.FixedBufferAllocator.init(fixed_buf_mem[0..]); + var fixed_buf_mem: [64]u8 = undefined; + var fixed_buf_alloc = std.heap.FixedBufferAllocator.init(&fixed_buf_mem); var alloc = fixed_buf_alloc.allocator(); + try expectError(error.InvalidLiteral, parseAlloc(alloc, "\"\\x6\"")); + try expect(eql(u8, "foo\nbar", try parseAlloc(alloc, "\"foo\\nbar\""))); + try expect(eql(u8, "\x12foo", try parseAlloc(alloc, "\"\\x12foo\""))); + try expect(eql(u8, "bytes\u{1234}foo", try parseAlloc(alloc, "\"bytes\\u{1234}foo\""))); try expect(eql(u8, "foo", try parseAlloc(alloc, "\"foo\""))); try expect(eql(u8, "foo", try parseAlloc(alloc, "\"f\x6f\x6f\""))); try expect(eql(u8, "f💯", try parseAlloc(alloc, "\"f\u{1f4af}\""))); diff --git a/src/AstGen.zig b/src/AstGen.zig index 5de68e43e5..e7e7af3354 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -6447,7 +6447,7 @@ fn multilineStringLiteral( return rvalue(gz, rl, result, node); } -fn charLiteral(gz: *GenZir, rl: ResultLoc, node: Ast.Node.Index) !Zir.Inst.Ref { +fn charLiteral(gz: *GenZir, rl: ResultLoc, node: Ast.Node.Index) InnerError!Zir.Inst.Ref { const astgen = gz.astgen; const tree = astgen.tree; const main_tokens = tree.nodes.items(.main_token); @@ -6459,70 +6459,7 @@ fn charLiteral(gz: *GenZir, rl: ResultLoc, 
node: Ast.Node.Index) !Zir.Inst.Ref {
             const result = try gz.addInt(codepoint);
             return rvalue(gz, rl, result, node);
         },
-        .invalid_escape_character => |bad_index| {
-            return astgen.failOff(
-                main_token,
-                @intCast(u32, bad_index),
-                "invalid escape character: '{c}'",
-                .{slice[bad_index]},
-            );
-        },
-        .expected_hex_digit => |bad_index| {
-            return astgen.failOff(
-                main_token,
-                @intCast(u32, bad_index),
-                "expected hex digit, found '{c}'",
-                .{slice[bad_index]},
-            );
-        },
-        .empty_unicode_escape_sequence => |bad_index| {
-            return astgen.failOff(
-                main_token,
-                @intCast(u32, bad_index),
-                "empty unicode escape sequence",
-                .{},
-            );
-        },
-        .expected_hex_digit_or_rbrace => |bad_index| {
-            return astgen.failOff(
-                main_token,
-                @intCast(u32, bad_index),
-                "expected hex digit or '}}', found '{c}'",
-                .{slice[bad_index]},
-            );
-        },
-        .unicode_escape_overflow => |bad_index| {
-            return astgen.failOff(
-                main_token,
-                @intCast(u32, bad_index),
-                "unicode escape too large to be a valid codepoint",
-                .{},
-            );
-        },
-        .expected_lbrace => |bad_index| {
-            return astgen.failOff(
-                main_token,
-                @intCast(u32, bad_index),
-                "expected '{{', found '{c}",
-                .{slice[bad_index]},
-            );
-        },
-        .expected_end => |bad_index| {
-            return astgen.failOff(
-                main_token,
-                @intCast(u32, bad_index),
-                "expected ending single quote ('), found '{c}",
-                .{slice[bad_index]},
-            );
-        },
-        .invalid_character => |bad_index| {
-            return astgen.failOff(
-                main_token,
-                @intCast(u32, bad_index),
-                "invalid byte in character literal: '{c}'",
-                .{slice[bad_index]},
-            );
-        },
+        .failure => |err| return astgen.failWithStrLitError(err, main_token, slice, 0),
     }
 }
 
@@ -8958,54 +8895,87 @@ fn parseStrLit(
     buf.* = buf_managed.moveToUnmanaged();
     switch (try result) {
         .success => return,
+        .failure => |err| return astgen.failWithStrLitError(err, token, bytes, offset),
+    }
+}
+
+/// Reports the string/character literal parse error `err` on `token` through `astgen.failOff`.
+/// `bytes[offset..]` is the raw literal text; the index carried by `err` is relative to it.
+fn failWithStrLitError(astgen: *AstGen, err: std.zig.string_literal.Error, token: Ast.TokenIndex, bytes: []const u8, offset: u32) InnerError {
+    const raw_string = bytes[offset..];
+    switch (err) {
+        .invalid_escape_character => |bad_index| {
+            return astgen.failOff(
+                token,
+                offset + @intCast(u32, bad_index),
+                "invalid escape character: '{c}'",
+                .{raw_string[bad_index]},
+            );
+        },
+        .expected_hex_digit => |bad_index| {
+            return astgen.failOff(
+                token,
+                offset + @intCast(u32, bad_index),
+                "expected hex digit, found '{c}'",
+                .{raw_string[bad_index]},
+            );
+        },
+        .empty_unicode_escape_sequence => |bad_index| {
+            return astgen.failOff(
+                token,
+                offset + @intCast(u32, bad_index),
+                "empty unicode escape sequence",
+                .{},
+            );
+        },
+        .expected_hex_digit_or_rbrace => |bad_index| {
+            return astgen.failOff(
+                token,
+                offset + @intCast(u32, bad_index),
+                "expected hex digit or '}}', found '{c}'",
+                .{raw_string[bad_index]},
+            );
+        },
+        .invalid_unicode_codepoint => |bad_index| {
+            return astgen.failOff(
+                token,
+                offset + @intCast(u32, bad_index),
+                "unicode escape does not correspond to a valid codepoint",
+                .{},
+            );
+        },
+        .expected_lbrace => |bad_index| {
+            return astgen.failOff(
+                token,
+                offset + @intCast(u32, bad_index),
+                "expected '{{', found '{c}'",
+                .{raw_string[bad_index]},
+            );
+        },
+        .expected_rbrace => |bad_index| {
+            return astgen.failOff(
+                token,
+                offset + @intCast(u32, bad_index),
+                "expected '}}', found '{c}'",
+                .{raw_string[bad_index]},
+            );
+        },
+        .expected_single_quote => |bad_index| {
+            return astgen.failOff(
+                token,
+                offset + @intCast(u32, bad_index),
+                "expected single quote ('), found '{c}'",
+                .{raw_string[bad_index]},
+            );
+        },
         .invalid_character => |bad_index| {
             return astgen.failOff(
                 token,
                 offset + @intCast(u32, bad_index),
-                "invalid string literal character: '{c}'",
+                "invalid byte in string or character literal: '{c}'",
                 .{raw_string[bad_index]},
             );
         },
-        .expected_hex_digits => |bad_index| {
-            return astgen.failOff(
-                token,
-                offset + @intCast(u32, bad_index),
-                "expected hex digits after '\\x'",
-                .{},
-            );
-        },
-        .invalid_hex_escape => |bad_index| {
-            return astgen.failOff(
-                token,
-                offset + @intCast(u32, bad_index),
-                "invalid hex digit: '{c}'",
-                .{raw_string[bad_index]},
-            );
-        },
-        .invalid_unicode_escape => |bad_index| {
-            return astgen.failOff(
-                token,
-                offset + @intCast(u32, bad_index),
-                "invalid unicode digit: '{c}'",
-                .{raw_string[bad_index]},
-            );
-        },
-        .missing_matching_rbrace => |bad_index| {
-            return astgen.failOff(
-                token,
-                offset + @intCast(u32, bad_index),
-                "missing matching '}}' character",
-                .{},
-            );
-        },
-        .expected_unicode_digits => |bad_index| {
-            return astgen.failOff(
-                token,
-                offset + @intCast(u32, bad_index),
-                "expected unicode digits after '\\u'",
-                .{},
-            );
-        },
     }
 }
 
diff --git a/test/behavior/basic.zig b/test/behavior/basic.zig
index 2bb4bb3e44..3d3c44cbbc 100644
--- a/test/behavior/basic.zig
+++ b/test/behavior/basic.zig
@@ -662,7 +662,11 @@ test "multiline string literal is null terminated" {
 }
 
 test "string escapes" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
 
     try expectEqualStrings("\"", "\x22");
     try expectEqualStrings("\'", "\x27");