From 0cef727e59d7b0c34756c09f64cbfe4490dcc3e7 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Tue, 11 Jun 2024 22:13:22 -0700 Subject: [PATCH] More precise error message for unencodable `\u` escapes The surrogate code points U+D800 to U+DFFF are valid code points but are not Unicode scalar values. This commit makes the error message more accurately reflect what is actually allowed in `\u` escape sequences. From https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf: > D71 High-surrogate code point: A Unicode code point in the range U+D800 to U+DBFF. > D73 Low-surrogate code point: A Unicode code point in the range U+DC00 to U+DFFF. > > 3.9 Unicode Encoding Forms > D76 Unicode scalar value: Any Unicode code point except high-surrogate and low-surrogate code points. Related: #20270 --- doc/langref.html.in | 4 ++-- lib/std/zig/AstGen.zig | 2 +- src/Package/Manifest.zig | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 42aebdcac2..9ef3691991 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -728,12 +728,12 @@ \u{NNNNNN} - hexadecimal Unicode code point UTF-8 encoded (1 or more digits) + hexadecimal Unicode scalar value UTF-8 encoded (1 or more digits) -

Note that the maximum valid Unicode point is {#syntax#}0x10ffff{#endsyntax#}.

+

Note that the maximum valid Unicode scalar value is {#syntax#}0x10ffff{#endsyntax#}.

{#header_close#} {#header_open|Multiline String Literals#}

diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index 9c068eb597..f02dcaa2db 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -11306,7 +11306,7 @@ fn failWithStrLitError(astgen: *AstGen, err: std.zig.string_literal.Error, token return astgen.failOff( token, offset + @as(u32, @intCast(bad_index)), - "unicode escape does not correspond to a valid codepoint", + "unicode escape does not correspond to a valid unicode scalar value", .{}, ); }, diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index 589be91357..3bcb4a7958 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -522,7 +522,7 @@ const Parse = struct { try p.appendErrorOff( token, offset + @as(u32, @intCast(bad_index)), - "unicode escape does not correspond to a valid codepoint", + "unicode escape does not correspond to a valid unicode scalar value", .{}, ); },