From 559bbf1cc669501ff1f1f59d75577691d96450a0 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 21 Jan 2024 16:11:20 -0700 Subject: [PATCH 1/9] langref: explicitly mention inline combined with multiple cases closes #18524 --- doc/langref.html.in | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index d5bbb20f7a..b7431b8bf5 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -4311,10 +4311,11 @@ test "enum literals with switch" { {#code_end#} {#header_close#} - {#header_open|Inline switch#} + {#header_open|Inline Switch Prongs#}

Switch prongs can be marked as {#syntax#}inline{#endsyntax#} to generate - the prong's body for each possible value it could have: + the prong's body for each possible value it could have, making the + captured value {#link|comptime#}.

{#code_begin|test|test_inline_switch#} const std = @import("std"); @@ -4324,9 +4325,9 @@ const expectError = std.testing.expectError; fn isFieldOptional(comptime T: type, field_index: usize) !bool { const fields = @typeInfo(T).Struct.fields; return switch (field_index) { - // This prong is analyzed `fields.len - 1` times with `idx` being a - // unique comptime-known value each time. - inline 0...fields.len - 1 => |idx| @typeInfo(fields[idx].type) == .Optional, + // This prong is analyzed twice with `idx` being a + // comptime-known value each time. + inline 0, 1 => |idx| @typeInfo(fields[idx].type) == .Optional, else => return error.IndexOutOfBounds, }; } @@ -4350,6 +4351,16 @@ fn isFieldOptionalUnrolled(field_index: usize) !bool { 1 => true, else => return error.IndexOutOfBounds, }; +} + {#code_end#} +

The {#syntax#}inline{#endsyntax#} keyword may also be combined with ranges:

+ {#code_begin|syntax|inline_prong_range#} +fn isFieldOptional(comptime T: type, field_index: usize) !bool { + const fields = @typeInfo(T).Struct.fields; + return switch (field_index) { + inline 0...fields.len - 1 => |idx| @typeInfo(fields[idx].type) == .Optional, + else => return error.IndexOutOfBounds, + }; } {#code_end#}

From 2d9c4792ae2cab0ff3b1df54b15913dcbcaef112 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 21 Jan 2024 16:19:54 -0700 Subject: [PATCH 2/9] std.fmt: clarify the use of "character" Currently, std.fmt has a misguided, half-assed Unicode implementation with an ambiguous definition of the word "character". This commit does almost nothing to mitigate the problem, but it lets me close an open PR. In the future I will revert 473cb1fd74d6d478bb3d5fda4707ce3f6e6e5bf6 as well as 279607cae58f7be46335793df6a4a753d0a800aa, and redo the whole std.fmt API, breaking everyone's code and unfortunately causing nearly every Zig user to have a bad day. std.fmt will go back to only dealing in bytes, with zero Unicode awareness whatsoever. I suggest a third party package provide Unicode functionality as well as a more advanced text formatting function for when Unicode awareness is needed. I have always suggested this, and I sincerely apologize for merging pull requests that compromised my stance on this matter. Most applications should, instead, strive to make their code independent of Unicode, dealing strictly in encoded UTF-8 bytes, and never attempt operations such as: substring manipulation, capitalization, alignment, word replacement, or column number calculations. Exceptions to this include web browsers, GUI toolkits, and terminals. If you're not making one of these, any dependency on Unicode is probably a bug or worse, a poor design decision. closes #18536 --- lib/std/fmt.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index 6bb7e26da8..a9e1d73c0a 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -40,9 +40,9 @@ pub const FormatOptions = struct { /// - when using a field name, you are required to enclose the field name (an identifier) in square /// brackets, e.g. {[score]...} as opposed to the numeric index form which can be written e.g. 
{2...} /// - *specifier* is a type-dependent formatting option that determines how a type should formatted (see below) -/// - *fill* is a single character which is used to pad the formatted text -/// - *alignment* is one of the three characters `<`, `^`, or `>` to make the text left-, center-, or right-aligned, respectively -/// - *width* is the total width of the field in characters +/// - *fill* is a single unicode codepoint which is used to pad the formatted text +/// - *alignment* is one of the three bytes '<', '^', or '>' to make the text left-, center-, or right-aligned, respectively +/// - *width* is the total width of the field in unicode codepoints /// - *precision* specifies how many decimals a formatted number should have /// /// Note that most of the parameters are optional and may be omitted. Also you can leave out separators like `:` and `.` when From 6fef362992826cea3fba991ad3d63aaaa59a2385 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 21 Jan 2024 16:36:27 -0700 Subject: [PATCH 3/9] Revert "langref: emphasize the use of dereferencing string literals" This reverts commit 27353bb936a161e6a09f1424ce38bf84e78e94e4. * unnecessary example * poor phrasing (avoid "you") --- doc/langref.html.in | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index b7431b8bf5..6ee55d3b31 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -896,18 +896,9 @@ pub fn main() void { The type of string literals encodes both the length, and the fact that they are null-terminated, and thus they can be {#link|coerced|Type Coercion#} to both {#link|Slices#} and {#link|Null-Terminated Pointers|Sentinel-Terminated Pointers#}. + Dereferencing string literals converts them to {#link|Arrays#}.

- Dereferencing string literals converts them to {#link|Arrays#}, allowing you to initialize a buffer with the contents of a string literal. -

- {#code_begin|syntax|mutable_string_buffer#} -test { - var buffer = [_]u8{0}**256; - const home_dir = "C:/Users/root"; - buffer[0..home_dir.len].* = home_dir.*; -} - {#code_end#} -

The encoding of a string in Zig is de-facto assumed to be UTF-8. Because Zig source code is {#link|UTF-8 encoded|Source Encoding#}, any non-ASCII bytes appearing within a string literal in source code carry their UTF-8 meaning into the content of the string in the Zig program; From e9c7ebe79e968b5a173b58d908aad7d7040eed23 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 21 Jan 2024 16:47:18 -0700 Subject: [PATCH 4/9] langref: simplify Hello World section reverts f510f385920b9a22bd1e68839cd4be3eea092e4d --- doc/langref.html.in | 103 +++----------------------------------------- 1 file changed, 7 insertions(+), 96 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 6ee55d3b31..8096e70e09 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -410,109 +410,20 @@ pub fn main() !void { } {#code_end#}

- The Zig code sample above demonstrates one way to create a program that will output: Hello, world!. -

-

- The code sample shows the contents of a file named hello.zig. Files storing Zig - source code are {#link|UTF-8 encoded|Source Encoding#} text files. The files storing - Zig source code must be named with the .zig extension. -

-

- Following the hello.zig Zig code sample, the {#link|Zig Build System#} is used - to build an executable program from the hello.zig source code. Then, the - hello program is executed showing its output Hello, world!. The - lines beginning with $ represent command line prompts and a command. - Everything else is program output. -

-

- The code sample begins by adding the {#link|Zig Standard Library#} to the build using the {#link|@import#} builtin function. - The {#syntax#}@import("std"){#endsyntax#} function call creates a structure that represents the Zig Standard Library. - The code then {#link|declares|Container Level Variables#} a - {#link|constant identifier|Assignment#}, named {#syntax#}std{#endsyntax#}, that gives access to the features of the Zig Standard Library. -

-

- Next, a {#link|public function|Functions#}, {#syntax#}pub fn{#endsyntax#}, named {#syntax#}main{#endsyntax#} - is declared. The {#syntax#}main{#endsyntax#} function is necessary because it tells the Zig compiler where the program starts. Programs - designed to be executed will need a {#syntax#}pub fn main{#endsyntax#} function. -

- -

- A function is a block of any number of statements and expressions, that as a whole, perform a task. - Functions may or may not return data after they are done performing their task. If a function - cannot perform its task, it might return an error. Zig makes all of this explicit. -

-

- In the hello.zig code sample, the main function is declared - with the {#syntax#}!void{#endsyntax#} return type. This return type is known as an {#link|Error Union Type#}. - This syntax tells the Zig compiler that the function will either return an - error or a value. An error union type combines an {#link|Error Set Type#} and any other data type - (e.g. a {#link|Primitive Type|Primitive Types#} or a user-defined type such as a {#link|struct#}, {#link|enum#}, or {#link|union#}). - The full form of an error union type is - <error set type>{#syntax#}!{#endsyntax#}<any data type>. In the code - sample, the error set type is not explicitly written on the left side of the {#syntax#}!{#endsyntax#} operator. - When written this way, the error set type is an {#link|inferred error set type|Inferred Error Sets#}. The - {#syntax#}void{#endsyntax#} after the {#syntax#}!{#endsyntax#} operator - tells the compiler that the function will not return a value under normal circumstances (i.e. when no errors occur). -

- -

- In Zig, a function's block of statements and expressions are surrounded by an open curly-brace { and - close curly-brace }. In hello.zig, the {#syntax#}main{#endsyntax#} function - contains two statements. -

-

- In the first statement, a constant identifier, {#syntax#}stdout{#endsyntax#}, is initialized to represent standard output's - writer. In the second statement, the program tries to print the Hello, world! message to standard output. -

-

- Functions sometimes need inputs to perform their task. Inputs are passed, in between parentheses, to functions. These - inputs are also known as arguments. When multiple arguments are passed to a function, they are separated by commas. -

-

- Two arguments are passed to the {#syntax#}stdout.print(){#endsyntax#} function: {#syntax#}"Hello, {s}!\n"{#endsyntax#} - and {#syntax#}.{"world"}{#endsyntax#}. The first argument is called a format string, which is a string containing one or - more placeholders. {#syntax#}"Hello, {s}!\n"{#endsyntax#} contains the placeholder {#syntax#}{s}{#endsyntax#}, which is - replaced with {#syntax#}"world"{#endsyntax#} from the second argument. The file string_literals.zig in - {#link|String Literals and Unicode Code Point Literals|String Literals and Unicode Code Point Literals#} contains examples of format - strings that can be used with the {#syntax#}stdout.print(){#endsyntax#} function. The \n inside of - {#syntax#}"Hello, {s}!\n"{#endsyntax#} is the {#link|escape sequence|Escape Sequences#} for the newline character. -

-

- The {#link|try#} expression evaluates the result of {#syntax#}stdout.print{#endsyntax#}. If the result is an error, then the - {#syntax#}try{#endsyntax#} expression will return from {#syntax#}main{#endsyntax#} with the error. Otherwise, the program will continue. - In this case, there are no more statements or expressions left to execute in the {#syntax#}main{#endsyntax#} function, so the program exits. -

-

- In Zig, the standard output writer's {#syntax#}print{#endsyntax#} function is allowed to fail because - it is actually a function defined as part of a generic Writer. Consider a generic Writer that - represents writing data to a file. When the disk is full, a write to the file will fail. - However, we typically do not expect writing text to the standard output to fail. To avoid having - to handle the failure case of printing to standard output, you can use alternate functions: the - functions in {#syntax#}std.log{#endsyntax#} for proper logging or the {#syntax#}std.debug.print{#endsyntax#} function. - This documentation will use the latter option to print to standard error (stderr) and silently return - on failure. The next code sample, hello_again.zig demonstrates the use of - {#syntax#}std.debug.print{#endsyntax#}. + Most of the time, it is more appropriate to write to stderr rather than stdout, and + whether or not the message is successfully written to the stream is irrelevant. + For this common case, there is a simpler API:

{#code_begin|exe|hello_again#} -const print = @import("std").debug.print; +const std = @import("std"); pub fn main() void { - print("Hello, world!\n", .{}); + std.debug.print("Hello, world!\n", .{}); } {#code_end#}

- Note that you can leave off the {#syntax#}!{#endsyntax#} from the return type because {#syntax#}std.debug.print{#endsyntax#} cannot fail. + In this case, the {#syntax#}!{#endsyntax#} may be omitted from the return + type because no errors are returned from the function.

{#see_also|Values|@import|Errors|Root Source File|Source Encoding#} {#header_close#} From a054c01f5ceb0575909e9d416320f393514472dc Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 21 Jan 2024 16:54:09 -0700 Subject: [PATCH 5/9] Revert "langref: add section numbers" This reverts commit 3542dbf0ea5bc1ddb1c5e1c856745dc07e6c0a18. I don't like them --- doc/langref.html.in | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 8096e70e09..334d6c37c5 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -301,39 +301,6 @@ color: #fff; } } - - @media all { - main { - counter-reset: section-2; - } - h2 { - counter-reset: section-3; - } - h2::before { - counter-increment: section-2; - content: counter(section-2) ". "; - font-weight: normal; - } - h3 { - counter-reset: section-4; - } - h3::before { - counter-increment: section-3; - content: counter(section-2) "." counter(section-3) ". "; - font-weight: normal; - } - h4::before { - counter-increment: section-4; - content: counter(section-2) "." counter(section-3) "." counter(section-4) ". "; - font-weight: normal; - } - #zig-version::before { - content: ""; - } - #table-of-contents::before { - content: ""; - } - } From 9be831e15ac28c1e183cebe36e7e1267114efa60 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 21 Jan 2024 17:13:59 -0700 Subject: [PATCH 6/9] langref: remove line numbers from code samples It's unnecessary, more complicated, bloated, and it messes up the table of operators. 
--- doc/langref.html.in | 15 --------------- tools/docgen.zig | 34 ++++++---------------------------- 2 files changed, 6 insertions(+), 43 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 334d6c37c5..745518a04c 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -200,24 +200,9 @@ visibility: visible; } - pre { - counter-reset: line; - } - pre .line:before { - counter-increment: line; - content: counter(line); - display: inline-block; - padding-right: 1em; - width: 2em; - text-align: right; - color: #999; - } th pre code { background: none; } - th .line:before { - display: none; - } @media (prefers-color-scheme: dark) { body{ diff --git a/tools/docgen.zig b/tools/docgen.zig index bb4300426a..559b1937e9 100644 --- a/tools/docgen.zig +++ b/tools/docgen.zig @@ -947,19 +947,8 @@ fn isType(name: []const u8) bool { return false; } -const start_line = ""; -const end_line = ""; - fn writeEscapedLines(out: anytype, text: []const u8) !void { - for (text) |char| { - if (char == '\n') { - try out.writeAll(end_line); - try out.writeAll("\n"); - try out.writeAll(start_line); - } else { - try writeEscaped(out, &[_]u8{char}); - } - } + return writeEscaped(out, text); } fn tokenizeAndPrintRaw( @@ -972,7 +961,7 @@ fn tokenizeAndPrintRaw( const src_non_terminated = mem.trim(u8, raw_src, " \r\n"); const src = try allocator.dupeZ(u8, src_non_terminated); - try out.writeAll("" ++ start_line); + try out.writeAll(""); var tokenizer = std.zig.Tokenizer.init(src); var index: usize = 0; var next_tok_is_fn = false; @@ -1062,6 +1051,7 @@ fn tokenizeAndPrintRaw( }, .string_literal, + .multiline_string_literal_line, .char_literal, => { try out.writeAll(""); @@ -1069,18 +1059,6 @@ fn tokenizeAndPrintRaw( try out.writeAll(""); }, - .multiline_string_literal_line => { - if (src[token.loc.end - 1] == '\n') { - try out.writeAll(""); - try writeEscaped(out, src[token.loc.start .. 
token.loc.end - 1]); - try out.writeAll("" ++ end_line ++ "\n" ++ start_line); - } else { - try out.writeAll(""); - try writeEscaped(out, src[token.loc.start..token.loc.end]); - try out.writeAll(""); - } - }, - .builtin => { try out.writeAll(""); try writeEscaped(out, src[token.loc.start..token.loc.end]); @@ -1211,7 +1189,7 @@ fn tokenizeAndPrintRaw( } index = token.loc.end; } - try out.writeAll(end_line ++ ""); + try out.writeAll(""); } fn tokenizeAndPrint( @@ -1234,9 +1212,9 @@ fn printSourceBlock(allocator: Allocator, docgen_tokenizer: *Tokenizer, out: any const raw_source = docgen_tokenizer.buffer[syntax_block.source_token.start..syntax_block.source_token.end]; const trimmed_raw_source = mem.trim(u8, raw_source, " \r\n"); - try out.writeAll("" ++ start_line); + try out.writeAll(""); try writeEscapedLines(out, trimmed_raw_source); - try out.writeAll(end_line ++ ""); + try out.writeAll(""); }, } try out.writeAll(""); From f29217ae0c3ddb26f3bb437f26852ffe5f6d1623 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 21 Jan 2024 17:14:31 -0700 Subject: [PATCH 7/9] langref: reduce verbosity of string literal section --- doc/langref.html.in | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 745518a04c..125bdaa36b 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -762,30 +762,19 @@ pub fn main() void { Dereferencing string literals converts them to {#link|Arrays#}.

- The encoding of a string in Zig is de-facto assumed to be UTF-8. - Because Zig source code is {#link|UTF-8 encoded|Source Encoding#}, any non-ASCII bytes appearing within a string literal - in source code carry their UTF-8 meaning into the content of the string in the Zig program; - the bytes are not modified by the compiler. - However, it is possible to embed non-UTF-8 bytes into a string literal using \xNN notation. -

-

- Indexing into a string containing non-ASCII bytes will return individual bytes, whether valid - UTF-8 or not. - The {#link|Zig Standard Library#} provides routines for checking the validity of UTF-8 encoded - strings, accessing their code points and other encoding/decoding related tasks in - {#syntax#}std.unicode{#endsyntax#}. + Because Zig source code is {#link|UTF-8 encoded|Source Encoding#}, any + non-ASCII bytes appearing within a string literal in source code carry + their UTF-8 meaning into the content of the string in the Zig program; + the bytes are not modified by the compiler. It is possible to embed + non-UTF-8 bytes into a string literal using \xNN notation.

+

Indexing into a string containing non-ASCII bytes returns individual + bytes, whether valid UTF-8 or not.

Unicode code point literals have type {#syntax#}comptime_int{#endsyntax#}, the same as {#link|Integer Literals#}. All {#link|Escape Sequences#} are valid in both string literals and Unicode code point literals.

-

- In many other programming languages, a Unicode code point literal is called a "character literal". - However, there is no precise technical definition of a "character" - in recent versions of the Unicode specification (as of Unicode 13.0). - In Zig, a Unicode code point literal corresponds to the Unicode definition of a code point. -

{#code_begin|exe|string_literals#} const print = @import("std").debug.print; const mem = @import("std").mem; // will be used to compare bytes From 1b8e6b8ba9c6a0f52e2d823218732bb2dc8a0362 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 21 Jan 2024 19:40:41 -0700 Subject: [PATCH 8/9] langref: clean up the table of operators * remove whitespace * add column for name * rename "description" to "remarks" * clarify bit shift left and bit shift right closes #17126 --- doc/langref.html.in | 294 +++++++++++++++++++++++++------------------- 1 file changed, 170 insertions(+), 124 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 125bdaa36b..9fbdb08676 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -8,7 +8,7 @@