docgen: re-enable syntax checking for code blocks

In a previous commit (f4d3d29), syntax checking for code blocks with the
`syntax` type was disabled because astgen had started checking for the
existence of identifiers. That change in astgen caused some code samples in
the language reference to fail with compilation errors.

This commit updates the code samples in the language reference and
re-enables syntax checking. Some code samples have been changed to unchecked
syntax blocks using `{#syntax_block#}` when suitable.
This commit is contained in:
Mr. Paul 2021-09-20 15:32:34 +07:00 committed by Andrew Kelley
parent abc30f7948
commit 380ca26855
2 changed files with 148 additions and 104 deletions

View File

@ -1222,9 +1222,7 @@ fn genHtml(
try printSourceBlock(allocator, tokenizer, out, syntax_block);
// TODO: remove code.just_check_syntax after updating code samples
// that have stopped working due to a change in the compiler.
if (!do_code_tests or code.just_check_syntax) {
if (!do_code_tests) {
continue;
}

View File

@ -4771,6 +4771,8 @@ test "parse u64" {
{#header_open|catch#}
<p>If you want to provide a default value, you can use the {#syntax#}catch{#endsyntax#} binary operator:</p>
{#code_begin|syntax#}
const parseU64 = @import("error_union_parsing_u64.zig").parseU64;
fn doAThing(str: []u8) void {
const number = parseU64(str, 10) catch 13;
_ = number; // ...
@ -4786,6 +4788,8 @@ fn doAThing(str: []u8) void {
<p>Let's say you wanted to return the error if you got one, otherwise continue with the
function logic:</p>
{#code_begin|syntax#}
const parseU64 = @import("error_union_parsing_u64.zig").parseU64;
fn doAThing(str: []u8) !void {
const number = parseU64(str, 10) catch |err| return err;
_ = number; // ...
@ -4795,6 +4799,8 @@ fn doAThing(str: []u8) !void {
There is a shortcut for this. The {#syntax#}try{#endsyntax#} expression:
</p>
{#code_begin|syntax#}
const parseU64 = @import("error_union_parsing_u64.zig").parseU64;
fn doAThing(str: []u8) !void {
const number = try parseU64(str, 10);
_ = number; // ...
@ -4810,7 +4816,7 @@ fn doAThing(str: []u8) !void {
Maybe you know with complete certainty that an expression will never be an error.
In this case you can do this:
</p>
{#code_begin|syntax#}const number = parseU64("1234", 10) catch unreachable;{#code_end#}
{#syntax#}const number = parseU64("1234", 10) catch unreachable;{#endsyntax#}
<p>
Here we know for sure that "1234" will parse successfully. So we put the
{#syntax#}unreachable{#endsyntax#} value on the right hand side. {#syntax#}unreachable{#endsyntax#} generates
@ -4822,7 +4828,7 @@ fn doAThing(str: []u8) !void {
Finally, you may want to take a different action for every situation. For that, we combine
the {#link|if#} and {#link|switch#} expressions:
</p>
{#code_begin|syntax#}
{#syntax_block|zig|handle_all_error_scenarios.zig#}
fn doAThing(str: []u8) void {
if (parseU64(str, 10)) |number| {
doSomethingWithNumber(number);
@ -4834,7 +4840,7 @@ fn doAThing(str: []u8) void {
error.InvalidChar => unreachable,
}
}
{#code_end#}
{#end_syntax_block#}
{#header_open|errdefer#}
<p>
The other component to error handling is defer statements.
@ -4845,7 +4851,7 @@ fn doAThing(str: []u8) void {
<p>
Example:
</p>
{#code_begin|syntax#}
{#syntax_block|zig|errdefer_example.zig#}
fn createFoo(param: i32) !Foo {
const foo = try tryToAllocateFoo();
// now we have allocated foo. we need to free it if the function fails.
@ -4863,7 +4869,7 @@ fn createFoo(param: i32) !Foo {
// but the defer will run!
return foo;
}
{#code_end#}
{#end_syntax_block#}
<p>
The neat thing about this is that you get robust error handling without
the verbosity and cognitive overhead of trying to make sure every exit path
@ -5132,12 +5138,12 @@ fn bang2() void {
For the case when no errors are returned, the cost is a single memory write operation, only in the first non-failable function in the call graph that calls a failable function, i.e. when a function returning {#syntax#}void{#endsyntax#} calls a function returning {#syntax#}error{#endsyntax#}.
This is to initialize this struct in the stack memory:
</p>
{#code_begin|syntax#}
{#syntax_block|zig|stack_trace_struct.zig#}
// Storage for an error return trace. N is the maximum function call depth
// as determined by call graph analysis.
pub const StackTrace = struct {
// Slot in instruction_addresses that the next recorded return address is written to.
index: usize,
// Buffer of recorded return addresses, N entries long.
instruction_addresses: [N]usize,
};
{#code_end#}
{#end_syntax_block#}
<p>
Here, N is the maximum function call depth as determined by call graph analysis. Recursion is ignored and counts for 2.
</p>
@ -5150,13 +5156,13 @@ pub const StackTrace = struct {
<p>
When generating the code for a function that returns an error, just before the {#syntax#}return{#endsyntax#} statement (only for the {#syntax#}return{#endsyntax#} statements that return errors), Zig generates a call to this function:
</p>
{#code_begin|syntax#}
{#syntax_block|zig|zig_return_error_fn.zig#}
// marked as "no-inline" in LLVM IR
fn __zig_return_error(stack_trace: *StackTrace) void {
// Store this call's return address in the slot selected by the current index.
stack_trace.instruction_addresses[stack_trace.index] = @returnAddress();
// Advance the index, wrapping back to 0 after N entries so the oldest slots are reused.
stack_trace.index = (stack_trace.index + 1) % N;
}
{#code_end#}
{#end_syntax_block#}
<p>
The cost is 2 math operations plus some memory reads and writes. The memory accessed is constrained and should remain cached for the duration of the error return bubbling.
</p>
@ -5206,16 +5212,16 @@ const optional_int: ?i32 = 5678;
Task: call malloc, if the result is null, return null.
</p>
<p>C code</p>
<pre><code class="cpp">// malloc prototype included for reference
{#syntax_block|c|call_malloc_in_c.c#}// malloc prototype included for reference
void *malloc(size_t size);
struct Foo *do_a_thing(void) {
char *ptr = malloc(1234);
if (!ptr) return NULL;
// ...
}</code></pre>
}{#end_syntax_block#}
<p>Zig code</p>
{#code_begin|syntax#}
{#syntax_block|zig|call_malloc_from_zig.zig#}
// malloc prototype included for reference
extern fn malloc(size: size_t) ?*u8;
@ -5223,7 +5229,7 @@ fn doAThing() ?*Foo {
const ptr = malloc(1234) orelse return null;
_ = ptr; // ...
}
{#code_end#}
{#end_syntax_block#}
<p>
Here, Zig is at least as convenient as C, if not more so. And the type of "ptr"
is {#syntax#}*u8{#endsyntax#} <em>not</em> {#syntax#}?*u8{#endsyntax#}. The {#syntax#}orelse{#endsyntax#} keyword
@ -5233,7 +5239,7 @@ fn doAThing() ?*Foo {
<p>
The other form of checking against NULL you might see looks like this:
</p>
<pre><code class="cpp">void do_a_thing(struct Foo *foo) {
{#syntax_block|c|checking_null_in_c.c#}void do_a_thing(struct Foo *foo) {
// do some stuff
if (foo) {
@ -5241,11 +5247,14 @@ fn doAThing() ?*Foo {
}
// do some stuff
}</code></pre>
}{#end_syntax_block#}
<p>
In Zig you can accomplish the same thing:
</p>
{#code_begin|syntax#}
{#code_begin|syntax|checking_null_in_zig#}
const Foo = struct{};
fn doSomethingWithFoo(foo: *Foo) void { _ = foo; }
fn doAThing(optional_foo: ?*Foo) void {
// do some stuff
@ -6111,7 +6120,7 @@ test "perform fn" {
different code. In this example, the function {#syntax#}performFn{#endsyntax#} is generated three different times,
for the different values of {#syntax#}prefix_char{#endsyntax#} provided:
</p>
{#code_begin|syntax#}
{#syntax_block|zig|performFn_1#}
// From the line:
// expect(performFn('t', 1) == 6);
fn performFn(start_value: i32) i32 {
@ -6120,8 +6129,8 @@ fn performFn(start_value: i32) i32 {
result = three(result);
return result;
}
{#code_end#}
{#code_begin|syntax#}
{#end_syntax_block#}
{#syntax_block|zig|performFn_2#}
// From the line:
// expect(performFn('o', 0) == 1);
fn performFn(start_value: i32) i32 {
@ -6129,15 +6138,15 @@ fn performFn(start_value: i32) i32 {
result = one(result);
return result;
}
{#code_end#}
{#code_begin|syntax#}
{#end_syntax_block#}
{#syntax_block|zig|performFn_3#}
// From the line:
// expect(performFn('w', 99) == 99);
fn performFn(start_value: i32) i32 {
var result: i32 = start_value;
return result;
}
{#code_end#}
{#end_syntax_block#}
<p>
Note that this happens even in a debug build; in a release build these generated functions still
pass through rigorous LLVM optimizations. The important thing to note, however, is not that this
@ -6367,11 +6376,11 @@ const Node = struct {
it works fine.
</p>
{#header_close#}
{#header_open|Case Study: printf in Zig#}
{#header_open|Case Study: print in Zig#}
<p>
Putting all of this together, let's see how {#syntax#}printf{#endsyntax#} works in Zig.
Putting all of this together, let's see how {#syntax#}print{#endsyntax#} works in Zig.
</p>
{#code_begin|exe|printf#}
{#code_begin|exe|print#}
const print = @import("std").debug.print;
const a_number: i32 = 1234;
@ -6386,67 +6395,84 @@ pub fn main() void {
Let's crack open the implementation of this and see how it works:
</p>
{#code_begin|syntax#}
/// Calls print and then flushes the buffer.
pub fn printf(self: *Writer, comptime format: []const u8, args: anytype) anyerror!void {
const State = enum {
start,
open_brace,
close_brace,
};
{#code_begin|syntax|poc_print_fn#}
const Writer = struct {
/// Calls print and then flushes the buffer.
pub fn print(self: *Writer, comptime format: []const u8, args: anytype) anyerror!void {
const State = enum {
start,
open_brace,
close_brace,
};
comptime var start_index: usize = 0;
comptime var state = State.start;
comptime var next_arg: usize = 0;
comptime var start_index: usize = 0;
comptime var state = State.start;
comptime var next_arg: usize = 0;
inline for (format) |c, i| {
switch (state) {
State.start => switch (c) {
'{' => {
if (start_index < i) try self.write(format[start_index..i]);
state = State.open_brace;
inline for (format) |c, i| {
switch (state) {
State.start => switch (c) {
'{' => {
if (start_index < i) try self.write(format[start_index..i]);
state = State.open_brace;
},
'}' => {
if (start_index < i) try self.write(format[start_index..i]);
state = State.close_brace;
},
else => {},
},
'}' => {
if (start_index < i) try self.write(format[start_index..i]);
state = State.close_brace;
State.open_brace => switch (c) {
'{' => {
state = State.start;
start_index = i;
},
'}' => {
try self.printValue(args[next_arg]);
next_arg += 1;
state = State.start;
start_index = i + 1;
},
's' => {
continue;
},
else => @compileError("Unknown format character: " ++ [1]u8{c}),
},
else => {},
},
State.open_brace => switch (c) {
'{' => {
state = State.start;
start_index = i;
State.close_brace => switch (c) {
'}' => {
state = State.start;
start_index = i;
},
else => @compileError("Single '}' encountered in format string"),
},
'}' => {
try self.printValue(args[next_arg]);
next_arg += 1;
state = State.start;
start_index = i + 1;
},
else => @compileError("Unknown format character: " ++ c),
},
State.close_brace => switch (c) {
'}' => {
state = State.start;
start_index = i;
},
else => @compileError("Single '}' encountered in format string"),
},
}
}
}
comptime {
if (args.len != next_arg) {
@compileError("Unused arguments");
comptime {
if (args.len != next_arg) {
@compileError("Unused arguments");
}
if (state != State.start) {
@compileError("Incomplete format string: " ++ format);
}
}
if (state != State.Start) {
@compileError("Incomplete format string: " ++ format);
if (start_index < format.len) {
try self.write(format[start_index..format.len]);
}
try self.flush();
}
if (start_index < format.len) {
try self.write(format[start_index..format.len]);
fn write(self: *Writer, value: []const u8) !void {
_ = self;
_ = value;
}
try self.flush();
}
pub fn printValue(self: *Writer, value: anytype) !void {
_ = self;
_ = value;
}
fn flush(self: *Writer) !void {
_ = self;
}
};
{#code_end#}
<p>
This is a proof of concept implementation; the actual function in the standard library has more
@ -6459,8 +6485,8 @@ pub fn printf(self: *Writer, comptime format: []const u8, args: anytype) anyerro
When this function is analyzed from our example code above, Zig partially evaluates the function
and emits a function that actually looks like this:
</p>
{#code_begin|syntax#}
pub fn printf(self: *Writer, arg0: i32, arg1: []const u8) !void {
{#syntax_block|zig|Emitted print Function#}
pub fn print(self: *Writer, arg0: []const u8, arg1: i32) !void {
try self.write("here is a string: '");
try self.printValue(arg0);
try self.write("' here is a number: ");
@ -6468,28 +6494,46 @@ pub fn printf(self: *Writer, arg0: i32, arg1: []const u8) !void {
try self.write("\n");
try self.flush();
}
{#code_end#}
{#end_syntax_block#}
<p>
{#syntax#}printValue{#endsyntax#} is a function that takes a parameter of any type, and does different things depending
on the type:
</p>
{#code_begin|syntax#}
pub fn printValue(self: *Writer, value: anytype) !void {
switch (@typeInfo(@TypeOf(value))) {
.Int => {
return self.printInt(T, value);
},
.Float => {
return self.printFloat(T, value);
},
else => {
@compileError("Unable to print type '" ++ @typeName(T) ++ "'");
},
{#code_begin|syntax|poc_printValue_fn#}
const Writer = struct {
pub fn printValue(self: *Writer, value: anytype) !void {
switch (@typeInfo(@TypeOf(value))) {
.Int => {
return self.writeInt(value);
},
.Float => {
return self.writeFloat(value);
},
.Pointer => {
return self.write(value);
},
else => {
@compileError("Unable to print type '" ++ @typeName(@TypeOf(value)) ++ "'");
},
}
}
}
fn write(self: *Writer, value: []const u8) !void {
_ = self;
_ = value;
}
fn writeInt(self: *Writer, value: anytype) !void {
_ = self;
_ = value;
}
fn writeFloat(self: *Writer, value: anytype) !void {
_ = self;
_ = value;
}
};
{#code_end#}
<p>
And now, what happens if we give too many arguments to {#syntax#}printf{#endsyntax#}?
And now, what happens if we give too many arguments to {#syntax#}print{#endsyntax#}?
</p>
{#code_begin|test_err|Unused argument in 'here is a string: '{s}' here is a number: {}#}
const print = @import("std").debug.print;
@ -6497,7 +6541,7 @@ const print = @import("std").debug.print;
const a_number: i32 = 1234;
const a_string = "foobar";
test "printf too many arguments" {
test "print too many arguments" {
print("here is a string: '{s}' here is a number: {}\n", .{
a_string,
a_number,
@ -6512,7 +6556,7 @@ test "printf too many arguments" {
Zig doesn't care whether the format argument is a string literal,
only that it is a compile-time known value that can be coerced to a {#syntax#}[]const u8{#endsyntax#}:
</p>
{#code_begin|exe|printf#}
{#code_begin|exe|print#}
const print = @import("std").debug.print;
const a_number: i32 = 1234;
@ -7401,9 +7445,11 @@ fn add(a: i32, b: i32) i32 {
{#syntax#}@call{#endsyntax#} allows more flexibility than normal function call syntax does. The
{#syntax#}CallOptions{#endsyntax#} struct is reproduced here:
</p>
{#code_begin|syntax#}
{#syntax_block|zig|builtin.CallOptions struct#}
pub const CallOptions = struct {
modifier: Modifier = .auto,
/// Only valid when `Modifier` is `Modifier.async_kw`.
stack: ?[]align(std.Target.stack_align) u8 = null,
pub const Modifier = enum {
@ -7440,7 +7486,7 @@ pub const CallOptions = struct {
compile_time,
};
};
{#code_end#}
{#end_syntax_block#}
{#header_close#}
{#header_open|@cDefine#}
@ -7554,7 +7600,7 @@ fn cmpxchgStrongButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_v
This function performs a weak atomic compare exchange operation. It's the equivalent of this code,
except atomic:
</p>
{#code_begin|syntax#}
{#syntax_block|zig|cmpxchgWeakButNotAtomic#}
fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_value: T) ?T {
const old_value = ptr.*;
if (old_value == expected_value and usuallyTrueButSometimesFalse()) {
@ -7564,7 +7610,7 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
return old_value;
}
}
{#code_end#}
{#end_syntax_block#}
<p>
If you are using cmpxchg in a loop, the sporadic failure will be no problem, and {#syntax#}cmpxchgWeak{#endsyntax#}
is the better choice, because it can be implemented more efficiently in machine instructions.
@ -10159,7 +10205,7 @@ pub fn main() void {
This expression is evaluated at compile-time and is used to control
preprocessor directives and include multiple <code class="file">.h</code> files:
</p>
{#code_begin|syntax#}
{#syntax_block|zig|@cImport Expression#}
const builtin = @import("builtin");
const c = @cImport({
@ -10173,7 +10219,7 @@ const c = @cImport({
}
@cInclude("soundio.h");
});
{#code_end#}
{#end_syntax_block#}
{#see_also|@cImport|@cInclude|@cDefine|@cUndef|@import#}
{#header_close#}