From 5e39328542094043bc7b34787ced45dbffe3abee Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 14 Sep 2018 10:35:03 -0400 Subject: [PATCH] docs: more syntax highlighting --- doc/docgen.zig | 34 +- doc/langref.html.in | 1313 ++++++++++++++++++++++--------------------- 2 files changed, 687 insertions(+), 660 deletions(-) diff --git a/doc/docgen.zig b/doc/docgen.zig index 0143de5b8b..2ee53bbb99 100644 --- a/doc/docgen.zig +++ b/doc/docgen.zig @@ -300,6 +300,7 @@ const Node = union(enum) { SeeAlso: []const SeeAlsoItem, Code: Code, Link: Link, + Syntax: Token, }; const Toc = struct { @@ -530,6 +531,17 @@ fn genToc(allocator: *mem.Allocator, tokenizer: *Tokenizer) !Toc { }, }); tokenizer.code_node_count += 1; + } else if (mem.eql(u8, tag_name, "syntax")) { + _ = try eatToken(tokenizer, Token.Id.BracketClose); + const content_tok = try eatToken(tokenizer, Token.Id.Content); + _ = try eatToken(tokenizer, Token.Id.BracketOpen); + const end_syntax_tag = try eatToken(tokenizer, Token.Id.TagContent); + const end_tag_name = tokenizer.buffer[end_syntax_tag.start..end_syntax_tag.end]; + if (!mem.eql(u8, end_tag_name, "endsyntax")) { + return parseError(tokenizer, end_syntax_tag, "invalid token inside syntax: {}", end_tag_name); + } + _ = try eatToken(tokenizer, Token.Id.BracketClose); + try nodes.append(Node{ .Syntax = content_tok }); } else { return parseError(tokenizer, tag_token, "unrecognized tag name: {}", tag_name); } @@ -706,8 +718,10 @@ fn isType(name: []const u8) bool { return false; } -fn tokenizeAndPrint(allocator: *mem.Allocator, out: var, src: []const u8) !void { - try out.write("
");
+fn tokenizeAndPrint(allocator: *mem.Allocator, docgen_tokenizer: *Tokenizer, out: var, source_token: Token) !void {
+    const raw_src = docgen_tokenizer.buffer[source_token.start..source_token.end];
+    const src = mem.trim(u8, raw_src, " \n");
+    try out.write("");
     var tokenizer = std.zig.Tokenizer.init(src);
     var index: usize = 0;
     var next_tok_is_fn = false;
@@ -900,12 +914,17 @@ fn tokenizeAndPrint(allocator: *mem.Allocator, out: var, src: []const u8) !void
             std.zig.Token.Id.AngleBracketAngleBracketRightEqual,
             std.zig.Token.Id.Tilde,
             std.zig.Token.Id.BracketStarBracket,
-            std.zig.Token.Id.Invalid,
             => try writeEscaped(out, src[token.start..token.end]),
+
+            std.zig.Token.Id.Invalid => return parseError(
+                docgen_tokenizer,
+                source_token,
+                "syntax error",
+            ),
         }
         index = token.end;
     }
-    try out.write("
"); + try out.write(""); } fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var, zig_exe: []const u8) !void { @@ -947,6 +966,9 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var } try out.write("\n"); }, + Node.Syntax => |content_tok| { + try tokenizeAndPrint(allocator, tokenizer, out, content_tok); + }, Node.Code => |code| { code_progress_index += 1; warn("docgen example code {}/{}...", code_progress_index, tokenizer.code_node_count); @@ -956,7 +978,9 @@ fn genHtml(allocator: *mem.Allocator, tokenizer: *Tokenizer, toc: *Toc, out: var if (!code.is_inline) { try out.print("

{}.zig

", code.name); } - try tokenizeAndPrint(allocator, out, trimmed_raw_source); + try out.write("
");
+                try tokenizeAndPrint(allocator, tokenizer, out, code.source_token);
+                try out.write("
"); const name_plus_ext = try std.fmt.allocPrint(allocator, "{}.zig", code.name); const tmp_source_file_name = try os.path.join(allocator, tmp_dir_name, name_plus_ext); try io.writeFile(tmp_source_file_name, trimmed_raw_source); diff --git a/doc/langref.html.in b/doc/langref.html.in index 3f2e741e36..4e39787ede 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -161,8 +161,8 @@ pub fn main() void { } {#code_end#}

- Note that we also left off the ! from the return type. - In Zig, if your main function cannot fail, you must use the void return type. + Note that we also left off the {#syntax#}!{#endsyntax#} from the return type. + In Zig, if your main function cannot fail, you must use the {#syntax#}void{#endsyntax#} return type.

{#see_also|Values|@import|Errors|Root Source File#} {#header_close#} @@ -181,14 +181,14 @@ test "comments" { } {#code_end#}

- There are no multiline comments in Zig (e.g. like /* */ + There are no multiline comments in Zig (e.g. like /* */ comments in C). This helps allow Zig to have the property that each line of code can be tokenized out of context.

{#header_open|Doc comments#}

A doc comment is one that begins with exactly three slashes (i.e. - /// but not ////); + {#syntax#}///{#endsyntax#} but not {#syntax#}////{#endsyntax#}); multiple doc comments in a row are merged together to form a multiline doc comment. The doc comment documents whatever immediately follows it.

@@ -280,169 +280,169 @@ pub fn main() void { - i8 - int8_t + {#syntax#}i8{#endsyntax#} + int8_t signed 8-bit integer - u8 - uint8_t + {#syntax#}u8{#endsyntax#} + uint8_t unsigned 8-bit integer - i16 - int16_t + {#syntax#}i16{#endsyntax#} + int16_t signed 16-bit integer - u16 - uint16_t + {#syntax#}u16{#endsyntax#} + uint16_t unsigned 16-bit integer - i32 - int32_t + {#syntax#}i32{#endsyntax#} + int32_t signed 32-bit integer - u32 - uint32_t + {#syntax#}u32{#endsyntax#} + uint32_t unsigned 32-bit integer - i64 - int64_t + {#syntax#}i64{#endsyntax#} + int64_t signed 64-bit integer - u64 - uint64_t + {#syntax#}u64{#endsyntax#} + uint64_t unsigned 64-bit integer - i128 - __int128 + {#syntax#}i128{#endsyntax#} + __int128 signed 128-bit integer - u128 - unsigned __int128 + {#syntax#}u128{#endsyntax#} + unsigned __int128 unsigned 128-bit integer - isize - intptr_t + {#syntax#}isize{#endsyntax#} + intptr_t signed pointer sized integer - usize - uintptr_t + {#syntax#}usize{#endsyntax#} + uintptr_t unsigned pointer sized integer - c_short - short + {#syntax#}c_short{#endsyntax#} + short for ABI compatibility with C - c_ushort - unsigned short + {#syntax#}c_ushort{#endsyntax#} + unsigned short for ABI compatibility with C - c_int - int + {#syntax#}c_int{#endsyntax#} + int for ABI compatibility with C - c_uint - unsigned int + {#syntax#}c_uint{#endsyntax#} + unsigned int for ABI compatibility with C - c_long - long + {#syntax#}c_long{#endsyntax#} + long for ABI compatibility with C - c_ulong - unsigned long + {#syntax#}c_ulong{#endsyntax#} + unsigned long for ABI compatibility with C - c_longlong - long long + {#syntax#}c_longlong{#endsyntax#} + long long for ABI compatibility with C - c_ulonglong - unsigned long long + {#syntax#}c_ulonglong{#endsyntax#} + unsigned long long for ABI compatibility with C - c_longdouble - long double + {#syntax#}c_longdouble{#endsyntax#} + long double for ABI compatibility with C - c_void - void + {#syntax#}c_void{#endsyntax#} + void for ABI 
compatibility with C - f16 - float + {#syntax#}f16{#endsyntax#} + float 16-bit floating point (10-bit mantissa) IEEE-754-2008 binary16 - f32 - float + {#syntax#}f32{#endsyntax#} + float 32-bit floating point (23-bit mantissa) IEEE-754-2008 binary32 - f64 - double + {#syntax#}f64{#endsyntax#} + double 64-bit floating point (52-bit mantissa) IEEE-754-2008 binary64 - f128 + {#syntax#}f128{#endsyntax#} (none) 128-bit floating point (112-bit mantissa) IEEE-754-2008 binary128 - bool - bool - true or false + {#syntax#}bool{#endsyntax#} + bool + {#syntax#}true{#endsyntax#} or {#syntax#}false{#endsyntax#} - void + {#syntax#}void{#endsyntax#} (none) 0 bit type - noreturn + {#syntax#}noreturn{#endsyntax#} (none) - the type of break, continue, return, unreachable, and while (true) {} + the type of {#syntax#}break{#endsyntax#}, {#syntax#}continue{#endsyntax#}, {#syntax#}return{#endsyntax#}, {#syntax#}unreachable{#endsyntax#}, and {#syntax#}while (true) {}{#endsyntax#} - type + {#syntax#}type{#endsyntax#} (none) the type of types - error + {#syntax#}error{#endsyntax#} (none) an error code - comptime_int + {#syntax#}comptime_int{#endsyntax#} (none) Only allowed for {#link|comptime#}-known values. The type of integer literals. - comptime_float + {#syntax#}comptime_float{#endsyntax#} (none) Only allowed for {#link|comptime#}-known values. The type of float literals. @@ -451,7 +451,7 @@ pub fn main() void {

In addition to the integer types above, arbitrary bit-width integers can be referenced by using an identifier of i or u followed by digits. For example, the identifier - i7 refers to a signed 7-bit integer. + {#syntax#}i7{#endsyntax#} refers to a signed 7-bit integer.

{#see_also|Integers|Floats|void|Errors#} {#header_close#} @@ -467,15 +467,15 @@ pub fn main() void { - true and false - bool values + {#syntax#}true{#endsyntax#} and {#syntax#}false{#endsyntax#} + {#syntax#}bool{#endsyntax#} values - null - used to set an optional type to null + {#syntax#}null{#endsyntax#} + used to set an optional type to {#syntax#}null{#endsyntax#} - undefined + {#syntax#}undefined{#endsyntax#} used to leave a value unspecified @@ -515,52 +515,52 @@ test "string literals" { - \n + \n Newline - \r + \r Carriage Return - \t + \t Tab - \\ + \\ Backslash - \' + \' Single Quote - \" + \" Double Quote - \xNN + \xNN hexadecimal 8-bit character code (2 digits) - \uNNNN + \uNNNN hexadecimal 16-bit Unicode character code UTF-8 encoded (4 digits) - \UNNNNNN + \UNNNNNN hexadecimal 24-bit Unicode character code UTF-8 encoded (6 digits) -

Note that the maximum valid Unicode point is 0x10ffff.

+

Note that the maximum valid Unicode code point is {#syntax#}0x10ffff{#endsyntax#}.

{#header_close#} {#header_open|Multiline String Literals#}

Multiline string literals have no escapes and can span across multiple lines. - To start a multiline string literal, use the \\ token. Just like a comment, + To start a multiline string literal, use the {#syntax#}\\{#endsyntax#} token. Just like a comment, the string literal goes until the end of the line. The end of the line is not included in the string literal. - However, if the next line begins with \\ then a newline is appended and + However, if the next line begins with {#syntax#}\\{#endsyntax#} then a newline is appended and the string literal continues.

{#code_begin|syntax#} @@ -574,7 +574,7 @@ const hello_world_in_c = ; {#code_end#}

- For a multiline C string literal, prepend c to each \\: + For a multiline C string literal, prepend c to each {#syntax#}\\{#endsyntax#}:

{#code_begin|syntax#} const c_string_literal = @@ -587,14 +587,14 @@ const c_string_literal = ; {#code_end#}

- In this example the variable c_string_literal has type [*]const char and + In this example the variable {#syntax#}c_string_literal{#endsyntax#} has type {#syntax#}[*]const char{#endsyntax#} and has a terminating null byte.

{#see_also|@embedFile#} {#header_close#} {#header_close#} {#header_open|Assignment#} -

Use the const keyword to assign a value to an identifier:

+

Use the {#syntax#}const{#endsyntax#} keyword to assign a value to an identifier:

{#code_begin|test_err|cannot assign to constant#} const x = 1234; @@ -610,8 +610,8 @@ test "assignment" { foo(); } {#code_end#} -

const applies to all of the bytes that the identifier immediately addresses. {#link|Pointers#} have their own const-ness.

-

If you need a variable that you can modify, use the var keyword:

+

{#syntax#}const{#endsyntax#} applies to all of the bytes that the identifier immediately addresses. {#link|Pointers#} have their own const-ness.

+

If you need a variable that you can modify, use the {#syntax#}var{#endsyntax#} keyword:

{#code_begin|test#} const assert = @import("std").debug.assert; @@ -632,7 +632,7 @@ test "initialization" { } {#code_end#} {#header_open|undefined#} -

Use undefined to leave variables uninitialized:

+

Use {#syntax#}undefined{#endsyntax#} to leave variables uninitialized:

{#code_begin|test#} const assert = @import("std").debug.assert; @@ -643,14 +643,14 @@ test "init with undefined" { } {#code_end#}

- undefined can be {#link|implicitly cast|Implicit Casts#} to any type. - Once this happens, it is no longer possible to detect that the value is undefined. - undefined means the value could be anything, even something that is nonsense - according to the type. Translated into English, undefined means "Not a meaningful + {#syntax#}undefined{#endsyntax#} can be {#link|implicitly cast|Implicit Casts#} to any type. + Once this happens, it is no longer possible to detect that the value is {#syntax#}undefined{#endsyntax#}. + {#syntax#}undefined{#endsyntax#} means the value could be anything, even something that is nonsense + according to the type. Translated into English, {#syntax#}undefined{#endsyntax#} means "Not a meaningful value. Using this value would be a bug. The value will be unused, or overwritten before being used."

- In {#link|Debug#} mode, Zig writes 0xaa bytes to undefined memory. This is to catch + In {#link|Debug#} mode, Zig writes {#syntax#}0xaa{#endsyntax#} bytes to undefined memory. This is to catch bugs early, and to help detect use of undefined memory in a debugger.

{#header_close#} @@ -681,14 +681,14 @@ fn divide(a: i32, b: i32) i32 { } {#code_end#}

- In this function, values a and b are known only at runtime, + In this function, values {#syntax#}a{#endsyntax#} and {#syntax#}b{#endsyntax#} are known only at runtime, and thus this division operation is vulnerable to both integer overflow and division by zero.

- Operators such as + and - cause undefined behavior on - integer overflow. Also available are operations such as +% and - -% which are defined to have wrapping arithmetic on all targets. + Operators such as {#syntax#}+{#endsyntax#} and {#syntax#}-{#endsyntax#} cause undefined behavior on + integer overflow. Also available are operations such as {#syntax#}+%{#endsyntax#} and + {#syntax#}-%{#endsyntax#} which are defined to have wrapping arithmetic on all targets.

{#see_also|Integer Overflow|Division by Zero|Wrapping Operations#} {#header_close#} @@ -696,15 +696,15 @@ fn divide(a: i32, b: i32) i32 { {#header_open|Floats#}

Zig has the following floating point types:

{#header_open|Float Literals#}

- Float literals have type comptime_float which is guaranteed to hold at least all possible values + Float literals have type {#syntax#}comptime_float{#endsyntax#} which is guaranteed to hold at least all possible values that the largest other floating point type can hold. Float literals {#link|implicitly cast|Implicit Casts#} to any other type.

{#code_begin|syntax#} @@ -718,8 +718,8 @@ const yet_another_hex_float = 0x103.70P-5; {#code_end#} {#header_close#} {#header_open|Floating Point Operations#} -

By default floating point operations use Strict mode, - but you can switch to Optimized mode on a per-block basis:

+

By default floating point operations use {#syntax#}Strict{#endsyntax#} mode, + but you can switch to {#syntax#}Optimized{#endsyntax#} mode on a per-block basis:

{#code_begin|obj|foo#} {#code_release_fast#} const builtin = @import("builtin"); @@ -772,8 +772,8 @@ pub fn main() void { -
a + b
-a += b
+
{#syntax#}a + b
+a += b{#endsyntax#}
-
2 + 5 == 7
+
{#syntax#}2 + 5 == 7{#endsyntax#}
-
a +% b
-a +%= b
+
{#syntax#}a +% b
+a +%= b{#endsyntax#}
-
u32(@maxValue(u32)) +% 1 == 0
+
{#syntax#}u32(@maxValue(u32)) +% 1 == 0{#endsyntax#}
-
a - b
-a -= b
+
{#syntax#}a - b
+a -= b{#endsyntax#}
-
2 - 5 == -3
+
{#syntax#}2 - 5 == -3{#endsyntax#}
-
a -% b
-a -%= b
+
{#syntax#}a -% b
+a -%= b{#endsyntax#}
-
u32(0) -% 1 == @maxValue(u32)
+
{#syntax#}u32(0) -% 1 == @maxValue(u32){#endsyntax#}
-
-a
+
{#syntax#}-a{#endsyntax#}
-
-1 == 0 - 1
+
{#syntax#}-1 == 0 - 1{#endsyntax#}
-
-%a
+
{#syntax#}-%a{#endsyntax#}
  • {#link|Integers#}
  • @@ -881,12 +881,12 @@ a -%= b
-
-%i32(@minValue(i32)) == @minValue(i32)
+
{#syntax#}-%i32(@minValue(i32)) == @minValue(i32){#endsyntax#}
-
a * b
-a *= b
+
{#syntax#}a * b
+a *= b{#endsyntax#}
-
2 * 5 == 10
+
{#syntax#}2 * 5 == 10{#endsyntax#}
-
a *% b
-a *%= b
+
{#syntax#}a *% b
+a *%= b{#endsyntax#}
-
u8(200) *% 2 == 144
+
{#syntax#}u8(200) *% 2 == 144{#endsyntax#}
-
a / b
-a /= b
+
{#syntax#}a / b
+a /= b{#endsyntax#}
-
10 / 5 == 2
+
{#syntax#}10 / 5 == 2{#endsyntax#}
-
a % b
-a %= b
+
{#syntax#}a % b
+a %= b{#endsyntax#}
-
10 % 3 == 1
+
{#syntax#}10 % 3 == 1{#endsyntax#}
-
a << b
-a <<= b
+
{#syntax#}a << b
+a <<= b{#endsyntax#}