From 5193da3422f7a63bb7ff4395da08db3782908e33 Mon Sep 17 00:00:00 2001 From: mlugg Date: Sun, 19 Nov 2023 17:03:25 +0000 Subject: [PATCH] langref: add basic documentation of RLS --- doc/langref.html.in | 263 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 262 insertions(+), 1 deletion(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 6dae425de1..02069ee4cc 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -6806,8 +6806,269 @@ fn foo() i32 { {#header_open|Result Location Semantics#}

- TODO add documentation for this + During compilation, every Zig expression and sub-expression is assigned optional result location + information. This information dictates what type the expression should have (its result type), and + where the resulting value should be placed in memory (its result location). The information is + optional in the sense that not every expression has this information: assignment to + {#syntax#}_{#endsyntax#}, for instance, does not provide any information about the type of an + expression, nor does it provide a concrete memory location to place it in.

+

+ As a motivating example, consider the statement {#syntax#}const x: u32 = 42;{#endsyntax#}. The type + annotation here provides a result type of {#syntax#}u32{#endsyntax#} to the initialization expression + {#syntax#}42{#endsyntax#}, instructing the compiler to coerce this integer (initially of type + {#syntax#}comptime_int{#endsyntax#}) to this type. We will see more examples shortly. +

+

+ This is not an implementation detail: the logic outlined above is codified into the Zig language + specification, and is the primary mechanism of type inference in the language. This system is + collectively referred to as "Result Location Semantics". +

+ {#header_open|Result Types#} +

+ Result types are propagated recursively through expressions where possible. For instance, if the + expression {#syntax#}&e{#endsyntax#} has result type {#syntax#}*u32{#endsyntax#}, then + {#syntax#}e{#endsyntax#} is given a result type of {#syntax#}u32{#endsyntax#}, allowing the + language to perform this coercion before taking a reference. +

+

+ The result type mechanism is utilized by casting builtins such as {#syntax#}@intCast{#endsyntax#}. + Rather than taking as an argument the type to cast to, these builtins use their result type to + determine this information. The result type is often known from context; where it is not, the + {#syntax#}@as{#endsyntax#} builtin can be used to explicitly provide a result type. +

+

+ We can break down the result types for each component of a simple expression as follows: +

+ {#code_begin|test|result_type_propagation#} +const expectEqual = @import("std").testing.expectEqual; +test "result type propagates through struct initializer" { + const S = struct { x: u32 }; + const val: u64 = 123; + const s: S = .{ .x = @intCast(val) }; + // .{ .x = @intCast(val) } has result type `S` due to the type annotation + // @intCast(val) has result type `u32` due to the type of the field `S.x` + // val has no result type, as it is permitted to be any integer type + try expectEqual(@as(u32, 123), s.x); +} + {#code_end#} +

+ This result type information is useful for the aforementioned cast builtins, as well as to avoid + the construction of pre-coercion values, and to avoid the need for explicit type coercions in some + cases. The following table details how some common expressions propagate result types, where + {#syntax#}x{#endsyntax#} and {#syntax#}y{#endsyntax#} are arbitrary sub-expressions. +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExpressionParent Result TypeSub-expression Result Type
{#syntax#}const val: T = x{#endsyntax#}-{#syntax#}x{#endsyntax#} is a {#syntax#}T{#endsyntax#}
{#syntax#}var val: T = x{#endsyntax#}-{#syntax#}x{#endsyntax#} is a {#syntax#}T{#endsyntax#}
{#syntax#}val = x{#endsyntax#}-{#syntax#}x{#endsyntax#} is a {#syntax#}@TypeOf(val){#endsyntax#}
{#syntax#}@as(T, x){#endsyntax#}-{#syntax#}x{#endsyntax#} is a {#syntax#}T{#endsyntax#}
{#syntax#}&x{#endsyntax#}{#syntax#}*T{#endsyntax#}{#syntax#}x{#endsyntax#} is a {#syntax#}T{#endsyntax#}
{#syntax#}&x{#endsyntax#}{#syntax#}[]T{#endsyntax#}{#syntax#}x{#endsyntax#} is some array of {#syntax#}T{#endsyntax#}
{#syntax#}f(x){#endsyntax#}-{#syntax#}x{#endsyntax#} has the type of the first parameter of {#syntax#}f{#endsyntax#}
{#syntax#}.{x}{#endsyntax#}{#syntax#}T{#endsyntax#}{#syntax#}x{#endsyntax#} is a {#syntax#}std.meta.FieldType(T, .@"0"){#endsyntax#}
{#syntax#}.{ .a = x }{#endsyntax#}{#syntax#}T{#endsyntax#}{#syntax#}x{#endsyntax#} is a {#syntax#}std.meta.FieldType(T, .a){#endsyntax#}
{#syntax#}T{x}{#endsyntax#}-{#syntax#}x{#endsyntax#} is a {#syntax#}std.meta.FieldType(T, .@"0"){#endsyntax#}
{#syntax#}T{ .a = x }{#endsyntax#}-{#syntax#}x{#endsyntax#} is a {#syntax#}std.meta.FieldType(T, .a){#endsyntax#}
{#syntax#}@Type(x){#endsyntax#}-{#syntax#}x{#endsyntax#} is a {#syntax#}std.builtin.Type{#endsyntax#}
{#syntax#}@typeInfo(x){#endsyntax#}-{#syntax#}x{#endsyntax#} is a {#syntax#}type{#endsyntax#}
{#syntax#}x << y{#endsyntax#}-{#syntax#}y{#endsyntax#} is a {#syntax#}std.math.Log2Int(@TypeOf(x)){#endsyntax#}
+
+ {#header_close#} + {#header_open|Result Locations#} +

+ In addition to result type information, every expression may be optionally assigned a result + location: a pointer to which the value must be directly written. This system can be used to prevent + intermediate copies when initializing data structures, which can be important for types which must + have a fixed memory address ("pinned" types). +

+

+ When compiling the simple assignment expression {#syntax#}x = e{#endsyntax#}, many languages would + create the temporary value {#syntax#}e{#endsyntax#} on the stack, and then assign it to + {#syntax#}x{#endsyntax#}, potentially performing a type coercion in the process. Zig approaches this + differently. The expression {#syntax#}e{#endsyntax#} is given a result type matching the type of + {#syntax#}x{#endsyntax#}, and a result location of {#syntax#}&x{#endsyntax#}. For many syntactic + forms of {#syntax#}e{#endsyntax#}, this has no practical impact. However, it can have important + semantic effects when working with more complex syntactic forms. +

+

+ For instance, if the expression {#syntax#}.{ .a = x, .b = y }{#endsyntax#} has a result location of + {#syntax#}ptr{#endsyntax#}, then {#syntax#}x{#endsyntax#} is given a result location of + {#syntax#}&ptr.a{#endsyntax#}, and {#syntax#}y{#endsyntax#} a result location of {#syntax#}&ptr.b{#endsyntax#}. + Without this system, this expression would construct a temporary struct value entirely on the stack, and + only then copy it to the destination address. In essence, Zig desugars the assignment + {#syntax#}foo = .{ .a = x, .b = y }{#endsyntax#} to the two statements {#syntax#}foo.a = x; foo.b = y;{#endsyntax#}. +

+

+ This can sometimes be important when assigning an aggregate value where the initialization + expression depends on the previous value of the aggregate. The easiest way to demonstrate this is by + attempting to swap fields of a struct or array - the following logic looks sound, but in fact is not: +

+ {#code_begin|test_err|result_location_interfering_with_swap#} +const expect = @import("std").testing.expect; +test "attempt to swap array elements with array initializer" { + var arr: [2]u32 = .{ 1, 2 }; + arr = .{ arr[1], arr[0] }; + // The previous line is equivalent to the following two lines: + // arr[0] = arr[1]; + // arr[1] = arr[0]; + // So this fails! + try expect(arr[0] == 2); // succeeds + try expect(arr[1] == 1); // fails +} + {#code_end#} +

+ The following table details how some common expressions propagate result locations, where + {#syntax#}x{#endsyntax#} and {#syntax#}y{#endsyntax#} are arbitrary sub-expressions. Note that + some expressions cannot provide meaningful result locations to sub-expressions, even if they + themselves have a result location. +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExpressionResult LocationSub-expression Result Locations
{#syntax#}const val: T = x{#endsyntax#}-{#syntax#}x{#endsyntax#} has result location {#syntax#}&val{#endsyntax#}
{#syntax#}var val: T = x{#endsyntax#}-{#syntax#}x{#endsyntax#} has result location {#syntax#}&val{#endsyntax#}
{#syntax#}val = x{#endsyntax#}-{#syntax#}x{#endsyntax#} has result location {#syntax#}&val{#endsyntax#}
{#syntax#}@as(T, x){#endsyntax#}{#syntax#}ptr{#endsyntax#}{#syntax#}x{#endsyntax#} has no result location
{#syntax#}&x{#endsyntax#}{#syntax#}ptr{#endsyntax#}{#syntax#}x{#endsyntax#} has no result location
{#syntax#}f(x){#endsyntax#}{#syntax#}ptr{#endsyntax#}{#syntax#}x{#endsyntax#} has no result location
{#syntax#}.{x}{#endsyntax#}{#syntax#}ptr{#endsyntax#}{#syntax#}x{#endsyntax#} has result location {#syntax#}&ptr[0]{#endsyntax#}
{#syntax#}.{ .a = x }{#endsyntax#}{#syntax#}ptr{#endsyntax#}{#syntax#}x{#endsyntax#} has result location {#syntax#}&ptr.a{#endsyntax#}
{#syntax#}T{x}{#endsyntax#}{#syntax#}ptr{#endsyntax#}{#syntax#}x{#endsyntax#} has no result location (typed initializers do not propagate result locations)
{#syntax#}T{ .a = x }{#endsyntax#}{#syntax#}ptr{#endsyntax#}{#syntax#}x{#endsyntax#} has no result location (typed initializers do not propagate result locations)
{#syntax#}@Type(x){#endsyntax#}{#syntax#}ptr{#endsyntax#}{#syntax#}x{#endsyntax#} has no result location
{#syntax#}@typeInfo(x){#endsyntax#}{#syntax#}ptr{#endsyntax#}{#syntax#}x{#endsyntax#} has no result location
{#syntax#}x << y{#endsyntax#}{#syntax#}ptr{#endsyntax#}{#syntax#}x{#endsyntax#} and {#syntax#}y{#endsyntax#} do not have result locations
+
+ {#header_close#} {#header_close#} {#header_open|usingnamespace#}