From f0a4bb6bd15b8a605e450af3359fe1622302463a Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 26 Feb 2024 15:11:38 +0000 Subject: [PATCH 1/9] AstGen: avoid unnecessary coercion instructions Coercions such as `@as(usize, 0)` can be trivially elided by matching these cases and translating to fixed InternPool indices. --- lib/std/zig/AstGen.zig | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index 20b1077420..c7399b63bd 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -10874,6 +10874,7 @@ fn rvalueInner( const as_comptime_int = @as(u64, @intFromEnum(Zir.Inst.Ref.comptime_int_type)) << 32; const as_bool = @as(u64, @intFromEnum(Zir.Inst.Ref.bool_type)) << 32; const as_usize = @as(u64, @intFromEnum(Zir.Inst.Ref.usize_type)) << 32; + const as_u8 = @as(u64, @intFromEnum(Zir.Inst.Ref.u8_type)) << 32; const as_void = @as(u64, @intFromEnum(Zir.Inst.Ref.void_type)) << 32; switch ((@as(u64, @intFromEnum(ty_inst)) << 32) | @as(u64, @intFromEnum(result))) { as_ty | @intFromEnum(Zir.Inst.Ref.u1_type), @@ -10939,13 +10940,30 @@ fn rvalueInner( as_ty | @intFromEnum(Zir.Inst.Ref.empty_struct_type), as_comptime_int | @intFromEnum(Zir.Inst.Ref.zero), as_comptime_int | @intFromEnum(Zir.Inst.Ref.one), - as_bool | @intFromEnum(Zir.Inst.Ref.bool_true), - as_bool | @intFromEnum(Zir.Inst.Ref.bool_false), + as_comptime_int | @intFromEnum(Zir.Inst.Ref.negative_one), as_usize | @intFromEnum(Zir.Inst.Ref.zero_usize), as_usize | @intFromEnum(Zir.Inst.Ref.one_usize), + as_u8 | @intFromEnum(Zir.Inst.Ref.zero_u8), + as_u8 | @intFromEnum(Zir.Inst.Ref.one_u8), + as_u8 | @intFromEnum(Zir.Inst.Ref.four_u8), + as_bool | @intFromEnum(Zir.Inst.Ref.bool_true), + as_bool | @intFromEnum(Zir.Inst.Ref.bool_false), as_void | @intFromEnum(Zir.Inst.Ref.void_value), => return result, // type of result is already correct + as_usize | @intFromEnum(Zir.Inst.Ref.zero) => return .zero_usize, + as_u8 | 
@intFromEnum(Zir.Inst.Ref.zero) => return .zero_u8, + as_usize | @intFromEnum(Zir.Inst.Ref.one) => return .one_usize, + as_u8 | @intFromEnum(Zir.Inst.Ref.one) => return .one_u8, + as_comptime_int | @intFromEnum(Zir.Inst.Ref.zero_usize) => return .zero, + as_u8 | @intFromEnum(Zir.Inst.Ref.zero_usize) => return .zero_u8, + as_comptime_int | @intFromEnum(Zir.Inst.Ref.one_usize) => return .one, + as_u8 | @intFromEnum(Zir.Inst.Ref.one_usize) => return .one_u8, + as_comptime_int | @intFromEnum(Zir.Inst.Ref.zero_u8) => return .zero, + as_usize | @intFromEnum(Zir.Inst.Ref.zero_u8) => return .zero_usize, + as_comptime_int | @intFromEnum(Zir.Inst.Ref.one_u8) => return .one, + as_usize | @intFromEnum(Zir.Inst.Ref.one_u8) => return .one_usize, + // Need an explicit type coercion instruction. else => return gz.addPlNode(ri.zirTag(), src_node, Zir.Inst.As{ .dest_type = ty_inst, From f6abf022b790847e6145569241e4e5685abf359c Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 27 Feb 2024 02:00:55 +0000 Subject: [PATCH 2/9] AstGen: elide block instruction when already in empty body In the code `if (cond) { ... }`, the "then body" of the `if` is technically a block. However, we don't need to emit a real ZIR `block` corresponding to it, because we are already within a condbr body; we have a separate gz, and appropriate scoping for allocs and debug variables. In this case, and many like it, we can trivially elide the block here, instead emitting the block statements directly into the current `GenZir`. This results in a significant decrease in ZIR bytes for real code. 
--- lib/std/zig/AstGen.zig | 110 ++++++++++++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 29 deletions(-) diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index c7399b63bd..27cc5e0ad1 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -1232,7 +1232,7 @@ fn suspendExpr( suspend_scope.suspend_node = node; defer suspend_scope.unstack(); - const body_result = try expr(&suspend_scope, &suspend_scope.base, .{ .rl = .none }, body_node); + const body_result = try fullBodyExpr(&suspend_scope, &suspend_scope.base, .{ .rl = .none }, body_node); if (!gz.refIsNoReturn(body_result)) { _ = try suspend_scope.addBreak(.break_inline, suspend_inst, .void_value); } @@ -1353,7 +1353,7 @@ fn fnProtoExpr( assert(param_type_node != 0); var param_gz = block_scope.makeSubBlock(scope); defer param_gz.unstack(); - const param_type = try expr(&param_gz, scope, coerced_type_ri, param_type_node); + const param_type = try fullBodyExpr(&param_gz, scope, coerced_type_ri, param_type_node); const param_inst_expected: Zir.Inst.Index = @enumFromInt(astgen.instructions.len + 1); _ = try param_gz.addBreakWithSrcNode(.break_inline, param_inst_expected, param_type, param_type_node); const main_tokens = tree.nodes.items(.main_token); @@ -2060,7 +2060,7 @@ fn comptimeExpr( else .none, }; - const block_result = try expr(&block_scope, scope, ty_only_ri, node); + const block_result = try fullBodyExpr(&block_scope, scope, ty_only_ri, node); if (!gz.refIsNoReturn(block_result)) { _ = try block_scope.addBreak(.@"break", block_inst, block_result); } @@ -2291,6 +2291,53 @@ fn continueExpr(parent_gz: *GenZir, parent_scope: *Scope, node: Ast.Node.Index) } } +/// Similar to `expr`, but intended for use when `gz` corresponds to a body +/// which will contain only this node's code. Differs from `expr` in that if the +/// root expression is an unlabeled block, does not emit an actual block. +/// Instead, the block contents are emitted directly into `gz`. 
+fn fullBodyExpr( + gz: *GenZir, + scope: *Scope, + ri: ResultInfo, + node: Ast.Node.Index, +) InnerError!Zir.Inst.Ref { + const tree = gz.astgen.tree; + const node_tags = tree.nodes.items(.tag); + const node_datas = tree.nodes.items(.data); + const main_tokens = tree.nodes.items(.main_token); + const token_tags = tree.tokens.items(.tag); + var stmt_buf: [2]Ast.Node.Index = undefined; + const statements: []const Ast.Node.Index = switch (node_tags[node]) { + else => return expr(gz, scope, ri, node), + .block_two, .block_two_semicolon => if (node_datas[node].lhs == 0) s: { + break :s &.{}; + } else if (node_datas[node].rhs == 0) s: { + stmt_buf[0] = node_datas[node].lhs; + break :s stmt_buf[0..1]; + } else s: { + stmt_buf[0] = node_datas[node].lhs; + stmt_buf[1] = node_datas[node].rhs; + break :s stmt_buf[0..2]; + }, + .block, .block_semicolon => tree.extra_data[node_datas[node].lhs..node_datas[node].rhs], + }; + + const lbrace = main_tokens[node]; + if (token_tags[lbrace - 1] == .colon and + token_tags[lbrace - 2] == .identifier) + { + // Labeled blocks are tricky - forwarding result location information properly is non-trivial, + // plus if this block is exited with a `break_inline` we aren't allowed multiple breaks. This + // case is rare, so just treat it as a normal expression and create a nested block. 
+ return expr(gz, scope, ri, node); + } + + var sub_gz = gz.makeSubBlock(scope); + try blockExprStmts(&sub_gz, &sub_gz.base, statements); + + return rvalue(gz, ri, .void_value, node); +} + fn blockExpr( gz: *GenZir, scope: *Scope, @@ -4102,7 +4149,7 @@ fn fnDecl( assert(param_type_node != 0); var param_gz = decl_gz.makeSubBlock(scope); defer param_gz.unstack(); - const param_type = try expr(&param_gz, params_scope, coerced_type_ri, param_type_node); + const param_type = try fullBodyExpr(&param_gz, params_scope, coerced_type_ri, param_type_node); const param_inst_expected: Zir.Inst.Index = @enumFromInt(astgen.instructions.len + 1); _ = try param_gz.addBreakWithSrcNode(.break_inline, param_inst_expected, param_type, param_type_node); @@ -4220,7 +4267,7 @@ fn fnDecl( var ret_gz = decl_gz.makeSubBlock(params_scope); defer ret_gz.unstack(); const ret_ref: Zir.Inst.Ref = inst: { - const inst = try expr(&ret_gz, params_scope, coerced_type_ri, fn_proto.ast.return_type); + const inst = try fullBodyExpr(&ret_gz, params_scope, coerced_type_ri, fn_proto.ast.return_type); if (ret_gz.instructionsSlice().len == 0) { // In this case we will send a len=0 body which can be encoded more efficiently. 
break :inst inst; @@ -4285,7 +4332,7 @@ fn fnDecl( const lbrace_line = astgen.source_line - decl_gz.decl_line; const lbrace_column = astgen.source_column; - _ = try expr(&fn_gz, params_scope, .{ .rl = .none }, body_node); + _ = try fullBodyExpr(&fn_gz, params_scope, .{ .rl = .none }, body_node); try checkUsed(gz, &fn_gz.base, params_scope); if (!fn_gz.endsWithNoReturn()) { @@ -4471,19 +4518,19 @@ fn globalVarDecl( var align_gz = block_scope.makeSubBlock(scope); if (var_decl.ast.align_node != 0) { - const align_inst = try expr(&align_gz, &align_gz.base, coerced_align_ri, var_decl.ast.align_node); + const align_inst = try fullBodyExpr(&align_gz, &align_gz.base, coerced_align_ri, var_decl.ast.align_node); _ = try align_gz.addBreakWithSrcNode(.break_inline, decl_inst, align_inst, node); } var linksection_gz = align_gz.makeSubBlock(scope); if (var_decl.ast.section_node != 0) { - const linksection_inst = try expr(&linksection_gz, &linksection_gz.base, coerced_linksection_ri, var_decl.ast.section_node); + const linksection_inst = try fullBodyExpr(&linksection_gz, &linksection_gz.base, coerced_linksection_ri, var_decl.ast.section_node); _ = try linksection_gz.addBreakWithSrcNode(.break_inline, decl_inst, linksection_inst, node); } var addrspace_gz = linksection_gz.makeSubBlock(scope); if (var_decl.ast.addrspace_node != 0) { - const addrspace_inst = try expr(&addrspace_gz, &addrspace_gz.base, coerced_addrspace_ri, var_decl.ast.addrspace_node); + const addrspace_inst = try fullBodyExpr(&addrspace_gz, &addrspace_gz.base, coerced_addrspace_ri, var_decl.ast.addrspace_node); _ = try addrspace_gz.addBreakWithSrcNode(.break_inline, decl_inst, addrspace_inst, node); } @@ -4532,7 +4579,7 @@ fn comptimeDecl( }; defer decl_block.unstack(); - const block_result = try expr(&decl_block, &decl_block.base, .{ .rl = .none }, body_node); + const block_result = try fullBodyExpr(&decl_block, &decl_block.base, .{ .rl = .none }, body_node); if (decl_block.isEmpty() or 
!decl_block.refIsNoReturn(block_result)) { _ = try decl_block.addBreak(.break_inline, decl_inst, .void_value); } @@ -4734,7 +4781,7 @@ fn testDecl( const lbrace_line = astgen.source_line - decl_block.decl_line; const lbrace_column = astgen.source_column; - const block_result = try expr(&fn_block, &fn_block.base, .{ .rl = .none }, body_node); + const block_result = try fullBodyExpr(&fn_block, &fn_block.base, .{ .rl = .none }, body_node); if (fn_block.isEmpty() or !fn_block.refIsNoReturn(block_result)) { // As our last action before the return, "pop" the error trace if needed @@ -5981,7 +6028,7 @@ fn orelseCatchExpr( break :blk &err_val_scope.base; }; - const else_result = try expr(&else_scope, else_sub_scope, block_scope.break_result_info, rhs); + const else_result = try fullBodyExpr(&else_scope, else_sub_scope, block_scope.break_result_info, rhs); if (!else_scope.endsWithNoReturn()) { // As our last action before the break, "pop" the error trace if needed if (do_err_trace) @@ -6149,7 +6196,7 @@ fn boolBinOp( var rhs_scope = gz.makeSubBlock(scope); defer rhs_scope.unstack(); - const rhs = try expr(&rhs_scope, &rhs_scope.base, coerced_bool_ri, node_datas[node].rhs); + const rhs = try fullBodyExpr(&rhs_scope, &rhs_scope.base, coerced_bool_ri, node_datas[node].rhs); if (!gz.refIsNoReturn(rhs)) { _ = try rhs_scope.addBreakWithSrcNode(.break_inline, bool_br, rhs, node_datas[node].rhs); } @@ -6293,7 +6340,7 @@ fn ifExpr( } }; - const then_result = try expr(&then_scope, then_sub_scope, block_scope.break_result_info, then_node); + const then_result = try fullBodyExpr(&then_scope, then_sub_scope, block_scope.break_result_info, then_node); try checkUsed(parent_gz, &then_scope.base, then_sub_scope); if (!then_scope.endsWithNoReturn()) { _ = try then_scope.addBreakWithSrcNode(.@"break", block, then_result, then_node); @@ -6335,7 +6382,7 @@ fn ifExpr( break :s &else_scope.base; } }; - const else_result = try expr(&else_scope, sub_scope, block_scope.break_result_info, else_node); 
+ const else_result = try fullBodyExpr(&else_scope, sub_scope, block_scope.break_result_info, else_node); if (!else_scope.endsWithNoReturn()) { // As our last action before the break, "pop" the error trace if needed if (do_err_trace) @@ -6444,7 +6491,7 @@ fn whileExpr( } = c: { if (while_full.error_token) |_| { const cond_ri: ResultInfo = .{ .rl = if (payload_is_ref) .ref else .none }; - const err_union = try expr(&cond_scope, &cond_scope.base, cond_ri, while_full.ast.cond_expr); + const err_union = try fullBodyExpr(&cond_scope, &cond_scope.base, cond_ri, while_full.ast.cond_expr); const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_err_ptr else .is_non_err; break :c .{ .inst = err_union, @@ -6452,14 +6499,14 @@ fn whileExpr( }; } else if (while_full.payload_token) |_| { const cond_ri: ResultInfo = .{ .rl = if (payload_is_ref) .ref else .none }; - const optional = try expr(&cond_scope, &cond_scope.base, cond_ri, while_full.ast.cond_expr); + const optional = try fullBodyExpr(&cond_scope, &cond_scope.base, cond_ri, while_full.ast.cond_expr); const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_null_ptr else .is_non_null; break :c .{ .inst = optional, .bool_bit = try cond_scope.addUnNode(tag, optional, while_full.ast.cond_expr), }; } else { - const cond = try expr(&cond_scope, &cond_scope.base, coerced_bool_ri, while_full.ast.cond_expr); + const cond = try fullBodyExpr(&cond_scope, &cond_scope.base, coerced_bool_ri, while_full.ast.cond_expr); break :c .{ .inst = cond, .bool_bit = cond, @@ -6582,7 +6629,11 @@ fn whileExpr( } continue_scope.instructions_top = continue_scope.instructions.items.len; - _ = try unusedResultExpr(&continue_scope, &continue_scope.base, then_node); + { + try emitDbgNode(&continue_scope, then_node); + const unused_result = try fullBodyExpr(&continue_scope, &continue_scope.base, .{ .rl = .none }, then_node); + _ = try addEnsureResult(&continue_scope, unused_result, then_node); + } try checkUsed(parent_gz, &then_scope.base, then_sub_scope); 
const break_tag: Zir.Inst.Tag = if (is_inline) .break_inline else .@"break"; if (!continue_scope.endsWithNoReturn()) { @@ -6626,7 +6677,7 @@ fn whileExpr( // control flow apply to outer loops; not this one. loop_scope.continue_block = .none; loop_scope.break_block = .none; - const else_result = try expr(&else_scope, sub_scope, loop_scope.break_result_info, else_node); + const else_result = try fullBodyExpr(&else_scope, sub_scope, loop_scope.break_result_info, else_node); if (is_statement) { _ = try addEnsureResult(&else_scope, else_result, else_node); } @@ -6894,7 +6945,7 @@ fn forExpr( break :blk capture_sub_scope; }; - const then_result = try expr(&then_scope, then_sub_scope, .{ .rl = .none }, then_node); + const then_result = try fullBodyExpr(&then_scope, then_sub_scope, .{ .rl = .none }, then_node); _ = try addEnsureResult(&then_scope, then_result, then_node); try checkUsed(parent_gz, &then_scope.base, then_sub_scope); @@ -6913,7 +6964,7 @@ fn forExpr( // control flow apply to outer loops; not this one. 
loop_scope.continue_block = .none; loop_scope.break_block = .none; - const else_result = try expr(&else_scope, sub_scope, loop_scope.break_result_info, else_node); + const else_result = try fullBodyExpr(&else_scope, sub_scope, loop_scope.break_result_info, else_node); if (is_statement) { _ = try addEnsureResult(&else_scope, else_result, else_node); } @@ -7388,7 +7439,7 @@ fn switchExprErrUnion( } const target_expr_node = case.ast.target_expr; - const case_result = try expr(&case_scope, sub_scope, block_scope.break_result_info, target_expr_node); + const case_result = try fullBodyExpr(&case_scope, sub_scope, block_scope.break_result_info, target_expr_node); // check capture_scope, not err_scope to avoid false positive unused error capture try checkUsed(parent_gz, &case_scope.base, err_scope.parent); const uses_err = err_scope.used != 0 or err_scope.discarded != 0; @@ -7849,7 +7900,7 @@ fn switchExpr( try case_scope.addDbgVar(.dbg_var_val, dbg_var_tag_name, dbg_var_tag_inst); } const target_expr_node = case.ast.target_expr; - const case_result = try expr(&case_scope, sub_scope, block_scope.break_result_info, target_expr_node); + const case_result = try fullBodyExpr(&case_scope, sub_scope, block_scope.break_result_info, target_expr_node); try checkUsed(parent_gz, &case_scope.base, sub_scope); if (!parent_gz.refIsNoReturn(case_result)) { _ = try case_scope.addBreakWithSrcNode(.@"break", switch_block, case_result, target_expr_node); @@ -9752,7 +9803,7 @@ fn cImport( defer block_scope.unstack(); const block_inst = try gz.makeBlockInst(.c_import, node); - const block_result = try expr(&block_scope, &block_scope.base, .{ .rl = .none }, body_node); + const block_result = try fullBodyExpr(&block_scope, &block_scope.base, .{ .rl = .none }, body_node); _ = try gz.addUnNode(.ensure_result_used, block_result, node); if (!gz.refIsNoReturn(block_result)) { _ = try block_scope.addBreak(.break_inline, block_inst, .void_value); @@ -9835,7 +9886,7 @@ fn callExpr( defer 
arg_block.unstack(); // `call_inst` is reused to provide the param type. - const arg_ref = try expr(&arg_block, &arg_block.base, .{ .rl = .{ .coerced_ty = call_inst }, .ctx = .fn_arg }, param_node); + const arg_ref = try fullBodyExpr(&arg_block, &arg_block.base, .{ .rl = .{ .coerced_ty = call_inst }, .ctx = .fn_arg }, param_node); _ = try arg_block.addBreakWithSrcNode(.break_inline, call_index, arg_ref, param_node); const body = arg_block.instructionsSlice(); @@ -10871,11 +10922,11 @@ fn rvalueInner( .ty => |ty_inst| { // Quickly eliminate some common, unnecessary type coercion. const as_ty = @as(u64, @intFromEnum(Zir.Inst.Ref.type_type)) << 32; - const as_comptime_int = @as(u64, @intFromEnum(Zir.Inst.Ref.comptime_int_type)) << 32; const as_bool = @as(u64, @intFromEnum(Zir.Inst.Ref.bool_type)) << 32; + const as_void = @as(u64, @intFromEnum(Zir.Inst.Ref.void_type)) << 32; + const as_comptime_int = @as(u64, @intFromEnum(Zir.Inst.Ref.comptime_int_type)) << 32; const as_usize = @as(u64, @intFromEnum(Zir.Inst.Ref.usize_type)) << 32; const as_u8 = @as(u64, @intFromEnum(Zir.Inst.Ref.u8_type)) << 32; - const as_void = @as(u64, @intFromEnum(Zir.Inst.Ref.void_type)) << 32; switch ((@as(u64, @intFromEnum(ty_inst)) << 32) | @as(u64, @intFromEnum(result))) { as_ty | @intFromEnum(Zir.Inst.Ref.u1_type), as_ty | @intFromEnum(Zir.Inst.Ref.u8_type), @@ -11694,7 +11745,8 @@ const GenZir = struct { /// Whether we're in an expression within a `@TypeOf` operand. In this case, closure of runtime /// variables is permitted where it is usually not. is_typeof: bool = false, - /// This is set to true for inline loops; false otherwise. + /// This is set to true for a `GenZir` of a `block_inline`, indicating that + /// exits from this block should use `break_inline` rather than `break`. is_inline: bool = false, c_import: bool = false, /// How decls created in this scope should be named. 
From 07d8740882616b18a3dd81629b1ad0b698376399 Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 27 Feb 2024 02:15:32 +0000 Subject: [PATCH 3/9] AstGen: do not generate defers at unreachable end of block Resolves: #8822 --- lib/std/zig/AstGen.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index 27cc5e0ad1..0968d85a6a 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -2563,7 +2563,9 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod } } - try genDefers(gz, parent_scope, scope, .normal_only); + if (noreturn_src_node == 0) { + try genDefers(gz, parent_scope, scope, .normal_only); + } try checkUsed(gz, parent_scope, scope); } From f51d9ab892caeb63c40fcd2c1da4ade70038119c Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 27 Feb 2024 20:32:31 +0000 Subject: [PATCH 4/9] Sema: simplify and clarify analyzeBodyInner and wrapper functions The signature and variants of Sema's main loop have evolved over time to what was a quite confusing state of affairs. This commit makes minor changes to how `analyzeBodyInner` works, and restructures/renames the wrapper functions, adding doc comments to clarify their purposes. The most notable change is that `analyzeBodyInner` now returns `CompileError!void`; inline breaks are now all communicated via `error.ComptimeBreak`. 
--- src/Module.zig | 22 +-- src/Sema.zig | 468 ++++++++++++++++++++++++++----------------------- 2 files changed, 256 insertions(+), 234 deletions(-) diff --git a/src/Module.zig b/src/Module.zig index ad6487b1eb..45107f9681 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -3492,6 +3492,8 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !SemaDeclResult { @panic("TODO: update owner Decl"); } + const decl_inst = decl.zir_decl_index.unwrap().?; + const gpa = mod.gpa; const zir = decl.getFileScope(mod).zir; @@ -3563,7 +3565,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !SemaDeclResult { try sema.declareDependency(.{ .src_hash = try ip.trackZir( sema.gpa, decl.getFileScope(mod), - decl.zir_decl_index.unwrap().?, + decl_inst, ) }); var block_scope: Sema.Block = .{ @@ -3580,7 +3582,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !SemaDeclResult { const decl_bodies = decl.zirBodies(mod); - const result_ref = (try sema.analyzeBodyBreak(&block_scope, decl_bodies.value_body)).?.operand; + const result_ref = try sema.resolveInlineBody(&block_scope, decl_bodies.value_body, decl_inst); // We'll do some other bits with the Sema. Clear the type target index just // in case they analyze any type. 
sema.builtin_type_target_index = .none; @@ -3593,7 +3595,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !SemaDeclResult { const address_space_src: LazySrcLoc = .{ .node_offset_var_decl_addrspace = 0 }; const ty_src: LazySrcLoc = .{ .node_offset_var_decl_ty = 0 }; const init_src: LazySrcLoc = .{ .node_offset_var_decl_init = 0 }; - const decl_tv = try sema.resolveInstValueAllowVariables(&block_scope, init_src, result_ref, .{ + const decl_tv = try sema.resolveConstValueAllowVariables(&block_scope, init_src, result_ref, .{ .needed_comptime_reason = "global variable initializer must be comptime-known", }); @@ -3709,13 +3711,13 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !SemaDeclResult { decl.val = Value.fromInterned((try decl_tv.val.intern(decl_tv.ty, mod))); decl.alignment = blk: { const align_body = decl_bodies.align_body orelse break :blk .none; - const align_ref = (try sema.analyzeBodyBreak(&block_scope, align_body)).?.operand; - break :blk try sema.resolveAlign(&block_scope, align_src, align_ref); + const align_ref = try sema.resolveInlineBody(&block_scope, align_body, decl_inst); + break :blk try sema.analyzeAsAlign(&block_scope, align_src, align_ref); }; decl.@"linksection" = blk: { const linksection_body = decl_bodies.linksection_body orelse break :blk .none; - const linksection_ref = (try sema.analyzeBodyBreak(&block_scope, linksection_body)).?.operand; - const bytes = try sema.resolveConstString(&block_scope, section_src, linksection_ref, .{ + const linksection_ref = try sema.resolveInlineBody(&block_scope, linksection_body, decl_inst); + const bytes = try sema.toConstString(&block_scope, section_src, linksection_ref, .{ .needed_comptime_reason = "linksection must be comptime-known", }); if (mem.indexOfScalar(u8, bytes, 0) != null) { @@ -3741,8 +3743,8 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !SemaDeclResult { .constant => target_util.defaultAddressSpace(target, .global_constant), else => unreachable, }; - const addrspace_ref = 
(try sema.analyzeBodyBreak(&block_scope, addrspace_body)).?.operand; - break :blk try sema.analyzeAddressSpace(&block_scope, address_space_src, addrspace_ref, addrspace_ctx); + const addrspace_ref = try sema.resolveInlineBody(&block_scope, addrspace_body, decl_inst); + break :blk try sema.analyzeAsAddressSpace(&block_scope, address_space_src, addrspace_ref, addrspace_ctx); }; decl.has_tv = true; decl.analysis = .complete; @@ -4513,7 +4515,7 @@ pub fn analyzeFnBody(mod: *Module, func_index: InternPool.Index, arena: Allocato sema.error_return_trace_index_on_fn_entry = error_return_trace_index; inner_block.error_return_trace_index = error_return_trace_index; - sema.analyzeBody(&inner_block, fn_info.body) catch |err| switch (err) { + sema.analyzeFnBody(&inner_block, fn_info.body) catch |err| switch (err) { // TODO make these unreachable instead of @panic error.NeededSourceLocation => @panic("zig compiler bug: NeededSourceLocation"), error.GenericPoison => @panic("zig compiler bug: GenericPoison"), diff --git a/src/Sema.zig b/src/Sema.zig index 8976848bef..7bc96c8e21 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -876,104 +876,100 @@ pub fn deinit(sema: *Sema) void { sema.* = undefined; } -/// Returns only the result from the body that is specified. -/// Only appropriate to call when it is determined at comptime that this body -/// has no peers. -fn resolveBody( - sema: *Sema, - block: *Block, - body: []const Zir.Inst.Index, - /// This is the instruction that a break instruction within `body` can - /// use to return from the body. - body_inst: Zir.Inst.Index, -) CompileError!Air.Inst.Ref { - const break_data = (try sema.analyzeBodyBreak(block, body)) orelse - return .unreachable_value; - // For comptime control flow, we need to detect when `analyzeBody` reports - // that we need to break from an outer block. In such case we - // use Zig's error mechanism to send control flow up the stack until - // we find the corresponding block to this break. 
- if (block.is_comptime and break_data.block_inst != body_inst) { - sema.comptime_break_inst = break_data.inst; - return error.ComptimeBreak; - } - return try sema.resolveInst(break_data.operand); -} - +/// Performs semantic analysis of a ZIR body which is behind a runtime condition. If comptime +/// control flow happens here, Sema will convert it to runtime control flow by introducing post-hoc +/// blocks where necessary. fn analyzeBodyRuntimeBreak(sema: *Sema, block: *Block, body: []const Zir.Inst.Index) !void { - _ = sema.analyzeBodyInner(block, body) catch |err| switch (err) { + sema.analyzeBodyInner(block, body) catch |err| switch (err) { error.ComptimeBreak => { const zir_datas = sema.code.instructions.items(.data); const break_data = zir_datas[@intFromEnum(sema.comptime_break_inst)].@"break"; const extra = sema.code.extraData(Zir.Inst.Break, break_data.payload_index).data; - try sema.addRuntimeBreak(block, .{ - .block_inst = extra.block_inst, - .operand = break_data.operand, - .inst = sema.comptime_break_inst, - }); + try sema.addRuntimeBreak(block, extra.block_inst, break_data.operand); }, else => |e| return e, }; } -pub fn analyzeBody( +/// Semantically analyze a ZIR function body. It is guaranteed by AstGen that such a body cannot +/// trigger comptime control flow to move above the function body. +pub fn analyzeFnBody( sema: *Sema, block: *Block, body: []const Zir.Inst.Index, ) !void { - _ = sema.analyzeBodyInner(block, body) catch |err| switch (err) { + sema.analyzeBodyInner(block, body) catch |err| switch (err) { error.ComptimeBreak => unreachable, // unexpected comptime control flow else => |e| return e, }; } -const BreakData = struct { - block_inst: Zir.Inst.Index, - operand: Zir.Inst.Ref, - inst: Zir.Inst.Index, -}; - -pub fn analyzeBodyBreak( +/// Given a ZIR body which can be exited via a `break_inline` instruction, or a non-inline body which +/// we are evaluating at comptime, semantically analyze the body and return the result from it. 
+/// Returns `null` if control flow did not break from this block, but instead terminated with some +/// other runtime noreturn instruction. Compile-time breaks to blocks further up the stack still +/// return `error.ComptimeBreak`. If `block.is_comptime`, this function will never return `null`. +fn analyzeInlineBody( sema: *Sema, block: *Block, body: []const Zir.Inst.Index, -) CompileError!?BreakData { - const break_inst = sema.analyzeBodyInner(block, body) catch |err| switch (err) { - error.ComptimeBreak => sema.comptime_break_inst, - else => |e| return e, - }; - if (block.instructions.items.len != 0 and - sema.isNoReturn(block.instructions.items[block.instructions.items.len - 1].toRef())) + /// The index which a break instruction can target to break from this body. + break_target: Zir.Inst.Index, +) CompileError!?Air.Inst.Ref { + if (sema.analyzeBodyInner(block, body)) |_| { return null; + } else |err| switch (err) { + error.ComptimeBreak => {}, + else => |e| return e, + } + const break_inst = sema.comptime_break_inst; const break_data = sema.code.instructions.items(.data)[@intFromEnum(break_inst)].@"break"; const extra = sema.code.extraData(Zir.Inst.Break, break_data.payload_index).data; - return BreakData{ - .block_inst = extra.block_inst, - .operand = break_data.operand, - .inst = break_inst, - }; + if (extra.block_inst != break_target) { + // This control flow goes further up the stack. + return error.ComptimeBreak; + } + return try sema.resolveInst(break_data.operand); } -/// ZIR instructions which are always `noreturn` return this. This matches the -/// return type of `analyzeBody` so that we can tail call them. -/// Only appropriate to return when the instruction is known to be NoReturn -/// solely based on the ZIR tag. -const always_noreturn: CompileError!Zir.Inst.Index = @as(Zir.Inst.Index, undefined); +/// Like `analyzeInlineBody`, but if the body does not break with a value, returns +/// `.unreachable_value` instead of `null`. 
Notably, use this to evaluate an arbitrary +/// body at comptime to a single result value. +pub fn resolveInlineBody( + sema: *Sema, + block: *Block, + body: []const Zir.Inst.Index, + /// The index which a break instruction can target to break from this body. + break_target: Zir.Inst.Index, +) CompileError!Air.Inst.Ref { + return (try sema.analyzeInlineBody(block, body, break_target)) orelse .unreachable_value; +} -/// This function is the main loop of `Sema` and it can be used in two different ways: -/// * The traditional way where there are N breaks out of the block and peer type -/// resolution is done on the break operands. In this case, the `Zir.Inst.Index` -/// part of the return value will be `undefined`, and callsites should ignore it, -/// finding the block result value via the block scope. -/// * The "flat" way. There is only 1 break out of the block, and it is with a `break_inline` -/// instruction. In this case, the `Zir.Inst.Index` part of the return value will be -/// the break instruction. This communicates both which block the break applies to, as -/// well as the operand. No block scope needs to be created for this strategy. +/// This function is the main loop of `Sema`. It analyzes a single body of ZIR instructions. +/// +/// If this function returns normally, the merges of `block` were populated with all possible +/// (runtime) results of this block. Peer type resolution should be performed on the result, +/// and relevant runtime instructions written to perform necessary coercions and breaks. See +/// `resolveAnalyzedBlock`. This form of return is impossible if `block.is_comptime == true`. +/// +/// Alternatively, this function may return `error.ComptimeBreak`. This indicates that comptime +/// control flow is happening, and we are breaking at comptime from a block indicated by the +/// break instruction in `sema.comptime_break_inst`. This occurs for any `break_inline`, or for a +/// standard `break` at comptime. 
This error is pushed up the stack until the target block is +/// reached, at which point the break operand will be fetched. +/// +/// It is rare to call this function directly. Usually, you want one of the following wrappers: +/// * If the body is exited via a `break_inline`, or is being evaluated at comptime, +/// use `Sema.analyzeInlineBody` or `Sema.resolveInlineBody`. +/// * If the body is behind a fresh runtime condition, use `Sema.analyzeBodyRuntimeBreak`. +/// * If the body is an entire function body, use `Sema.analyzeFnBody`. +/// * If the body is to be generated into an AIR `block`, use `Sema.resolveBlockBody`. +/// * Otherwise, direct usage of `Sema.analyzeBodyInner` may be necessary. fn analyzeBodyInner( sema: *Sema, block: *Block, body: []const Zir.Inst.Index, -) CompileError!Zir.Inst.Index { +) CompileError!void { // No tracy calls here, to avoid interfering with the tail call mechanism. try sema.inst_map.ensureSpaceForInstructions(sema.gpa, body); @@ -997,7 +993,7 @@ fn analyzeBodyInner( // the loop. The only way to break out of the loop is with a `noreturn` // instruction. var i: u32 = 0; - const result = while (true) { + while (true) { crash_info.setBodyIndex(i); const inst = body[i]; std.log.scoped(.sema_zir).debug("sema ZIR {s} %{d}", .{ @@ -1214,14 +1210,14 @@ fn analyzeBodyInner( // Instructions that we know to *always* be noreturn based solely on their tag. // These functions match the return type of analyzeBody so that we can // tail call them here. 
- .compile_error => break sema.zirCompileError(block, inst), - .ret_implicit => break sema.zirRetImplicit(block, inst), - .ret_node => break sema.zirRetNode(block, inst), - .ret_load => break sema.zirRetLoad(block, inst), - .ret_err_value => break sema.zirRetErrValue(block, inst), - .@"unreachable" => break sema.zirUnreachable(block, inst), - .panic => break sema.zirPanic(block, inst), - .trap => break sema.zirTrap(block, inst), + .compile_error => break try sema.zirCompileError(block, inst), + .ret_implicit => break try sema.zirRetImplicit(block, inst), + .ret_node => break try sema.zirRetNode(block, inst), + .ret_load => break try sema.zirRetLoad(block, inst), + .ret_err_value => break try sema.zirRetErrValue(block, inst), + .@"unreachable" => break try sema.zirUnreachable(block, inst), + .panic => break try sema.zirPanic(block, inst), + .trap => break try sema.zirTrap(block, inst), // zig fmt: on // This instruction never exists in an analyzed body. It exists only in the declaration @@ -1247,7 +1243,7 @@ fn analyzeBodyInner( .builtin_extern => try sema.zirBuiltinExtern( block, extended), .@"asm" => try sema.zirAsm( block, extended, false), .asm_expr => try sema.zirAsm( block, extended, true), - .typeof_peer => try sema.zirTypeofPeer( block, extended), + .typeof_peer => try sema.zirTypeofPeer( block, extended, inst), .compile_log => try sema.zirCompileLog( extended), .min_multi => try sema.zirMinMaxMulti( block, extended, .min), .max_multi => try sema.zirMinMaxMulti( block, extended, .max), @@ -1522,18 +1518,16 @@ fn analyzeBodyInner( // Special case instructions to handle comptime control flow. 
.@"break" => { if (block.is_comptime) { - break inst; // same as break_inline + sema.comptime_break_inst = inst; + return error.ComptimeBreak; } else { - break sema.zirBreak(block, inst); + try sema.zirBreak(block, inst); + break; } }, .break_inline => { - if (block.is_comptime) { - break inst; - } else { - sema.comptime_break_inst = inst; - return error.ComptimeBreak; - } + sema.comptime_break_inst = inst; + return error.ComptimeBreak; }, .repeat => { if (block.is_comptime) { @@ -1548,7 +1542,10 @@ fn analyzeBodyInner( i = 0; continue; } else { - break always_noreturn; + // We are definitely called by `zirLoop`, which will treat the + // fact that this body does not terminate `noreturn` as an + // implicit repeat. + break; } }, .repeat_inline => { @@ -1584,13 +1581,8 @@ fn analyzeBodyInner( child_block.instructions = block.instructions; defer block.instructions = child_block.instructions; - const break_data = (try sema.analyzeBodyBreak(&child_block, inline_body)) orelse - break always_noreturn; - if (inst == break_data.block_inst) { - break :blk try sema.resolveInst(break_data.operand); - } else { - break break_data.inst; - } + const result = try sema.analyzeInlineBody(&child_block, inline_body, inst) orelse break; + break :blk result; }, .block, .block_comptime => blk: { if (!block.is_comptime) { @@ -1615,13 +1607,8 @@ fn analyzeBodyInner( child_block.instructions = block.instructions; defer block.instructions = child_block.instructions; - const break_data = (try sema.analyzeBodyBreak(&child_block, inline_body)) orelse - break always_noreturn; - if (inst == break_data.block_inst) { - break :blk try sema.resolveInst(break_data.operand); - } else { - break break_data.inst; - } + const result = try sema.analyzeInlineBody(&child_block, inline_body, inst) orelse break; + break :blk result; }, .block_inline => blk: { // Directly analyze the block body without introducing a new block. 
@@ -1634,7 +1621,12 @@ fn analyzeBodyInner( const inline_body = sema.code.bodySlice(extra.end, extra.data.body_len); const gpa = sema.gpa; - const opt_break_data, const need_debug_scope = b: { + const BreakResult = struct { + block_inst: Zir.Inst.Index, + operand: Zir.Inst.Ref, + }; + + const opt_break_data: ?BreakResult, const need_debug_scope = b: { // Create a temporary child block so that this inline block is properly // labeled for any .restore_err_ret_index instructions var child_block = block.makeSubBlock(); @@ -1660,11 +1652,26 @@ fn analyzeBodyInner( child_block.instructions = block.instructions; defer block.instructions = child_block.instructions; - const result = try sema.analyzeBodyBreak(&child_block, inline_body); + const break_result: ?BreakResult = if (sema.analyzeBodyInner(&child_block, inline_body)) |_| r: { + break :r null; + } else |err| switch (err) { + error.ComptimeBreak => brk_res: { + const break_inst = sema.comptime_break_inst; + const break_data = sema.code.instructions.items(.data)[@intFromEnum(break_inst)].@"break"; + const break_extra = sema.code.extraData(Zir.Inst.Break, break_data.payload_index).data; + break :brk_res .{ + .block_inst = break_extra.block_inst, + .operand = break_data.operand, + }; + }, + else => |e| return e, + }; + if (need_debug_scope) { _ = try sema.ensurePostHoc(block, inst); } - break :b .{ result, need_debug_scope }; + + break :b .{ break_result, need_debug_scope }; }; // A runtime conditional branch that needs a post-hoc block to be @@ -1686,13 +1693,13 @@ fn analyzeBodyInner( // It may pass through our currently being analyzed block_inline or it // may point directly to it. In the latter case, this modifies the // block that we looked up in the post_hoc_blocks map above. 
- try sema.addRuntimeBreak(block, break_data); + try sema.addRuntimeBreak(block, break_data.block_inst, break_data.operand); } try labeled_block.block.instructions.appendSlice(gpa, block.instructions.items[block_index..]); block.instructions.items.len = block_index; - const block_result = try sema.analyzeBlockBody(block, inst_data.src(), &labeled_block.block, &labeled_block.label.merges, need_debug_scope); + const block_result = try sema.resolveAnalyzedBlock(block, inst_data.src(), &labeled_block.block, &labeled_block.label.merges, need_debug_scope); { // Destroy the ad-hoc block entry so that it does not interfere with // the next iteration of comptime control flow, if any. @@ -1703,15 +1710,19 @@ fn analyzeBodyInner( break :blk block_result; } - const break_data = opt_break_data orelse break always_noreturn; + const break_data = opt_break_data orelse break; if (inst == break_data.block_inst) { break :blk try sema.resolveInst(break_data.operand); } else { - break break_data.inst; + // `comptime_break_inst` preserved from `analyzeBodyInner` above. + return error.ComptimeBreak; } }, .condbr => blk: { - if (!block.is_comptime) break sema.zirCondbr(block, inst); + if (!block.is_comptime) { + try sema.zirCondbr(block, inst); + break; + } // Same as condbr_inline. 
TODO https://github.com/ziglang/zig/issues/8220 const inst_data = datas[@intFromEnum(inst)].pl_node; const cond_src: LazySrcLoc = .{ .node_offset_if_cond = inst_data.src_node }; @@ -1728,13 +1739,9 @@ fn analyzeBodyInner( const inline_body = if (cond.val.toBool()) then_body else else_body; try sema.maybeErrorUnwrapCondbr(block, inline_body, extra.data.condition, cond_src); - const break_data = (try sema.analyzeBodyBreak(block, inline_body)) orelse - break always_noreturn; - if (inst == break_data.block_inst) { - break :blk try sema.resolveInst(break_data.operand); - } else { - break break_data.inst; - } + + const result = try sema.analyzeInlineBody(block, inline_body, inst) orelse break; + break :blk result; }, .condbr_inline => blk: { const inst_data = datas[@intFromEnum(inst)].pl_node; @@ -1754,13 +1761,9 @@ fn analyzeBodyInner( try sema.maybeErrorUnwrapCondbr(block, inline_body, extra.data.condition, cond_src); const old_runtime_index = block.runtime_index; defer block.runtime_index = old_runtime_index; - const break_data = (try sema.analyzeBodyBreak(block, inline_body)) orelse - break always_noreturn; - if (inst == break_data.block_inst) { - break :blk try sema.resolveInst(break_data.operand); - } else { - break break_data.inst; - } + + const result = try sema.analyzeInlineBody(block, inline_body, inst) orelse break; + break :blk result; }, .@"try" => blk: { if (!block.is_comptime) break :blk try sema.zirTry(block, inst); @@ -1785,13 +1788,8 @@ fn analyzeBodyInner( if (is_non_err_val.toBool()) { break :blk try sema.analyzeErrUnionPayload(block, src, err_union_ty, err_union, operand_src, false); } - const break_data = (try sema.analyzeBodyBreak(block, inline_body)) orelse - break always_noreturn; - if (inst == break_data.block_inst) { - break :blk try sema.resolveInst(break_data.operand); - } else { - break break_data.inst; - } + const result = try sema.analyzeInlineBody(block, inline_body, inst) orelse break; + break :blk result; }, .try_ptr => blk: { if 
(!block.is_comptime) break :blk try sema.zirTryPtr(block, inst); @@ -1811,22 +1809,22 @@ fn analyzeBodyInner( if (is_non_err_val.toBool()) { break :blk try sema.analyzeErrUnionPayloadPtr(block, src, operand, false, false); } - const break_data = (try sema.analyzeBodyBreak(block, inline_body)) orelse - break always_noreturn; - if (inst == break_data.block_inst) { - break :blk try sema.resolveInst(break_data.operand); - } else { - break break_data.inst; - } + const result = try sema.analyzeInlineBody(block, inline_body, inst) orelse break; + break :blk result; }, .@"defer" => blk: { const inst_data = sema.code.instructions.items(.data)[@intFromEnum(inst)].@"defer"; const defer_body = sema.code.bodySlice(inst_data.index, inst_data.len); - const break_inst = sema.analyzeBodyInner(block, defer_body) catch |err| switch (err) { - error.ComptimeBreak => sema.comptime_break_inst, + if (sema.analyzeBodyInner(block, defer_body)) |_| { + // The defer terminated noreturn - no more analysis needed. + break; + } else |err| switch (err) { + error.ComptimeBreak => {}, else => |e| return e, - }; - if (break_inst != defer_body[defer_body.len - 1]) break always_noreturn; + } + if (sema.comptime_break_inst != defer_body[defer_body.len - 1]) { + return error.ComptimeBreak; + } break :blk .void_value; }, .defer_err_code => blk: { @@ -1835,11 +1833,16 @@ fn analyzeBodyInner( const defer_body = sema.code.bodySlice(extra.index, extra.len); const err_code = try sema.resolveInst(inst_data.err_code); map.putAssumeCapacity(extra.remapped_err_code, err_code); - const break_inst = sema.analyzeBodyInner(block, defer_body) catch |err| switch (err) { - error.ComptimeBreak => sema.comptime_break_inst, + if (sema.analyzeBodyInner(block, defer_body)) |_| { + // The defer terminated noreturn - no more analysis needed. 
+ break; + } else |err| switch (err) { + error.ComptimeBreak => {}, else => |e| return e, - }; - if (break_inst != defer_body[defer_body.len - 1]) break always_noreturn; + } + if (sema.comptime_break_inst != defer_body[defer_body.len - 1]) { + return error.ComptimeBreak; + } break :blk .void_value; }, }; @@ -1847,17 +1850,15 @@ fn analyzeBodyInner( // We're going to assume that the body itself is noreturn, so let's ensure that now assert(block.instructions.items.len > 0); assert(sema.isNoReturn(block.instructions.items[block.instructions.items.len - 1].toRef())); - break always_noreturn; + break; } map.putAssumeCapacity(inst, air_inst); i += 1; - }; + } // We may have overwritten the capture scope due to a `repeat` instruction where // the body had a capture; restore it now. block.wip_capture_scope = parent_capture_scope; - - return result; } pub fn resolveInstAllowNone(sema: *Sema, zir_ref: Zir.Inst.Ref) !Air.Inst.Ref { @@ -1894,7 +1895,7 @@ fn resolveConstBool( return val.toBool(); } -pub fn resolveConstString( +fn resolveConstString( sema: *Sema, block: *Block, src: LazySrcLoc, @@ -1902,6 +1903,16 @@ pub fn resolveConstString( reason: NeededComptimeReason, ) ![]u8 { const air_inst = try sema.resolveInst(zir_ref); + return sema.toConstString(block, src, air_inst, reason); +} + +pub fn toConstString( + sema: *Sema, + block: *Block, + src: LazySrcLoc, + air_inst: Air.Inst.Ref, + reason: NeededComptimeReason, +) ![]u8 { const wanted_type = Type.slice_const_u8; const coerced_inst = try sema.coerce(block, wanted_type, air_inst, src); const val = try sema.resolveConstDefinedValue(block, src, coerced_inst, reason); @@ -2193,9 +2204,8 @@ fn resolveValueAllowVariables(sema: *Sema, inst: Air.Inst.Ref) CompileError!?Val return val; } -/// Returns a compile error if the value has tag `variable`. See `resolveInstValue` for -/// a function that does not. -pub fn resolveInstConst( +/// Returns a compile error if the value has tag `variable`. 
+fn resolveInstConst( sema: *Sema, block: *Block, src: LazySrcLoc, @@ -2211,15 +2221,13 @@ pub fn resolveInstConst( } /// Value Tag may be `undef` or `variable`. -/// See `resolveInstConst` for an alternative. -pub fn resolveInstValueAllowVariables( +pub fn resolveConstValueAllowVariables( sema: *Sema, block: *Block, src: LazySrcLoc, - zir_ref: Zir.Inst.Ref, + air_ref: Air.Inst.Ref, reason: NeededComptimeReason, ) CompileError!TypedValue { - const air_ref = try sema.resolveInst(zir_ref); const val = try sema.resolveValueAllowVariables(air_ref) orelse { return sema.failWithNeededComptime(block, src, reason); }; @@ -2616,7 +2624,7 @@ fn reparentOwnedErrorMsg( const align_ty = Type.u29; -fn analyzeAsAlign( +pub fn analyzeAsAlign( sema: *Sema, block: *Block, src: LazySrcLoc, @@ -2654,7 +2662,7 @@ fn validateAlignAllowZero( return Alignment.fromNonzeroByteUnits(alignment); } -pub fn resolveAlign( +fn resolveAlign( sema: *Sema, block: *Block, src: LazySrcLoc, @@ -3054,7 +3062,7 @@ fn zirEnumDecl( defer enum_block.instructions.deinit(sema.gpa); if (body.len != 0) { - try sema.analyzeBody(&enum_block, body); + _ = try sema.analyzeInlineBody(&enum_block, body, inst); } if (tag_type_ref != .none) { @@ -5597,7 +5605,7 @@ fn zirFloat128(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Ai return Air.internedToRef((try sema.mod.floatValue(Type.comptime_float, number)).toIntern()); } -fn zirCompileError(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Zir.Inst.Index { +fn zirCompileError(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void { const tracy = trace(@src()); defer tracy.end(); @@ -5650,7 +5658,7 @@ fn zirCompileLog( return .void_value; } -fn zirPanic(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Zir.Inst.Index { +fn zirPanic(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void { const inst_data = sema.code.instructions.items(.data)[@intFromEnum(inst)].un_node; const src = inst_data.src(); const 
msg_inst = try sema.resolveInst(inst_data.operand); @@ -5663,16 +5671,14 @@ fn zirPanic(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Zir.I return sema.fail(block, src, "encountered @panic at comptime", .{}); } try sema.panicWithMsg(block, src, coerced_msg, .@"@panic"); - return always_noreturn; } -fn zirTrap(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Zir.Inst.Index { +fn zirTrap(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void { const src_node = sema.code.instructions.items(.data)[@intFromEnum(inst)].node; const src = LazySrcLoc.nodeOffset(src_node); if (block.is_comptime) return sema.fail(block, src, "encountered @trap at comptime", .{}); _ = try block.addNoOp(.trap); - return always_noreturn; } fn zirLoop(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { @@ -5726,7 +5732,8 @@ fn zirLoop(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileError var loop_block = child_block.makeSubBlock(); defer loop_block.instructions.deinit(gpa); - try sema.analyzeBody(&loop_block, body); + // Use `analyzeBodyInner` directly to push any comptime control flow up the stack. 
+ try sema.analyzeBodyInner(&loop_block, body); const loop_block_len = loop_block.instructions.items.len; if (loop_block_len > 0 and sema.typeOf(loop_block.instructions.items[loop_block_len - 1].toRef()).isNoReturn(mod)) { @@ -5742,7 +5749,7 @@ fn zirLoop(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileError ); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(loop_block.instructions.items)); } - return sema.analyzeBlockBody(parent_block, src, &child_block, merges, false); + return sema.resolveAnalyzedBlock(parent_block, src, &child_block, merges, false); } fn zirCImport(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { @@ -5785,8 +5792,7 @@ fn zirCImport(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileEr }; defer child_block.instructions.deinit(gpa); - // Ignore the result, all the relevant operations have written to c_import_buf already. - _ = try sema.analyzeBodyBreak(&child_block, body); + _ = try sema.analyzeInlineBody(&child_block, body, inst); var c_import_res = comp.cImport(c_import_buf.items, parent_block.ownerModule()) catch |err| return sema.fail(&child_block, src, "C import failed: {s}", .{@errorName(err)}); @@ -5916,6 +5922,9 @@ fn zirBlock(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index, force_compt return sema.resolveBlockBody(parent_block, src, &child_block, body, inst, &label.merges); } +/// Semantically analyze the given ZIR body, emitting any resulting runtime code into the AIR block +/// specified by `child_block` if necessary (and emitting this block into `parent_block`). +/// TODO: `merges` is known from `child_block`, remove this parameter. 
fn resolveBlockBody( sema: *Sema, parent_block: *Block, @@ -5928,12 +5937,12 @@ fn resolveBlockBody( merges: *Block.Merges, ) CompileError!Air.Inst.Ref { if (child_block.is_comptime) { - return sema.resolveBody(child_block, body, body_inst); + return sema.resolveInlineBody(child_block, body, body_inst); } else { var need_debug_scope = false; child_block.need_debug_scope = &need_debug_scope; if (sema.analyzeBodyInner(child_block, body)) |_| { - return sema.analyzeBlockBody(parent_block, src, child_block, merges, need_debug_scope); + return sema.resolveAnalyzedBlock(parent_block, src, child_block, merges, need_debug_scope); } else |err| switch (err) { error.ComptimeBreak => { // Comptime control flow is happening, however child_block may still contain @@ -5970,7 +5979,12 @@ fn resolveBlockBody( } } -fn analyzeBlockBody( +/// After a body corresponding to an AIR `block` has been analyzed, this function places them into +/// the block pointed at by `merges.block_inst` if necessary, or the block may be elided in favor of +/// inlining the instructions directly into the parent block. Either way, it considers all merges of +/// this block, and combines them appropriately using peer type resolution, returning the final +/// value of the block. 
+fn resolveAnalyzedBlock( sema: *Sema, parent_block: *Block, src: LazySrcLoc, @@ -6360,7 +6374,7 @@ fn zirFence(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) Co }); } -fn zirBreak(sema: *Sema, start_block: *Block, inst: Zir.Inst.Index) CompileError!Zir.Inst.Index { +fn zirBreak(sema: *Sema, start_block: *Block, inst: Zir.Inst.Index) CompileError!void { const tracy = trace(@src()); defer tracy.end(); @@ -6386,7 +6400,7 @@ fn zirBreak(sema: *Sema, start_block: *Block, inst: Zir.Inst.Index) CompileError block.runtime_cond = start_block.runtime_cond orelse start_block.runtime_loop; block.runtime_loop = start_block.runtime_loop; } - return inst; + return; } } block = block.parent.?; @@ -7096,7 +7110,7 @@ const CallArgsInfo = union(enum) { // Give the arg its result type sema.inst_map.putAssumeCapacity(zir_call.call_inst, Air.internedToRef(param_ty.toIntern())); // Resolve the arg! - const uncoerced_arg = try sema.resolveBody(block, arg_body, zir_call.call_inst); + const uncoerced_arg = try sema.resolveInlineBody(block, arg_body, zir_call.call_inst); if (sema.typeOf(uncoerced_arg).zigTypeTag(mod) == .NoReturn) { // This terminates resolution of arguments. The caller should @@ -7539,7 +7553,7 @@ fn analyzeCall( // each of the parameters, resolving the return type and providing it to the child // `Sema` so that it can be used for the `ret_ptr` instruction. 
const ret_ty_inst = if (fn_info.ret_ty_body.len != 0) - try sema.resolveBody(&child_block, fn_info.ret_ty_body, module_fn.zir_body_inst.resolve(ip)) + try sema.resolveInlineBody(&child_block, fn_info.ret_ty_body, module_fn.zir_body_inst.resolve(ip)) else try sema.resolveInst(fn_info.ret_ty_ref); const ret_ty_src: LazySrcLoc = .{ .node_offset_fn_type_ret_ty = 0 }; @@ -7608,11 +7622,11 @@ fn analyzeCall( } const result = result: { - sema.analyzeBody(&child_block, fn_info.body) catch |err| switch (err) { + sema.analyzeFnBody(&child_block, fn_info.body) catch |err| switch (err) { error.ComptimeReturn => break :result inlining.comptime_result, else => |e| return e, }; - break :result try sema.analyzeBlockBody(block, call_src, &child_block, merges, false); + break :result try sema.resolveAnalyzedBlock(block, call_src, &child_block, merges, false); }; if (!is_comptime_call and !block.is_typeof and @@ -7791,7 +7805,7 @@ fn analyzeInlineCallArg( const param_ty = param_ty: { const raw_param_ty = func_ty_info.param_types.get(ip)[arg_i.*]; if (raw_param_ty != .generic_poison_type) break :param_ty raw_param_ty; - const param_ty_inst = try ics.callee().resolveBody(param_block, param_body, inst); + const param_ty_inst = try ics.callee().resolveInlineBody(param_block, param_body, inst); const param_ty = try ics.callee().analyzeAsType(param_block, param_src, param_ty_inst); break :param_ty param_ty.toIntern(); }; @@ -8026,7 +8040,7 @@ fn instantiateGenericCall( child_sema.generic_call_decl = prev_generic_call_decl; } - const param_ty_inst = try child_sema.resolveBody(&child_block, param_ty_body, param_inst); + const param_ty_inst = try child_sema.resolveInlineBody(&child_block, param_ty_body, param_inst); break :param_ty try child_sema.analyzeAsType(&child_block, param_data.src(), param_ty_inst); }, else => unreachable, @@ -8118,7 +8132,7 @@ fn instantiateGenericCall( // We've already handled parameters, so don't resolve the whole body. 
Instead, just // do the instructions after the params (i.e. the func itself). - const new_func_inst = try child_sema.resolveBody(&child_block, fn_info.param_body[args_info.count()..], fn_info.param_body_inst); + const new_func_inst = try child_sema.resolveInlineBody(&child_block, fn_info.param_body[args_info.count()..], fn_info.param_body_inst); const callee_index = (child_sema.resolveConstDefinedValue(&child_block, .unneeded, new_func_inst, undefined) catch unreachable).toIntern(); const callee = mod.funcInfo(callee_index); @@ -9176,7 +9190,7 @@ fn resolveGenericBody( sema.generic_call_decl = prev_generic_call_decl; } - const uncasted = sema.resolveBody(block, body, func_inst) catch |err| break :err err; + const uncasted = sema.resolveInlineBody(block, body, func_inst) catch |err| break :err err; const result = sema.coerce(block, dest_ty, uncasted, src) catch |err| break :err err; const val = sema.resolveConstDefinedValue(block, src, result, reason) catch |err| break :err err; return val; @@ -9810,7 +9824,7 @@ fn zirParam( sema.generic_call_decl = prev_generic_call_decl; } - if (sema.resolveBody(block, body, inst)) |param_ty_inst| { + if (sema.resolveInlineBody(block, body, inst)) |param_ty_inst| { if (sema.analyzeAsType(block, src, param_ty_inst)) |param_ty| { break :param_ty param_ty; } else |err| break :err err; @@ -11556,7 +11570,7 @@ fn zirSwitchBlockErrUnion(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Comp sema.air_extra.appendSliceAssumeCapacity(@ptrCast(true_instructions)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(sub_block.instructions.items)); - return sema.analyzeBlockBody(block, main_src, &child_block, merges, false); + return sema.resolveAnalyzedBlock(block, main_src, &child_block, merges, false); } fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index, operand_is_ref: bool) CompileError!Air.Inst.Ref { @@ -12178,7 +12192,7 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index, operand_is_r false, ); - return 
sema.analyzeBlockBody(block, src, &child_block, merges, false); + return sema.resolveAnalyzedBlock(block, src, &child_block, merges, false); } const SpecialProng = struct { @@ -18602,7 +18616,7 @@ fn zirTypeofBuiltin(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErr }; defer child_block.instructions.deinit(sema.gpa); - const operand = try sema.resolveBody(&child_block, body, inst); + const operand = try sema.resolveInlineBody(&child_block, body, inst); const operand_ty = sema.typeOf(operand); if (operand_ty.isGenericPoison()) return error.GenericPoison; return Air.internedToRef(operand_ty.toIntern()); @@ -18657,6 +18671,7 @@ fn zirTypeofPeer( sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData, + inst: Zir.Inst.Index, ) CompileError!Air.Inst.Ref { const tracy = trace(@src()); defer tracy.end(); @@ -18681,7 +18696,7 @@ fn zirTypeofPeer( }; defer child_block.instructions.deinit(sema.gpa); // Ignore the result, we only care about the instructions in `args`. - _ = try sema.analyzeBodyBreak(&child_block, body); + _ = try sema.analyzeInlineBody(&child_block, body, inst); const args = sema.code.refSlice(extra.end, extended.small); @@ -18748,7 +18763,7 @@ fn zirBoolBr( // comptime-known left-hand side. No need for a block here; the result // is simply the rhs expression. Here we rely on there only being 1 // break instruction (`break_inline`). 
- const rhs_result = try sema.resolveBody(parent_block, body, inst); + const rhs_result = try sema.resolveInlineBody(parent_block, body, inst); if (sema.typeOf(rhs_result).isNoReturn(mod)) { return rhs_result; } @@ -18782,7 +18797,7 @@ fn zirBoolBr( const lhs_result: Air.Inst.Ref = if (is_bool_or) .bool_true else .bool_false; _ = try lhs_block.addBr(block_inst, lhs_result); - const rhs_result = try sema.resolveBody(rhs_block, body, inst); + const rhs_result = try sema.resolveInlineBody(rhs_block, body, inst); const rhs_noret = sema.typeOf(rhs_result).isNoReturn(mod); const coerced_rhs_result = if (!rhs_noret) rhs: { const coerced_result = try sema.coerce(rhs_block, Type.bool, rhs_result, rhs_src); @@ -18933,7 +18948,7 @@ fn zirCondbr( sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index, -) CompileError!Zir.Inst.Index { +) CompileError!void { const tracy = trace(@src()); defer tracy.end(); @@ -19002,7 +19017,6 @@ fn zirCondbr( }); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(true_instructions)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(sub_block.instructions.items)); - return always_noreturn; } fn zirTry(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { @@ -19027,14 +19041,15 @@ fn zirTry(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileError! } // We can analyze the body directly in the parent block because we know there are // no breaks from the body possible, and that the body is noreturn. - return sema.resolveBody(parent_block, body, inst); + try sema.analyzeBodyInner(parent_block, body); + return .unreachable_value; } var sub_block = parent_block.makeSubBlock(); defer sub_block.instructions.deinit(sema.gpa); // This body is guaranteed to end with noreturn and has no breaks. 
- _ = try sema.analyzeBodyInner(&sub_block, body); + try sema.analyzeBodyInner(&sub_block, body); try sema.air_extra.ensureUnusedCapacity(sema.gpa, @typeInfo(Air.Try).Struct.fields.len + sub_block.instructions.items.len); @@ -19074,14 +19089,15 @@ fn zirTryPtr(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileErr } // We can analyze the body directly in the parent block because we know there are // no breaks from the body possible, and that the body is noreturn. - return sema.resolveBody(parent_block, body, inst); + try sema.analyzeBodyInner(parent_block, body); + return .unreachable_value; } var sub_block = parent_block.makeSubBlock(); defer sub_block.instructions.deinit(sema.gpa); // This body is guaranteed to end with noreturn and has no breaks. - _ = try sema.analyzeBodyInner(&sub_block, body); + try sema.analyzeBodyInner(&sub_block, body); const operand_ty = sema.typeOf(operand); const ptr_info = operand_ty.ptrInfo(mod); @@ -19156,13 +19172,13 @@ fn ensurePostHoc(sema: *Sema, block: *Block, dest_block: Zir.Inst.Index) !*Label return labeled_block; } -// A `break` statement is inside a runtime condition, but trying to -// break from an inline loop. In such case we must convert it to -// a runtime break. -fn addRuntimeBreak(sema: *Sema, child_block: *Block, break_data: BreakData) !void { - const labeled_block = try sema.ensurePostHoc(child_block, break_data.block_inst); +/// A `break` statement is inside a runtime condition, but trying to +/// break from an inline loop. In such case we must convert it to +/// a runtime break. 
+fn addRuntimeBreak(sema: *Sema, child_block: *Block, block_inst: Zir.Inst.Index, break_operand: Zir.Inst.Ref) !void { + const labeled_block = try sema.ensurePostHoc(child_block, block_inst); - const operand = try sema.resolveInst(break_data.operand); + const operand = try sema.resolveInst(break_operand); const br_ref = try child_block.addBr(labeled_block.label.merges.block_inst, operand); try labeled_block.label.merges.results.append(sema.gpa, operand); @@ -19176,7 +19192,7 @@ fn addRuntimeBreak(sema: *Sema, child_block: *Block, break_data: BreakData) !voi } } -fn zirUnreachable(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Zir.Inst.Index { +fn zirUnreachable(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void { const inst_data = sema.code.instructions.items(.data)[@intFromEnum(inst)].@"unreachable"; const src = inst_data.src(); @@ -19193,14 +19209,13 @@ fn zirUnreachable(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError }, else => |e| return e, }; - return always_noreturn; } fn zirRetErrValue( sema: *Sema, block: *Block, inst: Zir.Inst.Index, -) CompileError!Zir.Inst.Index { +) CompileError!void { const mod = sema.mod; const inst_data = sema.code.instructions.items(.data)[@intFromEnum(inst)].str_tok; const err_name = try mod.intern_pool.getOrPutString(sema.gpa, inst_data.get(sema.code)); @@ -19219,7 +19234,7 @@ fn zirRetImplicit( sema: *Sema, block: *Block, inst: Zir.Inst.Index, -) CompileError!Zir.Inst.Index { +) CompileError!void { const tracy = trace(@src()); defer tracy.end(); @@ -19234,7 +19249,7 @@ fn zirRetImplicit( } else { try block.addUnreachable(r_brace_src, false); } - return always_noreturn; + return; } const operand = try sema.resolveInst(inst_data.operand); @@ -19265,7 +19280,7 @@ fn zirRetImplicit( return sema.analyzeRet(block, operand, r_brace_src, r_brace_src); } -fn zirRetNode(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Zir.Inst.Index { +fn zirRetNode(sema: *Sema, block: *Block, 
inst: Zir.Inst.Index) CompileError!void { const tracy = trace(@src()); defer tracy.end(); @@ -19276,7 +19291,7 @@ fn zirRetNode(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Zir return sema.analyzeRet(block, operand, src, .{ .node_offset_return_operand = inst_data.src_node }); } -fn zirRetLoad(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Zir.Inst.Index { +fn zirRetLoad(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void { const tracy = trace(@src()); defer tracy.end(); @@ -19295,7 +19310,6 @@ fn zirRetLoad(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Zir } _ = try block.addUnOp(.ret_load, ret_ptr); - return always_noreturn; } fn retWithErrTracing( @@ -19305,12 +19319,12 @@ fn retWithErrTracing( is_non_err: Air.Inst.Ref, ret_tag: Air.Inst.Tag, operand: Air.Inst.Ref, -) CompileError!Zir.Inst.Index { +) CompileError!void { const mod = sema.mod; const need_check = switch (is_non_err) { .bool_true => { _ = try block.addUnOp(ret_tag, operand); - return always_noreturn; + return; }, .bool_false => false, else => true, @@ -19326,7 +19340,7 @@ fn retWithErrTracing( if (!need_check) { try sema.callBuiltin(block, src, return_err_fn, .never_inline, &args, .@"error return"); _ = try block.addUnOp(ret_tag, operand); - return always_noreturn; + return; } var then_block = block.makeSubBlock(); @@ -19353,8 +19367,6 @@ fn retWithErrTracing( .operand = is_non_err, .payload = cond_br_payload, } } }); - - return always_noreturn; } fn wantErrorReturnTracing(sema: *Sema, fn_ret_ty: Type) bool { @@ -19481,7 +19493,7 @@ fn analyzeRet( uncasted_operand: Air.Inst.Ref, src: LazySrcLoc, operand_src: LazySrcLoc, -) CompileError!Zir.Inst.Index { +) CompileError!void { // Special case for returning an error to an inferred error set; we need to // add the error tag to the inferred error set of the in-scope function, so // that the coercion below works correctly. 
@@ -19513,7 +19525,7 @@ fn analyzeRet( try inlining.merges.results.append(sema.gpa, operand); try inlining.merges.br_list.append(sema.gpa, br_inst.toIndex().?); try inlining.merges.src_locs.append(sema.gpa, operand_src); - return always_noreturn; + return; } else if (block.is_comptime) { return sema.fail(block, src, "function called at runtime cannot return value at comptime", .{}); } else if (sema.func_is_naked) { @@ -19538,8 +19550,6 @@ fn analyzeRet( } _ = try block.addUnOp(air_tag, operand); - - return always_noreturn; } fn floatOpAllowed(tag: Zir.Inst.Tag) bool { @@ -19616,7 +19626,7 @@ fn zirPtrType(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air const address_space: std.builtin.AddressSpace = if (inst_data.flags.has_addrspace) blk: { const ref: Zir.Inst.Ref = @enumFromInt(sema.code.extra[extra_i]); extra_i += 1; - break :blk try sema.analyzeAddressSpace(block, addrspace_src, ref, .pointer); + break :blk try sema.resolveAddressSpace(block, addrspace_src, ref, .pointer); } else if (elem_ty.zigTypeTag(mod) == .Fn and target.cpu.arch == .avr) .flash else .generic; const bit_offset: u16 = if (inst_data.flags.has_bit_range) blk: { @@ -35737,7 +35747,7 @@ fn semaBackingIntType(mod: *Module, struct_type: InternPool.Key.StructType) Comp break :blk try sema.resolveType(&block, backing_int_src, backing_int_ref); } else { const body = zir.bodySlice(extra_index, backing_int_body_len); - const ty_ref = try sema.resolveBody(&block, body, zir_index); + const ty_ref = try sema.resolveInlineBody(&block, body, zir_index); break :blk try sema.analyzeAsType(&block, backing_int_src, ty_ref); } }; @@ -36618,7 +36628,7 @@ fn semaStructFields( assert(zir_field.type_body_len != 0); const body = zir.bodySlice(extra_index, zir_field.type_body_len); extra_index += body.len; - const ty_ref = try sema.resolveBody(&block_scope, body, zir_index); + const ty_ref = try sema.resolveInlineBody(&block_scope, body, zir_index); break :ty sema.analyzeAsType(&block_scope, 
.unneeded, ty_ref) catch |err| switch (err) { error.NeededSourceLocation => { const ty_src = mod.fieldSrcLoc(decl_index, .{ @@ -36704,7 +36714,7 @@ fn semaStructFields( if (zir_field.align_body_len > 0) { const body = zir.bodySlice(extra_index, zir_field.align_body_len); extra_index += body.len; - const align_ref = try sema.resolveBody(&block_scope, body, zir_index); + const align_ref = try sema.resolveInlineBody(&block_scope, body, zir_index); const field_align = sema.analyzeAsAlign(&block_scope, .unneeded, align_ref) catch |err| switch (err) { error.NeededSourceLocation => { const align_src = mod.fieldSrcLoc(decl_index, .{ @@ -36854,7 +36864,7 @@ fn semaStructFieldInits( try sema.inst_map.ensureSpaceForInstructions(sema.gpa, &.{zir_index}); sema.inst_map.putAssumeCapacity(zir_index, type_ref); - const init = try sema.resolveBody(&block_scope, body, zir_index); + const init = try sema.resolveInlineBody(&block_scope, body, zir_index); const coerced = sema.coerce(&block_scope, field_ty, init, .unneeded) catch |err| switch (err) { error.NeededSourceLocation => { const init_src = mod.fieldSrcLoc(decl_index, .{ @@ -36971,7 +36981,7 @@ fn semaUnionFields(mod: *Module, arena: Allocator, union_type: InternPool.Key.Un defer assert(block_scope.instructions.items.len == 0); if (body.len != 0) { - try sema.analyzeBody(&block_scope, body); + _ = try sema.analyzeInlineBody(&block_scope, body, zir_index); } for (comptime_mutable_decls.items) |ct_decl_index| { @@ -37914,15 +37924,25 @@ pub const AddressSpaceContext = enum { pointer, }; -pub fn analyzeAddressSpace( +fn resolveAddressSpace( sema: *Sema, block: *Block, src: LazySrcLoc, zir_ref: Zir.Inst.Ref, ctx: AddressSpaceContext, ) !std.builtin.AddressSpace { - const mod = sema.mod; const air_ref = try sema.resolveInst(zir_ref); + return sema.analyzeAsAddressSpace(block, src, air_ref, ctx); +} + +pub fn analyzeAsAddressSpace( + sema: *Sema, + block: *Block, + src: LazySrcLoc, + air_ref: Air.Inst.Ref, + ctx: AddressSpaceContext, 
+) !std.builtin.AddressSpace { + const mod = sema.mod; const coerced = try sema.coerce(block, Type.fromInterned(.address_space_type), air_ref, src); const addrspace_val = try sema.resolveConstDefinedValue(block, src, coerced, .{ .needed_comptime_reason = "address space must be comptime-known", From 321045cf33268e7b75e2972d898010ecacc345dc Mon Sep 17 00:00:00 2001 From: mlugg Date: Wed, 28 Feb 2024 02:05:10 +0000 Subject: [PATCH 5/9] codegen: handle dbg_var scoping correctly after eliding more ZIR blocks Since we now elide more ZIR blocks in AstGen, care must be taken in codegen to introduce lexical scopes for every body, not just `block`s. Also, elide a few unnecessary AIR blocks in Sema. --- src/Sema.zig | 6 ++++-- src/codegen/llvm.zig | 12 ++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Sema.zig b/src/Sema.zig index 7bc96c8e21..426c32c2b6 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -11508,6 +11508,7 @@ fn zirSwitchBlockErrUnion(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Comp sub_block.runtime_loop = null; sub_block.runtime_cond = mod.declPtr(child_block.src_decl).toSrcLoc(main_operand_src, mod); sub_block.runtime_index.increment(); + sub_block.need_debug_scope = null; // this body is emitted regardless defer sub_block.instructions.deinit(gpa); try sema.analyzeBodyRuntimeBreak(&sub_block, non_error_case.body); @@ -12243,6 +12244,7 @@ fn analyzeSwitchRuntimeBlock( case_block.runtime_loop = null; case_block.runtime_cond = mod.declPtr(child_block.src_decl).toSrcLoc(operand_src, mod); case_block.runtime_index.increment(); + case_block.need_debug_scope = null; // this body is emitted regardless defer case_block.instructions.deinit(gpa); var extra_index: usize = special.end; @@ -18967,8 +18969,7 @@ fn zirCondbr( const body = if (cond_val.toBool()) then_body else else_body; try sema.maybeErrorUnwrapCondbr(parent_block, body, extra.data.condition, cond_src); - // We use `analyzeBodyInner` since we want to propagate any possible - 
// `error.ComptimeBreak` to the caller. + // We use `analyzeBodyInner` since we want to propagate any comptime control flow to the caller. return sema.analyzeBodyInner(parent_block, body); } @@ -18980,6 +18981,7 @@ fn zirCondbr( sub_block.runtime_loop = null; sub_block.runtime_cond = mod.declPtr(parent_block.src_decl).toSrcLoc(cond_src, mod); sub_block.runtime_index.increment(); + sub_block.need_debug_scope = null; // this body is emitted regardless defer sub_block.instructions.deinit(gpa); try sema.analyzeBodyRuntimeBreak(&sub_block, then_body); diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 06d5e16eb5..82b204cacb 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5903,10 +5903,10 @@ pub const FuncGen = struct { _ = try self.wip.brCond(cond, then_block, else_block); self.wip.cursor = .{ .block = then_block }; - try self.genBody(then_body); + try self.genBodyDebugScope(then_body); self.wip.cursor = .{ .block = else_block }; - try self.genBody(else_body); + try self.genBodyDebugScope(else_body); // No need to reset the insert cursor since this instruction is noreturn. 
return .none; @@ -5987,7 +5987,7 @@ pub const FuncGen = struct { _ = try fg.wip.brCond(is_err, return_block, continue_block); fg.wip.cursor = .{ .block = return_block }; - try fg.genBody(body); + try fg.genBodyDebugScope(body); fg.wip.cursor = .{ .block = continue_block }; } @@ -6060,13 +6060,13 @@ pub const FuncGen = struct { } self.wip.cursor = .{ .block = case_block }; - try self.genBody(case_body); + try self.genBodyDebugScope(case_body); } self.wip.cursor = .{ .block = else_block }; const else_body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra_index..][0..switch_br.data.else_body_len]); if (else_body.len != 0) { - try self.genBody(else_body); + try self.genBodyDebugScope(else_body); } else { _ = try self.wip.@"unreachable"(); } @@ -6085,7 +6085,7 @@ pub const FuncGen = struct { _ = try self.wip.br(loop_block); self.wip.cursor = .{ .block = loop_block }; - try self.genBody(body); + try self.genBodyDebugScope(body); // TODO instead of this logic, change AIR to have the property that // every block is guaranteed to end with a noreturn instruction. 
From 408a08708fbb0abe03bfeaa835666a72d79c843d Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 1 Mar 2024 01:20:50 +0000 Subject: [PATCH 6/9] Autodoc: do not rely on redundant block within function body --- src/Autodoc.zig | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/Autodoc.zig b/src/Autodoc.zig index f321fadbb7..480651fb20 100644 --- a/src/Autodoc.zig +++ b/src/Autodoc.zig @@ -5137,7 +5137,7 @@ fn analyzeFancyFunction( file, scope, parent_src, - fn_info.body[0], + fn_info.body, call_ctx, ); } else { @@ -5303,7 +5303,7 @@ fn analyzeFunction( file, scope, parent_src, - fn_info.body[0], + fn_info.body, call_ctx, ); } else { @@ -5350,17 +5350,10 @@ fn getGenericReturnType( file: *File, scope: *Scope, parent_src: SrcLocInfo, // function decl line - body_main_block: Zir.Inst.Index, + body: []const Zir.Inst.Index, call_ctx: ?*const CallContext, ) !DocData.Expr { const tags = file.zir.instructions.items(.tag); - const data = file.zir.instructions.items(.data); - - // We expect `body_main_block` to be the first instruction - // inside the function body, and for it to be a block instruction. 
- const pl_node = data[@intFromEnum(body_main_block)].pl_node; - const extra = file.zir.extraData(Zir.Inst.Block, pl_node.payload_index); - const body = file.zir.bodySlice(extra.end, extra.data.body_len); if (body.len >= 4) { const maybe_ret_inst = body[body.len - 4]; switch (tags[@intFromEnum(maybe_ret_inst)]) { From eefa60e376ba44e163a501143ff44dde445b4bfe Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 1 Mar 2024 06:01:53 +0000 Subject: [PATCH 7/9] AstGen: optimize ZIR for `-1` literal --- lib/std/zig/AstGen.zig | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index 0968d85a6a..61ff3413a1 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -8458,7 +8458,14 @@ fn numberLiteral(gz: *GenZir, ri: ResultInfo, node: Ast.Node.Index, source_node: try astgen.errNoteTok(num_token, "use '-0.0' for a floating-point signed zero", .{}), }, ), - 1 => .one, + 1 => { + // Handle the negation here! + const result: Zir.Inst.Ref = switch (sign) { + .positive => .one, + .negative => .negative_one, + }; + return rvalue(gz, ri, result, source_node); + }, else => try gz.addInt(num), }, .big_int => |base| big: { From 36d0afbf2871ddaa6717807ae0050cb06cb2cd0d Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 1 Mar 2024 06:02:25 +0000 Subject: [PATCH 8/9] Autodoc: handle more direct int value Refs --- src/Autodoc.zig | 48 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/Autodoc.zig b/src/Autodoc.zig index 480651fb20..9e8ec9a469 100644 --- a/src/Autodoc.zig +++ b/src/Autodoc.zig @@ -5669,6 +5669,42 @@ fn walkRef( .expr = .{ .int = .{ .value = 1 } }, }; }, + .negative_one => { + return DocData.WalkResult{ + .typeRef = .{ .type = @intFromEnum(Ref.comptime_int_type) }, + .expr = .{ .int = .{ .value = 1, .negated = true } }, + }; + }, + .zero_usize => { + return DocData.WalkResult{ + .typeRef = .{ .type = @intFromEnum(Ref.usize_type) }, + 
.expr = .{ .int = .{ .value = 0 } }, + }; + }, + .one_usize => { + return DocData.WalkResult{ + .typeRef = .{ .type = @intFromEnum(Ref.usize_type) }, + .expr = .{ .int = .{ .value = 1 } }, + }; + }, + .zero_u8 => { + return DocData.WalkResult{ + .typeRef = .{ .type = @intFromEnum(Ref.u8_type) }, + .expr = .{ .int = .{ .value = 0 } }, + }; + }, + .one_u8 => { + return DocData.WalkResult{ + .typeRef = .{ .type = @intFromEnum(Ref.u8_type) }, + .expr = .{ .int = .{ .value = 1 } }, + }; + }, + .four_u8 => { + return DocData.WalkResult{ + .typeRef = .{ .type = @intFromEnum(Ref.u8_type) }, + .expr = .{ .int = .{ .value = 4 } }, + }; + }, .void_value => { return DocData.WalkResult{ @@ -5700,18 +5736,6 @@ fn walkRef( .empty_struct => { return DocData.WalkResult{ .expr = .{ .@"struct" = &.{} } }; }, - .zero_usize => { - return DocData.WalkResult{ - .typeRef = .{ .type = @intFromEnum(Ref.usize_type) }, - .expr = .{ .int = .{ .value = 0 } }, - }; - }, - .one_usize => { - return DocData.WalkResult{ - .typeRef = .{ .type = @intFromEnum(Ref.usize_type) }, - .expr = .{ .int = .{ .value = 1 } }, - }; - }, .calling_convention_type => { return DocData.WalkResult{ .typeRef = .{ .type = @intFromEnum(Ref.type_type) }, From 6a87e42c2ea070a6273317bbb005029d95ceae49 Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 1 Mar 2024 23:45:11 +0000 Subject: [PATCH 9/9] AstGen: fix latent bug causing incorrect elision of `dbg_stmt` instructions Thanks to jacobly0 for figuring this out. The chain of events causing the failure this triggered is as follows. * As of a recent commit, certain bodies no longer emit a redundant `block`, meaning there are more likely to be "interesting" instructions (i.e. not blocks) at the end of parent GenZir scopes. * When emitting the first `dbg_stmt` in such a body, the elision logic incorrectly looks at a tag from an instruction in an enclosing scope. 
* The tag of this instruction may be `undefined`, meaning that in unsafe builds it may be incorrectly identified as a `dbg_stmt` instruction. * This instruction from another body is clobbered rather than emitting an actual `dbg_stmt` instruction. Note that this does not produce invalid ZIR, since the creator of the undefined instruction replaces the previously-undefined payload later. --- lib/std/zig/AstGen.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index 61ff3413a1..af9bde4917 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -13550,7 +13550,7 @@ fn countBodyLenAfterFixups(astgen: *AstGen, body: []const Zir.Inst.Index) u32 { fn emitDbgStmt(gz: *GenZir, lc: LineColumn) !void { if (gz.is_comptime) return; - if (gz.instructions.items.len > 0) { + if (gz.instructions.items.len > gz.instructions_top) { const astgen = gz.astgen; const last = gz.instructions.items[gz.instructions.items.len - 1]; if (astgen.instructions.items(.tag)[@intFromEnum(last)] == .dbg_stmt) { @@ -13576,7 +13576,7 @@ fn emitDbgStmt(gz: *GenZir, lc: LineColumn) !void { /// instructions; fix up Sema so we don't need it! fn emitDbgStmtForceCurrentIndex(gz: *GenZir, lc: LineColumn) !void { const astgen = gz.astgen; - if (gz.instructions.items.len > 0 and + if (gz.instructions.items.len > gz.instructions_top and @intFromEnum(gz.instructions.items[gz.instructions.items.len - 1]) == astgen.instructions.len - 1) { const last = astgen.instructions.len - 1;