diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig index e1c90b34c8..9dcefd2b11 100644 --- a/lib/std/builtin.zig +++ b/lib/std/builtin.zig @@ -675,6 +675,25 @@ pub const ExternOptions = struct { is_thread_local: bool = false, }; +/// This data structure is used by the Zig language code generation and +/// therefore must be kept in sync with the compiler implementation. +pub const BranchHint = enum(u3) { + /// Equivalent to no hint given. + none, + /// This branch of control flow is more likely to be reached than its peers. + /// The optimizer should optimize for reaching it. + likely, + /// This branch of control flow is less likely to be reached than its peers. + /// The optimizer should optimize for not reaching it. + unlikely, + /// This branch of control flow is unlikely to *ever* be reached. + /// The optimizer may place it in a different page of memory to optimize other branches. + cold, + /// It is difficult to predict whether this branch of control flow will be reached. + /// The optimizer should avoid branching behavior with expensive mispredictions. + unpredictable, +}; + /// This enum is set by the compiler and communicates which compiler backend is /// used to produce machine code. /// Think carefully before deciding to observe this value. 
Nearly all code should diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index f11143e9cd..83765c8ad4 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -811,18 +811,18 @@ fn expr(gz: *GenZir, scope: *Scope, ri: ResultInfo, node: Ast.Node.Index) InnerE .builtin_call_two, .builtin_call_two_comma => { if (node_datas[node].lhs == 0) { const params = [_]Ast.Node.Index{}; - return builtinCall(gz, scope, ri, node, &params); + return builtinCall(gz, scope, ri, node, &params, false); } else if (node_datas[node].rhs == 0) { const params = [_]Ast.Node.Index{node_datas[node].lhs}; - return builtinCall(gz, scope, ri, node, &params); + return builtinCall(gz, scope, ri, node, &params, false); } else { const params = [_]Ast.Node.Index{ node_datas[node].lhs, node_datas[node].rhs }; - return builtinCall(gz, scope, ri, node, &params); + return builtinCall(gz, scope, ri, node, &params, false); } }, .builtin_call, .builtin_call_comma => { const params = tree.extra_data[node_datas[node].lhs..node_datas[node].rhs]; - return builtinCall(gz, scope, ri, node, params); + return builtinCall(gz, scope, ri, node, params, false); }, .call_one, @@ -1017,16 +1017,16 @@ fn expr(gz: *GenZir, scope: *Scope, ri: ResultInfo, node: Ast.Node.Index) InnerE .block_two, .block_two_semicolon => { const statements = [2]Ast.Node.Index{ node_datas[node].lhs, node_datas[node].rhs }; if (node_datas[node].lhs == 0) { - return blockExpr(gz, scope, ri, node, statements[0..0]); + return blockExpr(gz, scope, ri, node, statements[0..0], .normal); } else if (node_datas[node].rhs == 0) { - return blockExpr(gz, scope, ri, node, statements[0..1]); + return blockExpr(gz, scope, ri, node, statements[0..1], .normal); } else { - return blockExpr(gz, scope, ri, node, statements[0..2]); + return blockExpr(gz, scope, ri, node, statements[0..2], .normal); } }, .block, .block_semicolon => { const statements = tree.extra_data[node_datas[node].lhs..node_datas[node].rhs]; - return blockExpr(gz, scope, ri, node, statements); + 
return blockExpr(gz, scope, ri, node, statements, .normal); }, .enum_literal => return simpleStrTok(gz, ri, main_tokens[node], node, .enum_literal), .error_value => return simpleStrTok(gz, ri, node_datas[node].rhs, node, .error_value), @@ -1241,7 +1241,7 @@ fn suspendExpr( suspend_scope.suspend_node = node; defer suspend_scope.unstack(); - const body_result = try fullBodyExpr(&suspend_scope, &suspend_scope.base, .{ .rl = .none }, body_node); + const body_result = try fullBodyExpr(&suspend_scope, &suspend_scope.base, .{ .rl = .none }, body_node, .normal); if (!gz.refIsNoReturn(body_result)) { _ = try suspend_scope.addBreak(.break_inline, suspend_inst, .void_value); } @@ -1362,7 +1362,7 @@ fn fnProtoExpr( assert(param_type_node != 0); var param_gz = block_scope.makeSubBlock(scope); defer param_gz.unstack(); - const param_type = try fullBodyExpr(&param_gz, scope, coerced_type_ri, param_type_node); + const param_type = try fullBodyExpr(&param_gz, scope, coerced_type_ri, param_type_node, .normal); const param_inst_expected: Zir.Inst.Index = @enumFromInt(astgen.instructions.len + 1); _ = try param_gz.addBreakWithSrcNode(.break_inline, param_inst_expected, param_type, param_type_node); const main_tokens = tree.nodes.items(.main_token); @@ -2040,13 +2040,13 @@ fn comptimeExpr( else stmts[0..2]; - const block_ref = try labeledBlockExpr(gz, scope, ty_only_ri, node, stmt_slice, true); + const block_ref = try labeledBlockExpr(gz, scope, ty_only_ri, node, stmt_slice, true, .normal); return rvalue(gz, ri, block_ref, node); }, .block, .block_semicolon => { const stmts = tree.extra_data[node_datas[node].lhs..node_datas[node].rhs]; // Replace result location and copy back later - see above. 
- const block_ref = try labeledBlockExpr(gz, scope, ty_only_ri, node, stmts, true); + const block_ref = try labeledBlockExpr(gz, scope, ty_only_ri, node, stmts, true, .normal); return rvalue(gz, ri, block_ref, node); }, else => unreachable, @@ -2071,7 +2071,7 @@ fn comptimeExpr( else .none, }; - const block_result = try fullBodyExpr(&block_scope, scope, ty_only_ri, node); + const block_result = try fullBodyExpr(&block_scope, scope, ty_only_ri, node, .normal); if (!gz.refIsNoReturn(block_result)) { _ = try block_scope.addBreak(.@"break", block_inst, block_result); } @@ -2311,6 +2311,7 @@ fn fullBodyExpr( scope: *Scope, ri: ResultInfo, node: Ast.Node.Index, + block_kind: BlockKind, ) InnerError!Zir.Inst.Ref { const tree = gz.astgen.tree; const node_tags = tree.nodes.items(.tag); @@ -2340,21 +2341,24 @@ fn fullBodyExpr( // Labeled blocks are tricky - forwarding result location information properly is non-trivial, // plus if this block is exited with a `break_inline` we aren't allowed multiple breaks. This // case is rare, so just treat it as a normal expression and create a nested block. 
- return expr(gz, scope, ri, node); + return blockExpr(gz, scope, ri, node, statements, block_kind); } var sub_gz = gz.makeSubBlock(scope); - try blockExprStmts(&sub_gz, &sub_gz.base, statements); + try blockExprStmts(&sub_gz, &sub_gz.base, statements, block_kind); return rvalue(gz, ri, .void_value, node); } +const BlockKind = enum { normal, allow_branch_hint }; + fn blockExpr( gz: *GenZir, scope: *Scope, ri: ResultInfo, block_node: Ast.Node.Index, statements: []const Ast.Node.Index, + kind: BlockKind, ) InnerError!Zir.Inst.Ref { const astgen = gz.astgen; const tree = astgen.tree; @@ -2365,7 +2369,7 @@ fn blockExpr( if (token_tags[lbrace - 1] == .colon and token_tags[lbrace - 2] == .identifier) { - return labeledBlockExpr(gz, scope, ri, block_node, statements, false); + return labeledBlockExpr(gz, scope, ri, block_node, statements, false, kind); } if (!gz.is_comptime) { @@ -2380,7 +2384,7 @@ fn blockExpr( var block_scope = gz.makeSubBlock(scope); defer block_scope.unstack(); - try blockExprStmts(&block_scope, &block_scope.base, statements); + try blockExprStmts(&block_scope, &block_scope.base, statements, kind); if (!block_scope.endsWithNoReturn()) { // As our last action before the break, "pop" the error trace if needed @@ -2391,7 +2395,7 @@ fn blockExpr( try block_scope.setBlockBody(block_inst); } else { var sub_gz = gz.makeSubBlock(scope); - try blockExprStmts(&sub_gz, &sub_gz.base, statements); + try blockExprStmts(&sub_gz, &sub_gz.base, statements, kind); } return rvalue(gz, ri, .void_value, block_node); @@ -2436,6 +2440,7 @@ fn labeledBlockExpr( block_node: Ast.Node.Index, statements: []const Ast.Node.Index, force_comptime: bool, + block_kind: BlockKind, ) InnerError!Zir.Inst.Ref { const astgen = gz.astgen; const tree = astgen.tree; @@ -2476,7 +2481,7 @@ fn labeledBlockExpr( if (force_comptime) block_scope.is_comptime = true; defer block_scope.unstack(); - try blockExprStmts(&block_scope, &block_scope.base, statements); + try blockExprStmts(&block_scope, 
&block_scope.base, statements, block_kind); if (!block_scope.endsWithNoReturn()) { // As our last action before the return, "pop" the error trace if needed _ = try gz.addRestoreErrRetIndex(.{ .block = block_inst }, .always, block_node); @@ -2495,7 +2500,7 @@ fn labeledBlockExpr( } } -fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Node.Index) !void { +fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Node.Index, block_kind: BlockKind) !void { const astgen = gz.astgen; const tree = astgen.tree; const node_tags = tree.nodes.items(.tag); @@ -2509,7 +2514,7 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod var noreturn_src_node: Ast.Node.Index = 0; var scope = parent_scope; - for (statements) |statement| { + for (statements, 0..) |statement, stmt_idx| { if (noreturn_src_node != 0) { try astgen.appendErrorNodeNotes( statement, @@ -2524,6 +2529,10 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod }, ); } + const allow_branch_hint = switch (block_kind) { + .normal => false, + .allow_branch_hint => stmt_idx == 0, + }; var inner_node = statement; while (true) { switch (node_tags[inner_node]) { @@ -2567,6 +2576,30 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod .for_simple, .@"for", => _ = try forExpr(gz, scope, .{ .rl = .none }, inner_node, tree.fullFor(inner_node).?, true), + // These cases are here to allow branch hints. 
+ .builtin_call_two, .builtin_call_two_comma => { + try emitDbgNode(gz, inner_node); + const ri: ResultInfo = .{ .rl = .none }; + const result = if (node_data[inner_node].lhs == 0) r: { + break :r try builtinCall(gz, scope, ri, inner_node, &.{}, allow_branch_hint); + } else if (node_data[inner_node].rhs == 0) r: { + break :r try builtinCall(gz, scope, ri, inner_node, &.{node_data[inner_node].lhs}, allow_branch_hint); + } else r: { + break :r try builtinCall(gz, scope, ri, inner_node, &.{ + node_data[inner_node].lhs, + node_data[inner_node].rhs, + }, allow_branch_hint); + }; + noreturn_src_node = try addEnsureResult(gz, result, inner_node); + }, + .builtin_call, .builtin_call_comma => { + try emitDbgNode(gz, inner_node); + const ri: ResultInfo = .{ .rl = .none }; + const params = tree.extra_data[node_data[inner_node].lhs..node_data[inner_node].rhs]; + const result = try builtinCall(gz, scope, ri, inner_node, params, allow_branch_hint); + noreturn_src_node = try addEnsureResult(gz, result, inner_node); + }, + else => noreturn_src_node = try unusedResultExpr(gz, scope, inner_node), // zig fmt: on } @@ -2827,7 +2860,7 @@ fn addEnsureResult(gz: *GenZir, maybe_unused_result: Zir.Inst.Ref, statement: As .fence, .set_float_mode, .set_align_stack, - .set_cold, + .branch_hint, => break :b true, else => break :b false, }, @@ -4154,7 +4187,7 @@ fn fnDecl( assert(param_type_node != 0); var param_gz = decl_gz.makeSubBlock(scope); defer param_gz.unstack(); - const param_type = try fullBodyExpr(&param_gz, params_scope, coerced_type_ri, param_type_node); + const param_type = try fullBodyExpr(&param_gz, params_scope, coerced_type_ri, param_type_node, .normal); const param_inst_expected: Zir.Inst.Index = @enumFromInt(astgen.instructions.len + 1); _ = try param_gz.addBreakWithSrcNode(.break_inline, param_inst_expected, param_type, param_type_node); @@ -4276,7 +4309,7 @@ fn fnDecl( var ret_gz = decl_gz.makeSubBlock(params_scope); defer ret_gz.unstack(); const ret_ref: Zir.Inst.Ref = inst: { - 
const inst = try fullBodyExpr(&ret_gz, params_scope, coerced_type_ri, fn_proto.ast.return_type); + const inst = try fullBodyExpr(&ret_gz, params_scope, coerced_type_ri, fn_proto.ast.return_type, .normal); if (ret_gz.instructionsSlice().len == 0) { // In this case we will send a len=0 body which can be encoded more efficiently. break :inst inst; @@ -4351,7 +4384,7 @@ fn fnDecl( const lbrace_line = astgen.source_line - decl_gz.decl_line; const lbrace_column = astgen.source_column; - _ = try fullBodyExpr(&fn_gz, params_scope, .{ .rl = .none }, body_node); + _ = try fullBodyExpr(&fn_gz, params_scope, .{ .rl = .none }, body_node, .allow_branch_hint); try checkUsed(gz, &fn_gz.base, params_scope); if (!fn_gz.endsWithNoReturn()) { @@ -4552,20 +4585,20 @@ fn globalVarDecl( var align_gz = block_scope.makeSubBlock(scope); if (var_decl.ast.align_node != 0) { - const align_inst = try fullBodyExpr(&align_gz, &align_gz.base, coerced_align_ri, var_decl.ast.align_node); + const align_inst = try fullBodyExpr(&align_gz, &align_gz.base, coerced_align_ri, var_decl.ast.align_node, .normal); _ = try align_gz.addBreakWithSrcNode(.break_inline, decl_inst, align_inst, node); } var linksection_gz = align_gz.makeSubBlock(scope); if (var_decl.ast.section_node != 0) { - const linksection_inst = try fullBodyExpr(&linksection_gz, &linksection_gz.base, coerced_linksection_ri, var_decl.ast.section_node); + const linksection_inst = try fullBodyExpr(&linksection_gz, &linksection_gz.base, coerced_linksection_ri, var_decl.ast.section_node, .normal); _ = try linksection_gz.addBreakWithSrcNode(.break_inline, decl_inst, linksection_inst, node); } var addrspace_gz = linksection_gz.makeSubBlock(scope); if (var_decl.ast.addrspace_node != 0) { const addrspace_ty = try addrspace_gz.addBuiltinValue(var_decl.ast.addrspace_node, .address_space); - const addrspace_inst = try fullBodyExpr(&addrspace_gz, &addrspace_gz.base, .{ .rl = .{ .coerced_ty = addrspace_ty } }, var_decl.ast.addrspace_node); + const 
addrspace_inst = try fullBodyExpr(&addrspace_gz, &addrspace_gz.base, .{ .rl = .{ .coerced_ty = addrspace_ty } }, var_decl.ast.addrspace_node, .normal); _ = try addrspace_gz.addBreakWithSrcNode(.break_inline, decl_inst, addrspace_inst, node); } @@ -4622,7 +4655,7 @@ fn comptimeDecl( }; defer decl_block.unstack(); - const block_result = try fullBodyExpr(&decl_block, &decl_block.base, .{ .rl = .none }, body_node); + const block_result = try fullBodyExpr(&decl_block, &decl_block.base, .{ .rl = .none }, body_node, .normal); if (decl_block.isEmpty() or !decl_block.refIsNoReturn(block_result)) { _ = try decl_block.addBreak(.break_inline, decl_inst, .void_value); } @@ -4843,7 +4876,7 @@ fn testDecl( const lbrace_line = astgen.source_line - decl_block.decl_line; const lbrace_column = astgen.source_column; - const block_result = try fullBodyExpr(&fn_block, &fn_block.base, .{ .rl = .none }, body_node); + const block_result = try fullBodyExpr(&fn_block, &fn_block.base, .{ .rl = .none }, body_node, .normal); if (fn_block.isEmpty() or !fn_block.refIsNoReturn(block_result)) { // As our last action before the return, "pop" the error trace if needed @@ -6112,7 +6145,7 @@ fn orelseCatchExpr( break :blk &err_val_scope.base; }; - const else_result = try fullBodyExpr(&else_scope, else_sub_scope, block_scope.break_result_info, rhs); + const else_result = try fullBodyExpr(&else_scope, else_sub_scope, block_scope.break_result_info, rhs, .allow_branch_hint); if (!else_scope.endsWithNoReturn()) { // As our last action before the break, "pop" the error trace if needed if (do_err_trace) @@ -6280,7 +6313,7 @@ fn boolBinOp( var rhs_scope = gz.makeSubBlock(scope); defer rhs_scope.unstack(); - const rhs = try fullBodyExpr(&rhs_scope, &rhs_scope.base, coerced_bool_ri, node_datas[node].rhs); + const rhs = try fullBodyExpr(&rhs_scope, &rhs_scope.base, coerced_bool_ri, node_datas[node].rhs, .allow_branch_hint); if (!gz.refIsNoReturn(rhs)) { _ = try rhs_scope.addBreakWithSrcNode(.break_inline, 
bool_br, rhs, node_datas[node].rhs); } @@ -6424,7 +6457,7 @@ fn ifExpr( } }; - const then_result = try fullBodyExpr(&then_scope, then_sub_scope, block_scope.break_result_info, then_node); + const then_result = try fullBodyExpr(&then_scope, then_sub_scope, block_scope.break_result_info, then_node, .allow_branch_hint); try checkUsed(parent_gz, &then_scope.base, then_sub_scope); if (!then_scope.endsWithNoReturn()) { _ = try then_scope.addBreakWithSrcNode(.@"break", block, then_result, then_node); @@ -6466,7 +6499,7 @@ fn ifExpr( break :s &else_scope.base; } }; - const else_result = try fullBodyExpr(&else_scope, sub_scope, block_scope.break_result_info, else_node); + const else_result = try fullBodyExpr(&else_scope, sub_scope, block_scope.break_result_info, else_node, .allow_branch_hint); if (!else_scope.endsWithNoReturn()) { // As our last action before the break, "pop" the error trace if needed if (do_err_trace) @@ -6575,7 +6608,7 @@ fn whileExpr( } = c: { if (while_full.error_token) |_| { const cond_ri: ResultInfo = .{ .rl = if (payload_is_ref) .ref else .none }; - const err_union = try fullBodyExpr(&cond_scope, &cond_scope.base, cond_ri, while_full.ast.cond_expr); + const err_union = try fullBodyExpr(&cond_scope, &cond_scope.base, cond_ri, while_full.ast.cond_expr, .normal); const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_err_ptr else .is_non_err; break :c .{ .inst = err_union, @@ -6583,14 +6616,14 @@ fn whileExpr( }; } else if (while_full.payload_token) |_| { const cond_ri: ResultInfo = .{ .rl = if (payload_is_ref) .ref else .none }; - const optional = try fullBodyExpr(&cond_scope, &cond_scope.base, cond_ri, while_full.ast.cond_expr); + const optional = try fullBodyExpr(&cond_scope, &cond_scope.base, cond_ri, while_full.ast.cond_expr, .normal); const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_null_ptr else .is_non_null; break :c .{ .inst = optional, .bool_bit = try cond_scope.addUnNode(tag, optional, while_full.ast.cond_expr), }; } else { - const cond = 
try fullBodyExpr(&cond_scope, &cond_scope.base, coerced_bool_ri, while_full.ast.cond_expr); + const cond = try fullBodyExpr(&cond_scope, &cond_scope.base, coerced_bool_ri, while_full.ast.cond_expr, .normal); break :c .{ .inst = cond, .bool_bit = cond, @@ -6715,7 +6748,7 @@ fn whileExpr( continue_scope.instructions_top = continue_scope.instructions.items.len; { try emitDbgNode(&continue_scope, then_node); - const unused_result = try fullBodyExpr(&continue_scope, &continue_scope.base, .{ .rl = .none }, then_node); + const unused_result = try fullBodyExpr(&continue_scope, &continue_scope.base, .{ .rl = .none }, then_node, .allow_branch_hint); _ = try addEnsureResult(&continue_scope, unused_result, then_node); } try checkUsed(parent_gz, &then_scope.base, then_sub_scope); @@ -6761,7 +6794,7 @@ fn whileExpr( // control flow apply to outer loops; not this one. loop_scope.continue_block = .none; loop_scope.break_block = .none; - const else_result = try fullBodyExpr(&else_scope, sub_scope, loop_scope.break_result_info, else_node); + const else_result = try fullBodyExpr(&else_scope, sub_scope, loop_scope.break_result_info, else_node, .allow_branch_hint); if (is_statement) { _ = try addEnsureResult(&else_scope, else_result, else_node); } @@ -7029,7 +7062,7 @@ fn forExpr( break :blk capture_sub_scope; }; - const then_result = try fullBodyExpr(&then_scope, then_sub_scope, .{ .rl = .none }, then_node); + const then_result = try fullBodyExpr(&then_scope, then_sub_scope, .{ .rl = .none }, then_node, .allow_branch_hint); _ = try addEnsureResult(&then_scope, then_result, then_node); try checkUsed(parent_gz, &then_scope.base, then_sub_scope); @@ -7048,7 +7081,7 @@ fn forExpr( // control flow apply to outer loops; not this one. 
loop_scope.continue_block = .none; loop_scope.break_block = .none; - const else_result = try fullBodyExpr(&else_scope, sub_scope, loop_scope.break_result_info, else_node); + const else_result = try fullBodyExpr(&else_scope, sub_scope, loop_scope.break_result_info, else_node, .allow_branch_hint); if (is_statement) { _ = try addEnsureResult(&else_scope, else_result, else_node); } @@ -7525,7 +7558,7 @@ fn switchExprErrUnion( } const target_expr_node = case.ast.target_expr; - const case_result = try fullBodyExpr(&case_scope, sub_scope, block_scope.break_result_info, target_expr_node); + const case_result = try fullBodyExpr(&case_scope, sub_scope, block_scope.break_result_info, target_expr_node, .allow_branch_hint); // check capture_scope, not err_scope to avoid false positive unused error capture try checkUsed(parent_gz, &case_scope.base, err_scope.parent); const uses_err = err_scope.used != 0 or err_scope.discarded != 0; @@ -7986,7 +8019,7 @@ fn switchExpr( try case_scope.addDbgVar(.dbg_var_val, dbg_var_tag_name, dbg_var_tag_inst); } const target_expr_node = case.ast.target_expr; - const case_result = try fullBodyExpr(&case_scope, sub_scope, block_scope.break_result_info, target_expr_node); + const case_result = try fullBodyExpr(&case_scope, sub_scope, block_scope.break_result_info, target_expr_node, .allow_branch_hint); try checkUsed(parent_gz, &case_scope.base, sub_scope); if (!parent_gz.refIsNoReturn(case_result)) { _ = try case_scope.addBreakWithSrcNode(.@"break", switch_block, case_result, target_expr_node); @@ -9154,6 +9187,7 @@ fn builtinCall( ri: ResultInfo, node: Ast.Node.Index, params: []const Ast.Node.Index, + allow_branch_hint: bool, ) InnerError!Zir.Inst.Ref { const astgen = gz.astgen; const tree = astgen.tree; @@ -9187,6 +9221,18 @@ fn builtinCall( return astgen.failNode(node, "'{s}' outside function scope", .{builtin_name}); switch (info.tag) { + .branch_hint => { + if (!allow_branch_hint) { + return astgen.failNode(node, "'@branchHint' must appear as 
the first statement in a function or conditional branch", .{}); + } + const hint_ty = try gz.addBuiltinValue(node, .branch_hint); + const hint_val = try comptimeExpr(gz, scope, .{ .rl = .{ .coerced_ty = hint_ty } }, params[0]); + _ = try gz.addExtendedPayload(.branch_hint, Zir.Inst.UnNode{ + .node = gz.nodeIndexToRelative(node), + .operand = hint_val, + }); + return rvalue(gz, ri, .void_value, node); + }, .import => { const node_tags = tree.nodes.items(.tag); const operand_node = params[0]; @@ -9294,14 +9340,6 @@ fn builtinCall( }); return rvalue(gz, ri, .void_value, node); }, - .set_cold => { - const order = try expr(gz, scope, ri, params[0]); - _ = try gz.addExtendedPayload(.set_cold, Zir.Inst.UnNode{ - .node = gz.nodeIndexToRelative(node), - .operand = order, - }); - return rvalue(gz, ri, .void_value, node); - }, .src => { // Incorporate the source location into the source hash, so that @@ -9963,7 +10001,7 @@ fn cImport( defer block_scope.unstack(); const block_inst = try gz.makeBlockInst(.c_import, node); - const block_result = try fullBodyExpr(&block_scope, &block_scope.base, .{ .rl = .none }, body_node); + const block_result = try fullBodyExpr(&block_scope, &block_scope.base, .{ .rl = .none }, body_node, .normal); _ = try gz.addUnNode(.ensure_result_used, block_result, node); if (!gz.refIsNoReturn(block_result)) { _ = try block_scope.addBreak(.break_inline, block_inst, .void_value); @@ -10046,7 +10084,7 @@ fn callExpr( defer arg_block.unstack(); // `call_inst` is reused to provide the param type. 
- const arg_ref = try fullBodyExpr(&arg_block, &arg_block.base, .{ .rl = .{ .coerced_ty = call_inst }, .ctx = .fn_arg }, param_node); + const arg_ref = try fullBodyExpr(&arg_block, &arg_block.base, .{ .rl = .{ .coerced_ty = call_inst }, .ctx = .fn_arg }, param_node, .normal); _ = try arg_block.addBreakWithSrcNode(.break_inline, call_index, arg_ref, param_node); const body = arg_block.instructionsSlice(); diff --git a/lib/std/zig/AstRlAnnotate.zig b/lib/std/zig/AstRlAnnotate.zig index e956ffd2a9..597baa2936 100644 --- a/lib/std/zig/AstRlAnnotate.zig +++ b/lib/std/zig/AstRlAnnotate.zig @@ -829,6 +829,10 @@ fn builtinCall(astrl: *AstRlAnnotate, block: ?*Block, ri: ResultInfo, node: Ast. } switch (info.tag) { .import => return false, + .branch_hint => { + _ = try astrl.expr(args[0], block, ResultInfo.type_only); + return false; + }, .compile_log, .TypeOf => { for (args) |arg_node| { _ = try astrl.expr(arg_node, block, ResultInfo.none); @@ -907,7 +911,6 @@ fn builtinCall(astrl: *AstRlAnnotate, block: ?*Block, ri: ResultInfo, node: Ast. 
.fence, .set_float_mode, .set_align_stack, - .set_cold, .type_info, .work_item_id, .work_group_size, diff --git a/lib/std/zig/BuiltinFn.zig b/lib/std/zig/BuiltinFn.zig index fc08f9eb85..1da3ffb5a7 100644 --- a/lib/std/zig/BuiltinFn.zig +++ b/lib/std/zig/BuiltinFn.zig @@ -14,6 +14,7 @@ pub const Tag = enum { bit_offset_of, int_from_bool, bit_size_of, + branch_hint, breakpoint, disable_instrumentation, mul_add, @@ -82,7 +83,6 @@ pub const Tag = enum { return_address, select, set_align_stack, - set_cold, set_eval_branch_quota, set_float_mode, set_runtime_safety, @@ -256,6 +256,14 @@ pub const list = list: { .param_count = 1, }, }, + .{ + "@branchHint", + .{ + .tag = .branch_hint, + .param_count = 1, + .illegal_outside_function = true, + }, + }, .{ "@breakpoint", .{ @@ -744,14 +752,6 @@ pub const list = list: { .illegal_outside_function = true, }, }, - .{ - "@setCold", - .{ - .tag = .set_cold, - .param_count = 1, - .illegal_outside_function = true, - }, - }, .{ "@setEvalBranchQuota", .{ diff --git a/lib/std/zig/Zir.zig b/lib/std/zig/Zir.zig index e73fc5fd2d..02eb38f9c4 100644 --- a/lib/std/zig/Zir.zig +++ b/lib/std/zig/Zir.zig @@ -1546,7 +1546,7 @@ pub const Inst = struct { => false, .extended => switch (data.extended.opcode) { - .fence, .set_cold, .breakpoint, .disable_instrumentation => true, + .fence, .branch_hint, .breakpoint, .disable_instrumentation => true, else => false, }, }; @@ -1954,9 +1954,6 @@ pub const Inst = struct { /// Implement builtin `@setAlignStack`. /// `operand` is payload index to `UnNode`. set_align_stack, - /// Implements `@setCold`. - /// `operand` is payload index to `UnNode`. - set_cold, /// Implements the `@errorCast` builtin. /// `operand` is payload index to `BinNode`. `lhs` is dest type, `rhs` is operand. error_cast, @@ -2051,6 +2048,10 @@ pub const Inst = struct { /// `operand` is `src_node: i32`. /// `small` is an `Inst.BuiltinValue`. builtin_value, + /// Provide a `@branchHint` for the current block. 
+ /// `operand` is payload index to `UnNode`. + /// `small` is unused. + branch_hint, pub const InstData = struct { opcode: Extended, @@ -3142,6 +3143,7 @@ pub const Inst = struct { export_options, extern_options, type_info, + branch_hint, // Values calling_convention_c, calling_convention_inline, @@ -3962,7 +3964,6 @@ fn findDeclsInner( .fence, .set_float_mode, .set_align_stack, - .set_cold, .error_cast, .await_nosuspend, .breakpoint, @@ -3986,6 +3987,7 @@ fn findDeclsInner( .closure_get, .field_parent_ptr, .builtin_value, + .branch_hint, => return, // `@TypeOf` has a body. diff --git a/src/Air.zig b/src/Air.zig index 5c559a4088..2e45024919 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -433,13 +433,18 @@ pub const Inst = struct { /// In the case of non-error, control flow proceeds to the next instruction /// after the `try`, with the result of this instruction being the unwrapped /// payload value, as if `unwrap_errunion_payload` was executed on the operand. + /// The error branch is considered to have a branch hint of `.unlikely`. /// Uses the `pl_op` field. Payload is `Try`. @"try", + /// Same as `try` except the error branch hint is `.cold`. + try_cold, /// Same as `try` except the operand is a pointer to an error union, and the /// result is a pointer to the payload. Result is as if `unwrap_errunion_payload_ptr` /// was executed on the operand. /// Uses the `ty_pl` field. Payload is `TryPtr`. try_ptr, + /// Same as `try_ptr` except the error branch hint is `.cold`. + try_ptr_cold, /// Notes the beginning of a source code statement and marks the line and column. /// Result type is always void. /// Uses the `dbg_stmt` field. @@ -1116,11 +1121,20 @@ pub const Call = struct { pub const CondBr = struct { then_body_len: u32, else_body_len: u32, + branch_hints: BranchHints, + pub const BranchHints = packed struct(u32) { + true: std.builtin.BranchHint, + false: std.builtin.BranchHint, + _: u26 = 0, + }; }; /// Trailing: -/// * 0. `Case` for each `cases_len` -/// * 1. 
the else body, according to `else_body_len`. +/// * 0. `BranchHint` for each `cases_len + 1`. bit-packed into `u32` +/// elems such that each `u32` contains up to 10x `BranchHint`. +/// LSBs are first case. Final hint is `else`. +/// * 1. `Case` for each `cases_len` +/// * 2. the else body, according to `else_body_len`. pub const SwitchBr = struct { cases_len: u32, else_body_len: u32, @@ -1380,6 +1394,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .ptr_add, .ptr_sub, .try_ptr, + .try_ptr_cold, => return datas[@intFromEnum(inst)].ty_pl.ty.toType(), .not, @@ -1500,7 +1515,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) return air.typeOf(extra.lhs, ip); }, - .@"try" => { + .@"try", .try_cold => { const err_union_ty = air.typeOf(datas[@intFromEnum(inst)].pl_op.operand, ip); return Type.fromInterned(ip.indexToKey(err_union_ty.ip_index).error_union_type.payload_type); }, @@ -1524,9 +1539,8 @@ pub fn extraData(air: Air, comptime T: type, index: usize) struct { data: T, end inline for (fields) |field| { @field(result, field.name) = switch (field.type) { u32 => air.extra[i], - Inst.Ref => @as(Inst.Ref, @enumFromInt(air.extra[i])), - i32 => @as(i32, @bitCast(air.extra[i])), - InternPool.Index => @as(InternPool.Index, @enumFromInt(air.extra[i])), + InternPool.Index, Inst.Ref => @enumFromInt(air.extra[i]), + i32, CondBr.BranchHints => @bitCast(air.extra[i]), else => @compileError("bad field type: " ++ @typeName(field.type)), }; i += 1; @@ -1593,7 +1607,9 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .cond_br, .switch_br, .@"try", + .try_cold, .try_ptr, + .try_ptr_cold, .dbg_stmt, .dbg_inline_block, .dbg_var_ptr, @@ -1796,4 +1812,91 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { }; } +pub const UnwrappedSwitch = struct { + air: *const Air, + operand: Inst.Ref, + cases_len: u32, + else_body_len: u32, + branch_hints_start: u32, + 
cases_start: u32, + + /// Asserts that `case_idx < us.cases_len`. + pub fn getHint(us: UnwrappedSwitch, case_idx: u32) std.builtin.BranchHint { + assert(case_idx < us.cases_len); + return us.getHintInner(case_idx); + } + pub fn getElseHint(us: UnwrappedSwitch) std.builtin.BranchHint { + return us.getHintInner(us.cases_len); + } + fn getHintInner(us: UnwrappedSwitch, idx: u32) std.builtin.BranchHint { + const bag = us.air.extra[us.branch_hints_start..][idx / 10]; + const bits: u3 = @truncate(bag >> @intCast(3 * (idx % 10))); + return @enumFromInt(bits); + } + + pub fn iterateCases(us: UnwrappedSwitch) CaseIterator { + return .{ + .air = us.air, + .cases_len = us.cases_len, + .else_body_len = us.else_body_len, + .next_case = 0, + .extra_index = us.cases_start, + }; + } + pub const CaseIterator = struct { + air: *const Air, + cases_len: u32, + else_body_len: u32, + next_case: u32, + extra_index: u32, + + pub fn next(it: *CaseIterator) ?Case { + if (it.next_case == it.cases_len) return null; + const idx = it.next_case; + it.next_case += 1; + + const extra = it.air.extraData(SwitchBr.Case, it.extra_index); + var extra_index = extra.end; + const items: []const Inst.Ref = @ptrCast(it.air.extra[extra_index..][0..extra.data.items_len]); + extra_index += items.len; + const body: []const Inst.Index = @ptrCast(it.air.extra[extra_index..][0..extra.data.body_len]); + extra_index += body.len; + it.extra_index = @intCast(extra_index); + + return .{ + .idx = idx, + .items = items, + .body = body, + }; + } + /// Only valid to call once all cases have been iterated, i.e. `next` returns `null`. + /// Returns the body of the "default" (`else`) case. 
+ pub fn elseBody(it: *CaseIterator) []const Inst.Index { + assert(it.next_case == it.cases_len); + return @ptrCast(it.air.extra[it.extra_index..][0..it.else_body_len]); + } + pub const Case = struct { + idx: u32, + items: []const Inst.Ref, + body: []const Inst.Index, + }; + }; +}; + +pub fn unwrapSwitch(air: *const Air, switch_inst: Inst.Index) UnwrappedSwitch { + const inst = air.instructions.get(@intFromEnum(switch_inst)); + assert(inst.tag == .switch_br); + const pl_op = inst.data.pl_op; + const extra = air.extraData(SwitchBr, pl_op.payload); + const hint_bag_count = std.math.divCeil(usize, extra.data.cases_len + 1, 10) catch unreachable; + return .{ + .air = air, + .operand = pl_op.operand, + .cases_len = extra.data.cases_len, + .else_body_len = extra.data.else_body_len, + .branch_hints_start = @intCast(extra.end), + .cases_start = @intCast(extra.end + hint_bag_count), + }; +} + pub const typesFullyResolved = @import("Air/types_resolved.zig").typesFullyResolved; diff --git a/src/Air/types_resolved.zig b/src/Air/types_resolved.zig index 4b92a3a94f..f51e4c2aea 100644 --- a/src/Air/types_resolved.zig +++ b/src/Air/types_resolved.zig @@ -344,7 +344,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { if (!checkRef(data.pl_op.operand, zcu)) return false; }, - .@"try" => { + .@"try", .try_cold => { const extra = air.extraData(Air.Try, data.pl_op.payload); if (!checkRef(data.pl_op.operand, zcu)) return false; if (!checkBody( @@ -354,7 +354,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { )) return false; }, - .try_ptr => { + .try_ptr, .try_ptr_cold => { const extra = air.extraData(Air.TryPtr, data.ty_pl.payload); if (!checkType(data.ty_pl.ty.toType(), zcu)) return false; if (!checkRef(extra.data.ptr, zcu)) return false; @@ -381,27 +381,14 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { }, .switch_br => { - const extra = air.extraData(Air.SwitchBr, data.pl_op.payload); - if 
(!checkRef(data.pl_op.operand, zcu)) return false; - var extra_index = extra.end; - for (0..extra.data.cases_len) |_| { - const case = air.extraData(Air.SwitchBr.Case, extra_index); - extra_index = case.end; - const items: []const Air.Inst.Ref = @ptrCast(air.extra[extra_index..][0..case.data.items_len]); - extra_index += case.data.items_len; - for (items) |item| if (!checkRef(item, zcu)) return false; - if (!checkBody( - air, - @ptrCast(air.extra[extra_index..][0..case.data.body_len]), - zcu, - )) return false; - extra_index += case.data.body_len; + const switch_br = air.unwrapSwitch(inst); + if (!checkRef(switch_br.operand, zcu)) return false; + var it = switch_br.iterateCases(); + while (it.next()) |case| { + for (case.items) |item| if (!checkRef(item, zcu)) return false; + if (!checkBody(air, case.body, zcu)) return false; } - if (!checkBody( - air, - @ptrCast(air.extra[extra_index..][0..extra.data.else_body_len]), - zcu, - )) return false; + if (!checkBody(air, it.elseBody(), zcu)) return false; }, .assembly => { diff --git a/src/InternPool.zig b/src/InternPool.zig index 7adfae31f4..d0ade7026c 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -2121,6 +2121,17 @@ pub const Key = union(enum) { @atomicStore(FuncAnalysis, analysis_ptr, analysis, .release); } + pub fn setBranchHint(func: Func, ip: *InternPool, hint: std.builtin.BranchHint) void { + const extra_mutex = &ip.getLocal(func.tid).mutate.extra.mutex; + extra_mutex.lock(); + defer extra_mutex.unlock(); + + const analysis_ptr = func.analysisPtr(ip); + var analysis = analysis_ptr.*; + analysis.branch_hint = hint; + @atomicStore(FuncAnalysis, analysis_ptr, analysis, .release); + } + /// Returns a pointer that becomes invalid after any additions to the `InternPool`. fn zirBodyInstPtr(func: Func, ip: *InternPool) *TrackedInst.Index { const extra = ip.getLocalShared(func.tid).extra.acquire(); @@ -5575,7 +5586,7 @@ pub const Tag = enum(u8) { /// to be part of the type of the function. 
pub const FuncAnalysis = packed struct(u32) { state: State, - is_cold: bool, + branch_hint: std.builtin.BranchHint, is_noinline: bool, calls_or_awaits_errorable_fn: bool, stack_alignment: Alignment, @@ -5583,7 +5594,7 @@ pub const FuncAnalysis = packed struct(u32) { inferred_error_set: bool, disable_instrumentation: bool, - _: u19 = 0, + _: u17 = 0, pub const State = enum(u2) { /// The runtime function has never been referenced. @@ -8636,7 +8647,7 @@ pub fn getFuncDecl( const func_decl_extra_index = addExtraAssumeCapacity(extra, Tag.FuncDecl{ .analysis = .{ .state = .unreferenced, - .is_cold = false, + .branch_hint = .none, .is_noinline = key.is_noinline, .calls_or_awaits_errorable_fn = false, .stack_alignment = .none, @@ -8740,7 +8751,7 @@ pub fn getFuncDeclIes( const func_decl_extra_index = addExtraAssumeCapacity(extra, Tag.FuncDecl{ .analysis = .{ .state = .unreferenced, - .is_cold = false, + .branch_hint = .none, .is_noinline = key.is_noinline, .calls_or_awaits_errorable_fn = false, .stack_alignment = .none, @@ -8932,7 +8943,7 @@ pub fn getFuncInstance( const func_extra_index = addExtraAssumeCapacity(extra, Tag.FuncInstance{ .analysis = .{ .state = .unreferenced, - .is_cold = false, + .branch_hint = .none, .is_noinline = arg.is_noinline, .calls_or_awaits_errorable_fn = false, .stack_alignment = .none, @@ -9032,7 +9043,7 @@ pub fn getFuncInstanceIes( const func_extra_index = addExtraAssumeCapacity(extra, Tag.FuncInstance{ .analysis = .{ .state = .unreferenced, - .is_cold = false, + .branch_hint = .none, .is_noinline = arg.is_noinline, .calls_or_awaits_errorable_fn = false, .stack_alignment = .none, @@ -11853,18 +11864,6 @@ pub fn funcSetDisableInstrumentation(ip: *InternPool, func: Index) void { @atomicStore(FuncAnalysis, analysis_ptr, analysis, .release); } -pub fn funcSetCold(ip: *InternPool, func: Index, is_cold: bool) void { - const unwrapped_func = func.unwrap(ip); - const extra_mutex = &ip.getLocal(unwrapped_func.tid).mutate.extra.mutex; - 
extra_mutex.lock(); - defer extra_mutex.unlock(); - - const analysis_ptr = ip.funcAnalysisPtr(func); - var analysis = analysis_ptr.*; - analysis.is_cold = is_cold; - @atomicStore(FuncAnalysis, analysis_ptr, analysis, .release); -} - pub fn funcZirBodyInst(ip: *const InternPool, func: Index) TrackedInst.Index { const unwrapped_func = func.unwrap(ip); const item = unwrapped_func.getItem(ip); diff --git a/src/Liveness.zig b/src/Liveness.zig index b75fc402dd..f26ea9a7a7 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -658,10 +658,10 @@ pub fn categorizeOperand( return .complex; }, - .@"try" => { + .@"try", .try_cold => { return .complex; }, - .try_ptr => { + .try_ptr, .try_ptr_cold => { return .complex; }, .loop => { @@ -1254,8 +1254,8 @@ fn analyzeInst( }, .loop => return analyzeInstLoop(a, pass, data, inst), - .@"try" => return analyzeInstCondBr(a, pass, data, inst, .@"try"), - .try_ptr => return analyzeInstCondBr(a, pass, data, inst, .try_ptr), + .@"try", .try_cold => return analyzeInstCondBr(a, pass, data, inst, .@"try"), + .try_ptr, .try_ptr_cold => return analyzeInstCondBr(a, pass, data, inst, .try_ptr), .cond_br => return analyzeInstCondBr(a, pass, data, inst, .cond_br), .switch_br => return analyzeInstSwitchBr(a, pass, data, inst), @@ -1674,21 +1674,18 @@ fn analyzeInstSwitchBr( const inst_datas = a.air.instructions.items(.data); const pl_op = inst_datas[@intFromEnum(inst)].pl_op; const condition = pl_op.operand; - const switch_br = a.air.extraData(Air.SwitchBr, pl_op.payload); + const switch_br = a.air.unwrapSwitch(inst); const gpa = a.gpa; - const ncases = switch_br.data.cases_len; + const ncases = switch_br.cases_len; switch (pass) { .loop_analysis => { - var air_extra_index: usize = switch_br.end; - for (0..ncases) |_| { - const case = a.air.extraData(Air.SwitchBr.Case, air_extra_index); - const case_body: []const Air.Inst.Index = @ptrCast(a.air.extra[case.end + case.data.items_len ..][0..case.data.body_len]); - air_extra_index = case.end + 
case.data.items_len + case_body.len; - try analyzeBody(a, pass, data, case_body); + var it = switch_br.iterateCases(); + while (it.next()) |case| { + try analyzeBody(a, pass, data, case.body); } { // else - const else_body: []const Air.Inst.Index = @ptrCast(a.air.extra[air_extra_index..][0..switch_br.data.else_body_len]); + const else_body = it.elseBody(); try analyzeBody(a, pass, data, else_body); } }, @@ -1706,16 +1703,13 @@ fn analyzeInstSwitchBr( @memset(case_live_sets, .{}); defer for (case_live_sets) |*live_set| live_set.deinit(gpa); - var air_extra_index: usize = switch_br.end; - for (case_live_sets[0..ncases]) |*live_set| { - const case = a.air.extraData(Air.SwitchBr.Case, air_extra_index); - const case_body: []const Air.Inst.Index = @ptrCast(a.air.extra[case.end + case.data.items_len ..][0..case.data.body_len]); - air_extra_index = case.end + case.data.items_len + case_body.len; - try analyzeBody(a, pass, data, case_body); - live_set.* = data.live_set.move(); + var case_it = switch_br.iterateCases(); + while (case_it.next()) |case| { + try analyzeBody(a, pass, data, case.body); + case_live_sets[case.idx] = data.live_set.move(); } { // else - const else_body: []const Air.Inst.Index = @ptrCast(a.air.extra[air_extra_index..][0..switch_br.data.else_body_len]); + const else_body = case_it.elseBody(); try analyzeBody(a, pass, data, else_body); case_live_sets[ncases] = data.live_set.move(); } diff --git a/src/Liveness/Verify.zig b/src/Liveness/Verify.zig index 7a9959481a..bcd60d72c8 100644 --- a/src/Liveness/Verify.zig +++ b/src/Liveness/Verify.zig @@ -374,7 +374,7 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { }, // control flow - .@"try" => { + .@"try", .try_cold => { const pl_op = data[@intFromEnum(inst)].pl_op; const extra = self.air.extraData(Air.Try, pl_op.payload); const try_body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]); @@ -396,7 +396,7 @@ fn verifyBody(self: *Verify, body: []const 
Air.Inst.Index) Error!void { try self.verifyInst(inst); }, - .try_ptr => { + .try_ptr, .try_ptr_cold => { const ty_pl = data[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.TryPtr, ty_pl.payload); const try_body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]); @@ -509,44 +509,33 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { try self.verifyInst(inst); }, .switch_br => { - const pl_op = data[@intFromEnum(inst)].pl_op; - const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload); - var extra_index = switch_br.end; - var case_i: u32 = 0; + const switch_br = self.air.unwrapSwitch(inst); const switch_br_liveness = try self.liveness.getSwitchBr( self.gpa, inst, - switch_br.data.cases_len + 1, + switch_br.cases_len + 1, ); defer self.gpa.free(switch_br_liveness.deaths); - try self.verifyOperand(inst, pl_op.operand, self.liveness.operandDies(inst, 0)); + try self.verifyOperand(inst, switch_br.operand, self.liveness.operandDies(inst, 0)); var live = self.live.move(); defer live.deinit(self.gpa); - while (case_i < switch_br.data.cases_len) : (case_i += 1) { - const case = self.air.extraData(Air.SwitchBr.Case, extra_index); - const items = @as( - []const Air.Inst.Ref, - @ptrCast(self.air.extra[case.end..][0..case.data.items_len]), - ); - const case_body: []const Air.Inst.Index = @ptrCast(self.air.extra[case.end + items.len ..][0..case.data.body_len]); - extra_index = case.end + items.len + case_body.len; - + var it = switch_br.iterateCases(); + while (it.next()) |case| { self.live.deinit(self.gpa); self.live = try live.clone(self.gpa); - for (switch_br_liveness.deaths[case_i]) |death| try self.verifyDeath(inst, death); - try self.verifyBody(case_body); + for (switch_br_liveness.deaths[case.idx]) |death| try self.verifyDeath(inst, death); + try self.verifyBody(case.body); } - const else_body: []const Air.Inst.Index = 
@ptrCast(self.air.extra[extra_index..][0..switch_br.data.else_body_len]); + const else_body = it.elseBody(); if (else_body.len > 0) { self.live.deinit(self.gpa); self.live = try live.clone(self.gpa); - - for (switch_br_liveness.deaths[case_i]) |death| try self.verifyDeath(inst, death); + for (switch_br_liveness.deaths[switch_br.cases_len]) |death| try self.verifyDeath(inst, death); try self.verifyBody(else_body); } diff --git a/src/Sema.zig b/src/Sema.zig index 024ec8961d..ef72016b7b 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -118,6 +118,10 @@ dependencies: std.AutoArrayHashMapUnmanaged(InternPool.Dependee, void) = .{}, /// by `analyzeCall`. allow_memoize: bool = true, +/// The `BranchHint` for the current branch of runtime control flow. +/// This state is on `Sema` so that `cold` hints can be propagated up through blocks with less special handling. +branch_hint: ?std.builtin.BranchHint = null, + const MaybeComptimeAlloc = struct { /// The runtime index of the `alloc` instruction. runtime_index: Value.RuntimeIndex, @@ -892,7 +896,12 @@ pub fn deinit(sema: *Sema) void { /// Performs semantic analysis of a ZIR body which is behind a runtime condition. If comptime /// control flow happens here, Sema will convert it to runtime control flow by introducing post-hoc /// blocks where necessary. -fn analyzeBodyRuntimeBreak(sema: *Sema, block: *Block, body: []const Zir.Inst.Index) !void { +/// Returns the branch hint for this branch. 
+fn analyzeBodyRuntimeBreak(sema: *Sema, block: *Block, body: []const Zir.Inst.Index) !std.builtin.BranchHint { + const parent_hint = sema.branch_hint; + defer sema.branch_hint = parent_hint; + sema.branch_hint = null; + sema.analyzeBodyInner(block, body) catch |err| switch (err) { error.ComptimeBreak => { const zir_datas = sema.code.instructions.items(.data); @@ -902,6 +911,8 @@ fn analyzeBodyRuntimeBreak(sema: *Sema, block: *Block, body: []const Zir.Inst.In }, else => |e| return e, }; + + return sema.branch_hint orelse .none; } /// Semantically analyze a ZIR function body. It is guranteed by AstGen that such a body cannot @@ -1304,11 +1315,6 @@ fn analyzeBodyInner( i += 1; continue; }, - .set_cold => { - try sema.zirSetCold(block, extended); - i += 1; - continue; - }, .breakpoint => { if (!block.is_comptime) { _ = try block.addNoOp(.breakpoint); @@ -1326,6 +1332,11 @@ fn analyzeBodyInner( i += 1; continue; }, + .branch_hint => { + try sema.zirBranchHint(block, extended); + i += 1; + continue; + }, .value_placeholder => unreachable, // never appears in a body .field_parent_ptr => try sema.zirFieldParentPtr(block, extended), .builtin_value => try sema.zirBuiltinValue(extended), @@ -5727,6 +5738,13 @@ fn zirPanic(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void if (block.is_comptime) { return sema.fail(block, src, "encountered @panic at comptime", .{}); } + + // We only apply the first hint in a branch. + // This allows user-provided hints to override implicit cold hints. 
+ if (sema.branch_hint == null) { + sema.branch_hint = .cold; + } + try sema.panicWithMsg(block, src, coerced_msg, .@"@panic"); } @@ -6418,25 +6436,6 @@ fn zirSetAlignStack(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.Inst sema.allow_memoize = false; } -fn zirSetCold(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!void { - const pt = sema.pt; - const zcu = pt.zcu; - const ip = &zcu.intern_pool; - const extra = sema.code.extraData(Zir.Inst.UnNode, extended.operand).data; - const operand_src = block.builtinCallArgSrc(extra.node, 0); - const is_cold = try sema.resolveConstBool(block, operand_src, extra.operand, .{ - .needed_comptime_reason = "operand to @setCold must be comptime-known", - }); - // TODO: should `@setCold` apply to the parent in an inline call? - // See also #20642 and friends. - const func = switch (sema.owner.unwrap()) { - .func => |func| func, - .cau => return, // does nothing outside a function - }; - ip.funcSetCold(func, is_cold); - sema.allow_memoize = false; -} - fn zirDisableInstrumentation(sema: *Sema) CompileError!void { const pt = sema.pt; const zcu = pt.zcu; @@ -6891,13 +6890,20 @@ fn popErrorReturnTrace( @typeInfo(Air.Block).Struct.fields.len + 1); // +1 for the sole .cond_br instruction in the .block const cond_br_inst: Air.Inst.Index = @enumFromInt(sema.air_instructions.len); - try sema.air_instructions.append(gpa, .{ .tag = .cond_br, .data = .{ .pl_op = .{ - .operand = is_non_error_inst, - .payload = sema.addExtraAssumeCapacity(Air.CondBr{ - .then_body_len = @intCast(then_block.instructions.items.len), - .else_body_len = @intCast(else_block.instructions.items.len), - }), - } } }); + try sema.air_instructions.append(gpa, .{ + .tag = .cond_br, + .data = .{ + .pl_op = .{ + .operand = is_non_error_inst, + .payload = sema.addExtraAssumeCapacity(Air.CondBr{ + .then_body_len = @intCast(then_block.instructions.items.len), + .else_body_len = @intCast(else_block.instructions.items.len), + // weight against 
error branch + .branch_hints = .{ .true = .likely, .false = .unlikely }, + }), + }, + }, + }); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(then_block.instructions.items)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(else_block.instructions.items)); @@ -10954,6 +10960,11 @@ const SwitchProngAnalysis = struct { sema.code.instructions.items(.data)[@intFromEnum(spa.switch_block_inst)].pl_node.src_node, ); + // We can propagate `.cold` hints from this branch since it's comptime-known + // to be taken from the parent branch. + const parent_hint = sema.branch_hint; + defer sema.branch_hint = parent_hint orelse if (sema.branch_hint == .cold) .cold else null; + if (has_tag_capture) { const tag_ref = try spa.analyzeTagCapture(child_block, capture_src, inline_case_capture); sema.inst_map.putAssumeCapacity(spa.tag_capture_inst, tag_ref); @@ -10990,6 +11001,7 @@ const SwitchProngAnalysis = struct { /// Analyze a switch prong which may have peers at runtime. /// Uses `analyzeBodyRuntimeBreak`. Sets up captures as needed. + /// Returns the `BranchHint` for the prong. fn analyzeProngRuntime( spa: SwitchProngAnalysis, case_block: *Block, @@ -11007,7 +11019,7 @@ const SwitchProngAnalysis = struct { /// Whether this prong has an inline tag capture. If `true`, then /// `inline_case_capture` cannot be `.none`. 
has_tag_capture: bool, - ) CompileError!void { + ) CompileError!std.builtin.BranchHint { const sema = spa.sema; if (has_tag_capture) { @@ -11033,7 +11045,7 @@ const SwitchProngAnalysis = struct { if (sema.typeOf(capture_ref).isNoReturn(sema.pt.zcu)) { // No need to analyze any further, the prong is unreachable - return; + return .none; } sema.inst_map.putAssumeCapacity(spa.switch_block_inst, capture_ref); @@ -11302,10 +11314,17 @@ const SwitchProngAnalysis = struct { const prong_count = field_indices.len - in_mem_coercible.count(); - const estimated_extra = prong_count * 6; // 2 for Case, 1 item, probably 3 insts + const estimated_extra = prong_count * 6 + (prong_count / 10); // 2 for Case, 1 item, probably 3 insts; plus hints var cases_extra = try std.ArrayList(u32).initCapacity(sema.gpa, estimated_extra); defer cases_extra.deinit(); + { + // All branch hints are `.none`, so just add zero elems. + comptime assert(@intFromEnum(std.builtin.BranchHint.none) == 0); + const need_elems = std.math.divCeil(usize, prong_count + 1, 10) catch unreachable; + try cases_extra.appendNTimes(0, need_elems); + } + { // Non-bitcast cases var it = in_mem_coercible.iterator(.{ .kind = .unset }); @@ -11728,7 +11747,7 @@ fn zirSwitchBlockErrUnion(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Comp sub_block.need_debug_scope = null; // this body is emitted regardless defer sub_block.instructions.deinit(gpa); - try sema.analyzeBodyRuntimeBreak(&sub_block, non_error_case.body); + const non_error_hint = try sema.analyzeBodyRuntimeBreak(&sub_block, non_error_case.body); const true_instructions = try sub_block.instructions.toOwnedSlice(gpa); defer gpa.free(true_instructions); @@ -11782,6 +11801,7 @@ fn zirSwitchBlockErrUnion(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Comp .payload = sema.addExtraAssumeCapacity(Air.CondBr{ .then_body_len = @intCast(true_instructions.len), .else_body_len = @intCast(sub_block.instructions.items.len), + .branch_hints = .{ .true = non_error_hint, .false 
= .none }, }), } }, }); @@ -12486,6 +12506,9 @@ fn analyzeSwitchRuntimeBlock( var cases_extra = try std.ArrayListUnmanaged(u32).initCapacity(gpa, estimated_cases_extra); defer cases_extra.deinit(gpa); + var branch_hints = try std.ArrayListUnmanaged(std.builtin.BranchHint).initCapacity(gpa, scalar_cases_len); + defer branch_hints.deinit(gpa); + var case_block = child_block.makeSubBlock(); case_block.runtime_loop = null; case_block.runtime_cond = operand_src; @@ -12516,10 +12539,13 @@ fn analyzeSwitchRuntimeBlock( break :blk field_ty.zigTypeTag(zcu) != .NoReturn; } else true; - if (err_set and try sema.maybeErrorUnwrap(&case_block, body, operand, operand_src, allow_err_code_unwrap)) { - // nothing to do here - } else if (analyze_body) { - try spa.analyzeProngRuntime( + const prong_hint: std.builtin.BranchHint = if (err_set and + try sema.maybeErrorUnwrap(&case_block, body, operand, operand_src, allow_err_code_unwrap)) + h: { + // nothing to do here. weight against error branch + break :h .unlikely; + } else if (analyze_body) h: { + break :h try spa.analyzeProngRuntime( &case_block, .normal, body, @@ -12532,10 +12558,12 @@ fn analyzeSwitchRuntimeBlock( if (info.is_inline) item else .none, info.has_tag_capture, ); - } else { + } else h: { _ = try case_block.addNoOp(.unreach); - } + break :h .none; + }; + try branch_hints.append(gpa, prong_hint); try cases_extra.ensureUnusedCapacity(gpa, 3 + case_block.instructions.items.len); cases_extra.appendAssumeCapacity(1); // items_len cases_extra.appendAssumeCapacity(@intCast(case_block.instructions.items.len)); @@ -12545,6 +12573,7 @@ fn analyzeSwitchRuntimeBlock( var is_first = true; var prev_cond_br: Air.Inst.Index = undefined; + var prev_hint: std.builtin.BranchHint = undefined; var first_else_body: []const Air.Inst.Index = &.{}; defer gpa.free(first_else_body); var prev_then_body: []const Air.Inst.Index = &.{}; @@ -12606,7 +12635,7 @@ fn analyzeSwitchRuntimeBlock( } })); emit_bb = true; - try spa.analyzeProngRuntime( + 
const prong_hint = try spa.analyzeProngRuntime( &case_block, .normal, body, @@ -12619,6 +12648,7 @@ fn analyzeSwitchRuntimeBlock( item_ref, info.has_tag_capture, ); + try branch_hints.append(gpa, prong_hint); try cases_extra.ensureUnusedCapacity(gpa, 3 + case_block.instructions.items.len); cases_extra.appendAssumeCapacity(1); // items_len @@ -12649,8 +12679,8 @@ fn analyzeSwitchRuntimeBlock( } })); emit_bb = true; - if (analyze_body) { - try spa.analyzeProngRuntime( + const prong_hint: std.builtin.BranchHint = if (analyze_body) h: { + break :h try spa.analyzeProngRuntime( &case_block, .normal, body, @@ -12663,9 +12693,11 @@ fn analyzeSwitchRuntimeBlock( item, info.has_tag_capture, ); - } else { + } else h: { _ = try case_block.addNoOp(.unreach); - } + break :h .none; + }; + try branch_hints.append(gpa, prong_hint); try cases_extra.ensureUnusedCapacity(gpa, 3 + case_block.instructions.items.len); cases_extra.appendAssumeCapacity(1); // items_len @@ -12697,10 +12729,13 @@ fn analyzeSwitchRuntimeBlock( const body = sema.code.bodySlice(extra_index, info.body_len); extra_index += info.body_len; - if (err_set and try sema.maybeErrorUnwrap(&case_block, body, operand, operand_src, allow_err_code_unwrap)) { - // nothing to do here - } else if (analyze_body) { - try spa.analyzeProngRuntime( + const prong_hint: std.builtin.BranchHint = if (err_set and + try sema.maybeErrorUnwrap(&case_block, body, operand, operand_src, allow_err_code_unwrap)) + h: { + // nothing to do here. 
weight against error branch + break :h .unlikely; + } else if (analyze_body) h: { + break :h try spa.analyzeProngRuntime( &case_block, .normal, body, @@ -12713,10 +12748,12 @@ fn analyzeSwitchRuntimeBlock( .none, false, ); - } else { + } else h: { _ = try case_block.addNoOp(.unreach); - } + break :h .none; + }; + try branch_hints.append(gpa, prong_hint); try cases_extra.ensureUnusedCapacity(gpa, 2 + items.len + case_block.instructions.items.len); @@ -12784,23 +12821,24 @@ fn analyzeSwitchRuntimeBlock( const body = sema.code.bodySlice(extra_index, info.body_len); extra_index += info.body_len; - if (err_set and try sema.maybeErrorUnwrap(&case_block, body, operand, operand_src, allow_err_code_unwrap)) { - // nothing to do here - } else { - try spa.analyzeProngRuntime( - &case_block, - .normal, - body, - info.capture, - child_block.src(.{ .switch_capture = .{ - .switch_node_offset = switch_node_offset, - .case_idx = .{ .kind = .multi, .index = @intCast(multi_i) }, - } }), - items, - .none, - false, - ); - } + const prong_hint: std.builtin.BranchHint = if (err_set and + try sema.maybeErrorUnwrap(&case_block, body, operand, operand_src, allow_err_code_unwrap)) + h: { + // nothing to do here. 
weight against error branch + break :h .unlikely; + } else try spa.analyzeProngRuntime( + &case_block, + .normal, + body, + info.capture, + child_block.src(.{ .switch_capture = .{ + .switch_node_offset = switch_node_offset, + .case_idx = .{ .kind = .multi, .index = @intCast(multi_i) }, + } }), + items, + .none, + false, + ); if (is_first) { is_first = false; @@ -12812,10 +12850,10 @@ fn analyzeSwitchRuntimeBlock( @typeInfo(Air.CondBr).Struct.fields.len + prev_then_body.len + cond_body.len, ); - sema.air_instructions.items(.data)[@intFromEnum(prev_cond_br)].pl_op.payload = - sema.addExtraAssumeCapacity(Air.CondBr{ + sema.air_instructions.items(.data)[@intFromEnum(prev_cond_br)].pl_op.payload = sema.addExtraAssumeCapacity(Air.CondBr{ .then_body_len = @intCast(prev_then_body.len), .else_body_len = @intCast(cond_body.len), + .branch_hints = .{ .true = prev_hint, .false = .none }, }); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(prev_then_body)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(cond_body)); @@ -12823,6 +12861,7 @@ fn analyzeSwitchRuntimeBlock( gpa.free(prev_then_body); prev_then_body = try case_block.instructions.toOwnedSlice(gpa); prev_cond_br = new_cond_br; + prev_hint = prong_hint; } } @@ -12854,8 +12893,8 @@ fn analyzeSwitchRuntimeBlock( if (emit_bb) try sema.emitBackwardBranch(block, special_prong_src); emit_bb = true; - if (analyze_body) { - try spa.analyzeProngRuntime( + const prong_hint: std.builtin.BranchHint = if (analyze_body) h: { + break :h try spa.analyzeProngRuntime( &case_block, .special, special.body, @@ -12868,9 +12907,11 @@ fn analyzeSwitchRuntimeBlock( item_ref, special.has_tag_capture, ); - } else { + } else h: { _ = try case_block.addNoOp(.unreach); - } + break :h .none; + }; + try branch_hints.append(gpa, prong_hint); try cases_extra.ensureUnusedCapacity(gpa, 3 + case_block.instructions.items.len); cases_extra.appendAssumeCapacity(1); // items_len @@ -12903,7 +12944,7 @@ fn analyzeSwitchRuntimeBlock( if (emit_bb) try 
sema.emitBackwardBranch(block, special_prong_src); emit_bb = true; - try spa.analyzeProngRuntime( + const prong_hint = try spa.analyzeProngRuntime( &case_block, .special, special.body, @@ -12916,6 +12957,7 @@ fn analyzeSwitchRuntimeBlock( item_ref, special.has_tag_capture, ); + try branch_hints.append(gpa, prong_hint); try cases_extra.ensureUnusedCapacity(gpa, 3 + case_block.instructions.items.len); cases_extra.appendAssumeCapacity(1); // items_len @@ -12937,7 +12979,7 @@ fn analyzeSwitchRuntimeBlock( if (emit_bb) try sema.emitBackwardBranch(block, special_prong_src); emit_bb = true; - try spa.analyzeProngRuntime( + const prong_hint = try spa.analyzeProngRuntime( &case_block, .special, special.body, @@ -12950,6 +12992,7 @@ fn analyzeSwitchRuntimeBlock( item_ref, special.has_tag_capture, ); + try branch_hints.append(gpa, prong_hint); try cases_extra.ensureUnusedCapacity(gpa, 3 + case_block.instructions.items.len); cases_extra.appendAssumeCapacity(1); // items_len @@ -12968,7 +13011,7 @@ fn analyzeSwitchRuntimeBlock( if (emit_bb) try sema.emitBackwardBranch(block, special_prong_src); emit_bb = true; - try spa.analyzeProngRuntime( + const prong_hint = try spa.analyzeProngRuntime( &case_block, .special, special.body, @@ -12981,6 +13024,7 @@ fn analyzeSwitchRuntimeBlock( .bool_true, special.has_tag_capture, ); + try branch_hints.append(gpa, prong_hint); try cases_extra.ensureUnusedCapacity(gpa, 3 + case_block.instructions.items.len); cases_extra.appendAssumeCapacity(1); // items_len @@ -12997,7 +13041,7 @@ fn analyzeSwitchRuntimeBlock( if (emit_bb) try sema.emitBackwardBranch(block, special_prong_src); emit_bb = true; - try spa.analyzeProngRuntime( + const prong_hint = try spa.analyzeProngRuntime( &case_block, .special, special.body, @@ -13010,6 +13054,7 @@ fn analyzeSwitchRuntimeBlock( .bool_false, special.has_tag_capture, ); + try branch_hints.append(gpa, prong_hint); try cases_extra.ensureUnusedCapacity(gpa, 3 + case_block.instructions.items.len); 
cases_extra.appendAssumeCapacity(1); // items_len @@ -13045,12 +13090,13 @@ fn analyzeSwitchRuntimeBlock( } else false else true; - if (special.body.len != 0 and err_set and + const else_hint: std.builtin.BranchHint = if (special.body.len != 0 and err_set and try sema.maybeErrorUnwrap(&case_block, special.body, operand, operand_src, allow_err_code_unwrap)) - { - // nothing to do here - } else if (special.body.len != 0 and analyze_body and !special.is_inline) { - try spa.analyzeProngRuntime( + h: { + // nothing to do here. weight against error branch + break :h .unlikely; + } else if (special.body.len != 0 and analyze_body and !special.is_inline) h: { + break :h try spa.analyzeProngRuntime( &case_block, .special, special.body, @@ -13063,7 +13109,7 @@ fn analyzeSwitchRuntimeBlock( .none, false, ); - } else { + } else h: { // We still need a terminator in this block, but we have proven // that it is unreachable. if (case_block.wantSafety()) { @@ -13072,33 +13118,57 @@ fn analyzeSwitchRuntimeBlock( } else { _ = try case_block.addNoOp(.unreach); } - } + // Safety check / unreachable branches are cold. 
+ break :h .cold; + }; if (is_first) { + try branch_hints.append(gpa, else_hint); final_else_body = case_block.instructions.items; } else { + try branch_hints.append(gpa, .none); // we have the range conditionals first try sema.air_extra.ensureUnusedCapacity(gpa, prev_then_body.len + @typeInfo(Air.CondBr).Struct.fields.len + case_block.instructions.items.len); - sema.air_instructions.items(.data)[@intFromEnum(prev_cond_br)].pl_op.payload = - sema.addExtraAssumeCapacity(Air.CondBr{ + sema.air_instructions.items(.data)[@intFromEnum(prev_cond_br)].pl_op.payload = sema.addExtraAssumeCapacity(Air.CondBr{ .then_body_len = @intCast(prev_then_body.len), .else_body_len = @intCast(case_block.instructions.items.len), + .branch_hints = .{ .true = prev_hint, .false = else_hint }, }); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(prev_then_body)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(case_block.instructions.items)); final_else_body = first_else_body; } + } else { + try branch_hints.append(gpa, .none); } + assert(branch_hints.items.len == cases_len + 1); + try sema.air_extra.ensureUnusedCapacity(gpa, @typeInfo(Air.SwitchBr).Struct.fields.len + - cases_extra.items.len + final_else_body.len); + cases_extra.items.len + final_else_body.len + + (std.math.divCeil(usize, branch_hints.items.len, 10) catch unreachable)); // branch hints const payload_index = sema.addExtraAssumeCapacity(Air.SwitchBr{ .cases_len = @intCast(cases_len), .else_body_len = @intCast(final_else_body.len), }); + { + // Add branch hints. + var cur_bag: u32 = 0; + for (branch_hints.items, 0..) 
|hint, idx| { + const idx_in_bag = idx % 10; + cur_bag |= @as(u32, @intFromEnum(hint)) << @intCast(idx_in_bag * 3); + if (idx_in_bag == 9) { + sema.air_extra.appendAssumeCapacity(cur_bag); + cur_bag = 0; + } + } + if (branch_hints.items.len % 10 != 0) { + sema.air_extra.appendAssumeCapacity(cur_bag); + } + } sema.air_extra.appendSliceAssumeCapacity(@ptrCast(cases_extra.items)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(final_else_body)); @@ -19159,6 +19229,10 @@ fn zirBoolBr( const lhs_result: Air.Inst.Ref = if (is_bool_or) .bool_true else .bool_false; _ = try lhs_block.addBr(block_inst, lhs_result); + const parent_hint = sema.branch_hint; + defer sema.branch_hint = parent_hint; + sema.branch_hint = null; + const rhs_result = try sema.resolveInlineBody(rhs_block, body, inst); const rhs_noret = sema.typeOf(rhs_result).isNoReturn(zcu); const coerced_rhs_result = if (!rhs_noret) rhs: { @@ -19167,7 +19241,17 @@ fn zirBoolBr( break :rhs coerced_result; } else rhs_result; - const result = sema.finishCondBr(parent_block, &child_block, &then_block, &else_block, lhs, block_inst); + const rhs_hint = sema.branch_hint orelse .none; + + const result = try sema.finishCondBr( + parent_block, + &child_block, + &then_block, + &else_block, + lhs, + block_inst, + if (is_bool_or) .{ .true = .none, .false = rhs_hint } else .{ .true = rhs_hint, .false = .none }, + ); if (!rhs_noret) { if (try sema.resolveDefinedValue(rhs_block, rhs_src, coerced_rhs_result)) |rhs_val| { if (is_bool_or and rhs_val.toBool()) { @@ -19189,6 +19273,7 @@ fn finishCondBr( else_block: *Block, cond: Air.Inst.Ref, block_inst: Air.Inst.Index, + branch_hints: Air.CondBr.BranchHints, ) !Air.Inst.Ref { const gpa = sema.gpa; @@ -19199,6 +19284,7 @@ fn finishCondBr( const cond_br_payload = sema.addExtraAssumeCapacity(Air.CondBr{ .then_body_len = @intCast(then_block.instructions.items.len), .else_body_len = @intCast(else_block.instructions.items.len), + .branch_hints = branch_hints, }); 
sema.air_extra.appendSliceAssumeCapacity(@ptrCast(then_block.instructions.items)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(else_block.instructions.items)); @@ -19333,6 +19419,11 @@ fn zirCondbr( if (try sema.resolveDefinedValue(parent_block, cond_src, cond)) |cond_val| { const body = if (cond_val.toBool()) then_body else else_body; + // We can propagate `.cold` hints from this branch since it's comptime-known + // to be taken from the parent branch. + const parent_hint = sema.branch_hint; + defer sema.branch_hint = parent_hint orelse if (sema.branch_hint == .cold) .cold else null; + try sema.maybeErrorUnwrapCondbr(parent_block, body, extra.data.condition, cond_src); // We use `analyzeBodyInner` since we want to propagate any comptime control flow to the caller. return sema.analyzeBodyInner(parent_block, body); @@ -19349,7 +19440,7 @@ fn zirCondbr( sub_block.need_debug_scope = null; // this body is emitted regardless defer sub_block.instructions.deinit(gpa); - try sema.analyzeBodyRuntimeBreak(&sub_block, then_body); + const true_hint = try sema.analyzeBodyRuntimeBreak(&sub_block, then_body); const true_instructions = try sub_block.instructions.toOwnedSlice(gpa); defer gpa.free(true_instructions); @@ -19365,11 +19456,13 @@ fn zirCondbr( break :blk try sub_block.addTyOp(.unwrap_errunion_err, result_ty, err_operand); }; - if (err_cond != null and try sema.maybeErrorUnwrap(&sub_block, else_body, err_cond.?, cond_src, false)) { - // nothing to do - } else { - try sema.analyzeBodyRuntimeBreak(&sub_block, else_body); - } + const false_hint: std.builtin.BranchHint = if (err_cond != null and + try sema.maybeErrorUnwrap(&sub_block, else_body, err_cond.?, cond_src, false)) + h: { + // nothing to do here. 
weight against error branch + break :h .unlikely; + } else try sema.analyzeBodyRuntimeBreak(&sub_block, else_body); + try sema.air_extra.ensureUnusedCapacity(gpa, @typeInfo(Air.CondBr).Struct.fields.len + true_instructions.len + sub_block.instructions.items.len); _ = try parent_block.addInst(.{ @@ -19379,6 +19472,7 @@ fn zirCondbr( .payload = sema.addExtraAssumeCapacity(Air.CondBr{ .then_body_len = @intCast(true_instructions.len), .else_body_len = @intCast(sub_block.instructions.items.len), + .branch_hints = .{ .true = true_hint, .false = false_hint }, }), } }, }); @@ -19403,6 +19497,11 @@ fn zirTry(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileError! } const is_non_err = try sema.analyzeIsNonErrComptimeOnly(parent_block, operand_src, err_union); if (is_non_err != .none) { + // We can propagate `.cold` hints from this branch since it's comptime-known + // to be taken from the parent branch. + const parent_hint = sema.branch_hint; + defer sema.branch_hint = parent_hint orelse if (sema.branch_hint == .cold) .cold else null; + const is_non_err_val = (try sema.resolveDefinedValue(parent_block, operand_src, is_non_err)).?; if (is_non_err_val.toBool()) { return sema.analyzeErrUnionPayload(parent_block, src, err_union_ty, err_union, operand_src, false); @@ -19416,13 +19515,19 @@ fn zirTry(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileError! var sub_block = parent_block.makeSubBlock(); defer sub_block.instructions.deinit(sema.gpa); + const parent_hint = sema.branch_hint; + defer sema.branch_hint = parent_hint; + // This body is guaranteed to end with noreturn and has no breaks. try sema.analyzeBodyInner(&sub_block, body); + // The only interesting hint here is `.cold`, which can come from e.g. `errdefer @panic`. 
+ const is_cold = sema.branch_hint == .cold; + try sema.air_extra.ensureUnusedCapacity(sema.gpa, @typeInfo(Air.Try).Struct.fields.len + sub_block.instructions.items.len); const try_inst = try parent_block.addInst(.{ - .tag = .@"try", + .tag = if (is_cold) .try_cold else .@"try", .data = .{ .pl_op = .{ .operand = err_union, .payload = sema.addExtraAssumeCapacity(Air.Try{ @@ -19452,6 +19557,11 @@ fn zirTryPtr(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileErr } const is_non_err = try sema.analyzeIsNonErrComptimeOnly(parent_block, operand_src, err_union); if (is_non_err != .none) { + // We can propagate `.cold` hints from this branch since it's comptime-known + // to be taken from the parent branch. + const parent_hint = sema.branch_hint; + defer sema.branch_hint = parent_hint orelse if (sema.branch_hint == .cold) .cold else null; + const is_non_err_val = (try sema.resolveDefinedValue(parent_block, operand_src, is_non_err)).?; if (is_non_err_val.toBool()) { return sema.analyzeErrUnionPayloadPtr(parent_block, src, operand, false, false); @@ -19465,9 +19575,15 @@ fn zirTryPtr(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileErr var sub_block = parent_block.makeSubBlock(); defer sub_block.instructions.deinit(sema.gpa); + const parent_hint = sema.branch_hint; + defer sema.branch_hint = parent_hint; + // This body is guaranteed to end with noreturn and has no breaks. try sema.analyzeBodyInner(&sub_block, body); + // The only interesting hint here is `.cold`, which can come from e.g. `errdefer @panic`. 
+ const is_cold = sema.branch_hint == .cold; + const operand_ty = sema.typeOf(operand); const ptr_info = operand_ty.ptrInfo(zcu); const res_ty = try pt.ptrTypeSema(.{ @@ -19483,7 +19599,7 @@ fn zirTryPtr(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileErr try sema.air_extra.ensureUnusedCapacity(sema.gpa, @typeInfo(Air.TryPtr).Struct.fields.len + sub_block.instructions.items.len); const try_inst = try parent_block.addInst(.{ - .tag = .try_ptr, + .tag = if (is_cold) .try_ptr_cold else .try_ptr, .data = .{ .ty_pl = .{ .ty = res_ty_ref, .payload = sema.addExtraAssumeCapacity(Air.TryPtr{ @@ -19735,6 +19851,8 @@ fn retWithErrTracing( const cond_br_payload = sema.addExtraAssumeCapacity(Air.CondBr{ .then_body_len = @intCast(then_block.instructions.items.len), .else_body_len = @intCast(else_block.instructions.items.len), + // weight against error branch + .branch_hints = .{ .true = .likely, .false = .unlikely }, }); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(then_block.instructions.items)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(else_block.instructions.items)); @@ -26747,6 +26865,7 @@ fn zirBuiltinValue(sema: *Sema, extended: Zir.Inst.Extended.InstData) CompileErr .export_options => "ExportOptions", .extern_options => "ExternOptions", .type_info => "Type", + .branch_hint => "BranchHint", // Values are handled here. 
.calling_convention_c => { @@ -26772,6 +26891,27 @@ fn zirBuiltinValue(sema: *Sema, extended: Zir.Inst.Extended.InstData) CompileErr return Air.internedToRef(ty.toIntern()); } +fn zirBranchHint(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!void { + const pt = sema.pt; + const zcu = pt.zcu; + + const extra = sema.code.extraData(Zir.Inst.UnNode, extended.operand).data; + const uncoerced_hint = try sema.resolveInst(extra.operand); + const operand_src = block.builtinCallArgSrc(extra.node, 0); + + const hint_ty = try pt.getBuiltinType("BranchHint"); + const coerced_hint = try sema.coerce(block, hint_ty, uncoerced_hint, operand_src); + const hint_val = try sema.resolveConstDefinedValue(block, operand_src, coerced_hint, .{ + .needed_comptime_reason = "operand to '@branchHint' must be comptime-known", + }); + + // We only apply the first hint in a branch. + // This allows user-provided hints to override implicit cold hints. + if (sema.branch_hint == null) { + sema.branch_hint = zcu.toEnum(std.builtin.BranchHint, hint_val); + } +} + fn requireRuntimeBlock(sema: *Sema, block: *Block, src: LazySrcLoc, runtime_src: ?LazySrcLoc) !void { if (block.is_comptime) { const msg = msg: { @@ -27327,13 +27467,17 @@ fn addSafetyCheckExtra( sema.air_instructions.appendAssumeCapacity(.{ .tag = .cond_br, - .data = .{ .pl_op = .{ - .operand = ok, - .payload = sema.addExtraAssumeCapacity(Air.CondBr{ - .then_body_len = 1, - .else_body_len = @intCast(fail_block.instructions.items.len), - }), - } }, + .data = .{ + .pl_op = .{ + .operand = ok, + .payload = sema.addExtraAssumeCapacity(Air.CondBr{ + .then_body_len = 1, + .else_body_len = @intCast(fail_block.instructions.items.len), + // safety check failure branch is cold + .branch_hints = .{ .true = .likely, .false = .cold }, + }), + }, + }, }); sema.air_extra.appendAssumeCapacity(@intFromEnum(br_inst)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(fail_block.instructions.items)); @@ -27530,6 +27674,7 @@ fn 
safetyCheckFormatted( try sema.addSafetyCheckExtra(parent_block, ok, &fail_block); } +/// This does not set `sema.branch_hint`. fn safetyPanic(sema: *Sema, block: *Block, src: LazySrcLoc, panic_id: Zcu.PanicId) CompileError!void { const msg_nav_index = try sema.preparePanicId(block, src, panic_id); const msg_inst = try sema.analyzeNavVal(block, src, msg_nav_index); @@ -37179,7 +37324,7 @@ pub fn addExtraAssumeCapacity(sema: *Sema, extra: anytype) u32 { inline for (fields) |field| { sema.air_extra.appendAssumeCapacity(switch (field.type) { u32 => @field(extra, field.name), - i32 => @bitCast(@field(extra, field.name)), + i32, Air.CondBr.BranchHints => @bitCast(@field(extra, field.name)), Air.Inst.Ref, InternPool.Index => @intFromEnum(@field(extra, field.name)), else => @compileError("bad field type: " ++ @typeName(field.type)), }); @@ -38247,6 +38392,12 @@ fn maybeDerefSliceAsArray( fn analyzeUnreachable(sema: *Sema, block: *Block, src: LazySrcLoc, safety_check: bool) !void { if (safety_check and block.wantSafety()) { + // We only apply the first hint in a branch. + // This allows user-provided hints to override implicit cold hints. + if (sema.branch_hint == null) { + sema.branch_hint = .cold; + } + try sema.safetyPanic(block, src, .unreach); } else { _ = try block.addNoOp(.unreach); diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig index 612921398e..b5038ff045 100644 --- a/src/Zcu/PerThread.zig +++ b/src/Zcu/PerThread.zig @@ -2188,6 +2188,8 @@ fn analyzeFnBody(pt: Zcu.PerThread, func_index: InternPool.Index) Zcu.SemaError! }); } + func.setBranchHint(ip, sema.branch_hint orelse .none); + // If we don't get an error return trace from a caller, create our own. 
if (func.analysisUnordered(ip).calls_or_awaits_errorable_fn and zcu.comp.config.any_error_tracing and diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 844a3e584a..9472bdecfd 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -795,7 +795,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .addrspace_cast => return self.fail("TODO implement addrspace_cast", .{}), .@"try" => try self.airTry(inst), + .try_cold => try self.airTry(inst), .try_ptr => try self.airTryPtr(inst), + .try_ptr_cold => try self.airTryPtr(inst), .dbg_stmt => try self.airDbgStmt(inst), .dbg_inline_block => try self.airDbgInlineBlock(inst), @@ -5092,25 +5094,17 @@ fn lowerBlock(self: *Self, inst: Air.Inst.Index, body: []const Air.Inst.Index) ! } fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { - const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; - const condition_ty = self.typeOf(pl_op.operand); - const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload); + const switch_br = self.air.unwrapSwitch(inst); + const condition_ty = self.typeOf(switch_br.operand); const liveness = try self.liveness.getSwitchBr( self.gpa, inst, - switch_br.data.cases_len + 1, + switch_br.cases_len + 1, ); defer self.gpa.free(liveness.deaths); - var extra_index: usize = switch_br.end; - var case_i: u32 = 0; - while (case_i < switch_br.data.cases_len) : (case_i += 1) { - const case = self.air.extraData(Air.SwitchBr.Case, extra_index); - const items = @as([]const Air.Inst.Ref, @ptrCast(self.air.extra[case.end..][0..case.data.items_len])); - assert(items.len > 0); - const case_body: []const Air.Inst.Index = @ptrCast(self.air.extra[case.end + items.len ..][0..case.data.body_len]); - extra_index = case.end + items.len + case_body.len; - + var it = switch_br.iterateCases(); + while (it.next()) |case| { // For every item, we compare it to condition and branch into // the prong if they are equal. 
After we compared to all // items, we branch into the next prong (or if no other prongs @@ -5126,11 +5120,11 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { // prong: ... // ... // out: ... - const branch_into_prong_relocs = try self.gpa.alloc(u32, items.len); + const branch_into_prong_relocs = try self.gpa.alloc(u32, case.items.len); defer self.gpa.free(branch_into_prong_relocs); - for (items, 0..) |item, idx| { - const cmp_result = try self.cmp(.{ .inst = pl_op.operand }, .{ .inst = item }, condition_ty, .neq); + for (case.items, 0..) |item, idx| { + const cmp_result = try self.cmp(.{ .inst = switch_br.operand }, .{ .inst = item }, condition_ty, .neq); branch_into_prong_relocs[idx] = try self.condBr(cmp_result); } @@ -5156,11 +5150,11 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { _ = self.branch_stack.pop(); } - try self.ensureProcessDeathCapacity(liveness.deaths[case_i].len); - for (liveness.deaths[case_i]) |operand| { + try self.ensureProcessDeathCapacity(liveness.deaths[case.idx].len); + for (liveness.deaths[case.idx]) |operand| { self.processDeath(operand); } - try self.genBody(case_body); + try self.genBody(case.body); // Revert to the previous register and stack allocation state. var saved_case_branch = self.branch_stack.pop(); @@ -5178,8 +5172,8 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { try self.performReloc(branch_away_from_prong_reloc); } - if (switch_br.data.else_body_len > 0) { - const else_body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra_index..][0..switch_br.data.else_body_len]); + if (switch_br.else_body_len > 0) { + const else_body = it.elseBody(); // Capture the state of register and stack allocation state so that we can revert to it. const parent_next_stack_offset = self.next_stack_offset; @@ -5218,7 +5212,7 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { // in airCondBr. 
} - return self.finishAir(inst, .unreach, .{ pl_op.operand, .none, .none }); + return self.finishAir(inst, .unreach, .{ switch_br.operand, .none, .none }); } fn performReloc(self: *Self, inst: Mir.Inst.Index) !void { diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 6549868fa5..9e71bff381 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -782,7 +782,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .addrspace_cast => return self.fail("TODO implement addrspace_cast", .{}), .@"try" => try self.airTry(inst), + .try_cold => try self.airTry(inst), .try_ptr => try self.airTryPtr(inst), + .try_ptr_cold => try self.airTryPtr(inst), .dbg_stmt => try self.airDbgStmt(inst), .dbg_inline_block => try self.airDbgInlineBlock(inst), @@ -5040,25 +5042,17 @@ fn lowerBlock(self: *Self, inst: Air.Inst.Index, body: []const Air.Inst.Index) ! } fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { - const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; - const condition_ty = self.typeOf(pl_op.operand); - const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload); + const switch_br = self.air.unwrapSwitch(inst); + const condition_ty = self.typeOf(switch_br.operand); const liveness = try self.liveness.getSwitchBr( self.gpa, inst, - switch_br.data.cases_len + 1, + switch_br.cases_len + 1, ); defer self.gpa.free(liveness.deaths); - var extra_index: usize = switch_br.end; - var case_i: u32 = 0; - while (case_i < switch_br.data.cases_len) : (case_i += 1) { - const case = self.air.extraData(Air.SwitchBr.Case, extra_index); - const items: []const Air.Inst.Ref = @ptrCast(self.air.extra[case.end..][0..case.data.items_len]); - assert(items.len > 0); - const case_body: []const Air.Inst.Index = @ptrCast(self.air.extra[case.end + items.len ..][0..case.data.body_len]); - extra_index = case.end + items.len + case_body.len; - + var it = switch_br.iterateCases(); + while (it.next()) |case| { // For every 
item, we compare it to condition and branch into // the prong if they are equal. After we compared to all // items, we branch into the next prong (or if no other prongs @@ -5074,11 +5068,11 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { // prong: ... // ... // out: ... - const branch_into_prong_relocs = try self.gpa.alloc(u32, items.len); + const branch_into_prong_relocs = try self.gpa.alloc(u32, case.items.len); defer self.gpa.free(branch_into_prong_relocs); - for (items, 0..) |item, idx| { - const cmp_result = try self.cmp(.{ .inst = pl_op.operand }, .{ .inst = item }, condition_ty, .neq); + for (case.items, 0..) |item, idx| { + const cmp_result = try self.cmp(.{ .inst = switch_br.operand }, .{ .inst = item }, condition_ty, .neq); branch_into_prong_relocs[idx] = try self.condBr(cmp_result); } @@ -5104,11 +5098,11 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { _ = self.branch_stack.pop(); } - try self.ensureProcessDeathCapacity(liveness.deaths[case_i].len); - for (liveness.deaths[case_i]) |operand| { + try self.ensureProcessDeathCapacity(liveness.deaths[case.idx].len); + for (liveness.deaths[case.idx]) |operand| { self.processDeath(operand); } - try self.genBody(case_body); + try self.genBody(case.body); // Revert to the previous register and stack allocation state. var saved_case_branch = self.branch_stack.pop(); @@ -5126,8 +5120,8 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { try self.performReloc(branch_away_from_prong_reloc); } - if (switch_br.data.else_body_len > 0) { - const else_body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra_index..][0..switch_br.data.else_body_len]); + if (switch_br.else_body_len > 0) { + const else_body = it.elseBody(); // Capture the state of register and stack allocation state so that we can revert to it. const parent_next_stack_offset = self.next_stack_offset; @@ -5166,7 +5160,7 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { // in airCondBr. 
} - return self.finishAir(inst, .unreach, .{ pl_op.operand, .none, .none }); + return self.finishAir(inst, .unreach, .{ switch_br.operand, .none, .none }); } fn performReloc(self: *Self, inst: Mir.Inst.Index) !void { diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 7028844779..d7e882ddbe 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -1640,7 +1640,9 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void { .addrspace_cast => return func.fail("TODO: addrspace_cast", .{}), .@"try" => try func.airTry(inst), + .try_cold => try func.airTry(inst), .try_ptr => return func.fail("TODO: try_ptr", .{}), + .try_ptr_cold => return func.fail("TODO: try_ptr_cold", .{}), .dbg_var_ptr, .dbg_var_val, @@ -5659,38 +5661,30 @@ fn lowerBlock(func: *Func, inst: Air.Inst.Index, body: []const Air.Inst.Index) ! } fn airSwitchBr(func: *Func, inst: Air.Inst.Index) !void { - const pl_op = func.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; - const condition_ty = func.typeOf(pl_op.operand); - const switch_br = func.air.extraData(Air.SwitchBr, pl_op.payload); - var extra_index: usize = switch_br.end; - var case_i: u32 = 0; - const liveness = try func.liveness.getSwitchBr(func.gpa, inst, switch_br.data.cases_len + 1); + const switch_br = func.air.unwrapSwitch(inst); + + const liveness = try func.liveness.getSwitchBr(func.gpa, inst, switch_br.cases_len + 1); defer func.gpa.free(liveness.deaths); - const condition = try func.resolveInst(pl_op.operand); + const condition = try func.resolveInst(switch_br.operand); + const condition_ty = func.typeOf(switch_br.operand); // If the condition dies here in this switch instruction, process // that death now instead of later as this has an effect on // whether it needs to be spilled in the branches if (func.liveness.operandDies(inst, 0)) { - if (pl_op.operand.toIndex()) |op_inst| try func.processDeath(op_inst); + if (switch_br.operand.toIndex()) |op_inst| try 
func.processDeath(op_inst); } func.scope_generation += 1; const state = try func.saveState(); - while (case_i < switch_br.data.cases_len) : (case_i += 1) { - const case = func.air.extraData(Air.SwitchBr.Case, extra_index); - const items: []const Air.Inst.Ref = - @ptrCast(func.air.extra[case.end..][0..case.data.items_len]); - const case_body: []const Air.Inst.Index = - @ptrCast(func.air.extra[case.end + items.len ..][0..case.data.body_len]); - extra_index = case.end + items.len + case_body.len; - - var relocs = try func.gpa.alloc(Mir.Inst.Index, items.len); + var it = switch_br.iterateCases(); + while (it.next()) |case| { + var relocs = try func.gpa.alloc(Mir.Inst.Index, case.items.len); defer func.gpa.free(relocs); - for (items, relocs, 0..) |item, *reloc, i| { + for (case.items, relocs, 0..) |item, *reloc, i| { const item_mcv = try func.resolveInst(item); const cond_lock = switch (condition) { @@ -5724,10 +5718,10 @@ fn airSwitchBr(func: *Func, inst: Air.Inst.Index) !void { reloc.* = try func.condBr(condition_ty, .{ .register = cmp_reg }); } - for (liveness.deaths[case_i]) |operand| try func.processDeath(operand); + for (liveness.deaths[case.idx]) |operand| try func.processDeath(operand); for (relocs[0 .. 
relocs.len - 1]) |reloc| func.performReloc(reloc); - try func.genBody(case_body); + try func.genBody(case.body); try func.restoreState(state, &.{}, .{ .emit_instructions = false, .update_tracking = true, @@ -5738,9 +5732,8 @@ fn airSwitchBr(func: *Func, inst: Air.Inst.Index) !void { func.performReloc(relocs[relocs.len - 1]); } - if (switch_br.data.else_body_len > 0) { - const else_body: []const Air.Inst.Index = - @ptrCast(func.air.extra[extra_index..][0..switch_br.data.else_body_len]); + if (switch_br.else_body_len > 0) { + const else_body = it.elseBody(); const else_deaths = liveness.deaths.len - 1; for (liveness.deaths[else_deaths]) |operand| try func.processDeath(operand); diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 3abf8c81bd..06611a2804 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -637,7 +637,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .addrspace_cast => @panic("TODO try self.airAddrSpaceCast(int)"), .@"try" => try self.airTry(inst), + .try_cold => try self.airTry(inst), .try_ptr => @panic("TODO try self.airTryPtr(inst)"), + .try_ptr_cold => @panic("TODO try self.airTryPtrCold(inst)"), .dbg_stmt => try self.airDbgStmt(inst), .dbg_inline_block => try self.airDbgInlineBlock(inst), diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index f36df7c444..fbe63925dc 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1913,7 +1913,9 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { .get_union_tag => func.airGetUnionTag(inst), .@"try" => func.airTry(inst), + .try_cold => func.airTry(inst), .try_ptr => func.airTryPtr(inst), + .try_ptr_cold => func.airTryPtr(inst), .dbg_stmt => func.airDbgStmt(inst), .dbg_inline_block => func.airDbgInlineBlock(inst), @@ -4041,37 +4043,31 @@ fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const zcu = pt.zcu; // result type is always 'noreturn' const blocktype 
= wasm.block_empty; - const pl_op = func.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; - const target = try func.resolveInst(pl_op.operand); - const target_ty = func.typeOf(pl_op.operand); - const switch_br = func.air.extraData(Air.SwitchBr, pl_op.payload); - const liveness = try func.liveness.getSwitchBr(func.gpa, inst, switch_br.data.cases_len + 1); + const switch_br = func.air.unwrapSwitch(inst); + const target = try func.resolveInst(switch_br.operand); + const target_ty = func.typeOf(switch_br.operand); + const liveness = try func.liveness.getSwitchBr(func.gpa, inst, switch_br.cases_len + 1); defer func.gpa.free(liveness.deaths); - var extra_index: usize = switch_br.end; - var case_i: u32 = 0; - // a list that maps each value with its value and body based on the order inside the list. const CaseValue = struct { integer: i32, value: Value }; var case_list = try std.ArrayList(struct { values: []const CaseValue, body: []const Air.Inst.Index, - }).initCapacity(func.gpa, switch_br.data.cases_len); + }).initCapacity(func.gpa, switch_br.cases_len); defer for (case_list.items) |case| { func.gpa.free(case.values); } else case_list.deinit(); var lowest_maybe: ?i32 = null; var highest_maybe: ?i32 = null; - while (case_i < switch_br.data.cases_len) : (case_i += 1) { - const case = func.air.extraData(Air.SwitchBr.Case, extra_index); - const items: []const Air.Inst.Ref = @ptrCast(func.air.extra[case.end..][0..case.data.items_len]); - const case_body: []const Air.Inst.Index = @ptrCast(func.air.extra[case.end + items.len ..][0..case.data.body_len]); - extra_index = case.end + items.len + case_body.len; - const values = try func.gpa.alloc(CaseValue, items.len); + + var it = switch_br.iterateCases(); + while (it.next()) |case| { + const values = try func.gpa.alloc(CaseValue, case.items.len); errdefer func.gpa.free(values); - for (items, 0..) |ref, i| { + for (case.items, 0..) 
|ref, i| { const item_val = (try func.air.value(ref, pt)).?; const int_val = func.valueAsI32(item_val); if (lowest_maybe == null or int_val < lowest_maybe.?) { @@ -4083,7 +4079,7 @@ fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { values[i] = .{ .integer = int_val, .value = item_val }; } - case_list.appendAssumeCapacity(.{ .values = values, .body = case_body }); + case_list.appendAssumeCapacity(.{ .values = values, .body = case.body }); try func.startBlock(.block, blocktype); } @@ -4097,7 +4093,7 @@ fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { // TODO: Benchmark this to find a proper value, LLVM seems to draw the line at '40~45'. const is_sparse = highest - lowest > 50 or target_ty.bitSize(zcu) > 32; - const else_body: []const Air.Inst.Index = @ptrCast(func.air.extra[extra_index..][0..switch_br.data.else_body_len]); + const else_body = it.elseBody(); const has_else_body = else_body.len != 0; if (has_else_body) { try func.startBlock(.block, blocktype); @@ -4140,11 +4136,11 @@ fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { // for errors that are not present in any branch. This is fine as this default // case will never be hit for those cases but we do save runtime cost and size // by using a jump table for this instead of if-else chains. 
- break :blk if (has_else_body or target_ty.zigTypeTag(zcu) == .ErrorSet) case_i else unreachable; + break :blk if (has_else_body or target_ty.zigTypeTag(zcu) == .ErrorSet) switch_br.cases_len else unreachable; }; func.mir_extra.appendAssumeCapacity(idx); } else if (has_else_body) { - func.mir_extra.appendAssumeCapacity(case_i); // default branch + func.mir_extra.appendAssumeCapacity(switch_br.cases_len); // default branch } try func.endBlock(); } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 25a951df7b..d5661fc193 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2262,7 +2262,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .addrspace_cast => return self.fail("TODO implement addrspace_cast", .{}), .@"try" => try self.airTry(inst), + .try_cold => try self.airTry(inst), // TODO .try_ptr => try self.airTryPtr(inst), + .try_ptr_cold => try self.airTryPtr(inst), // TODO .dbg_stmt => try self.airDbgStmt(inst), .dbg_inline_block => try self.airDbgInlineBlock(inst), @@ -13631,38 +13633,29 @@ fn lowerBlock(self: *Self, inst: Air.Inst.Index, body: []const Air.Inst.Index) ! 
} fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { - const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; - const condition = try self.resolveInst(pl_op.operand); - const condition_ty = self.typeOf(pl_op.operand); - const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload); - var extra_index: usize = switch_br.end; - var case_i: u32 = 0; - const liveness = try self.liveness.getSwitchBr(self.gpa, inst, switch_br.data.cases_len + 1); + const switch_br = self.air.unwrapSwitch(inst); + const condition = try self.resolveInst(switch_br.operand); + const condition_ty = self.typeOf(switch_br.operand); + const liveness = try self.liveness.getSwitchBr(self.gpa, inst, switch_br.cases_len + 1); defer self.gpa.free(liveness.deaths); // If the condition dies here in this switch instruction, process // that death now instead of later as this has an effect on // whether it needs to be spilled in the branches if (self.liveness.operandDies(inst, 0)) { - if (pl_op.operand.toIndex()) |op_inst| try self.processDeath(op_inst); + if (switch_br.operand.toIndex()) |op_inst| try self.processDeath(op_inst); } self.scope_generation += 1; const state = try self.saveState(); - while (case_i < switch_br.data.cases_len) : (case_i += 1) { - const case = self.air.extraData(Air.SwitchBr.Case, extra_index); - const items: []const Air.Inst.Ref = - @ptrCast(self.air.extra[case.end..][0..case.data.items_len]); - const case_body: []const Air.Inst.Index = - @ptrCast(self.air.extra[case.end + items.len ..][0..case.data.body_len]); - extra_index = case.end + items.len + case_body.len; - - var relocs = try self.gpa.alloc(Mir.Inst.Index, items.len); + var it = switch_br.iterateCases(); + while (it.next()) |case| { + var relocs = try self.gpa.alloc(Mir.Inst.Index, case.items.len); defer self.gpa.free(relocs); try self.spillEflagsIfOccupied(); - for (items, relocs, 0..) |item, *reloc, i| { + for (case.items, relocs, 0..) 
|item, *reloc, i| { const item_mcv = try self.resolveInst(item); const cc: Condition = switch (condition) { .eflags => |cc| switch (item_mcv.immediate) { @@ -13678,10 +13671,10 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { reloc.* = try self.asmJccReloc(if (i < relocs.len - 1) cc else cc.negate(), undefined); } - for (liveness.deaths[case_i]) |operand| try self.processDeath(operand); + for (liveness.deaths[case.idx]) |operand| try self.processDeath(operand); for (relocs[0 .. relocs.len - 1]) |reloc| self.performReloc(reloc); - try self.genBody(case_body); + try self.genBody(case.body); try self.restoreState(state, &.{}, .{ .emit_instructions = false, .update_tracking = true, @@ -13692,9 +13685,8 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { self.performReloc(relocs[relocs.len - 1]); } - if (switch_br.data.else_body_len > 0) { - const else_body: []const Air.Inst.Index = - @ptrCast(self.air.extra[extra_index..][0..switch_br.data.else_body_len]); + if (switch_br.else_body_len > 0) { + const else_body = it.elseBody(); const else_deaths = liveness.deaths.len - 1; for (liveness.deaths[else_deaths]) |operand| try self.processDeath(operand); diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 754286d80b..eb5d2921ad 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -1786,7 +1786,7 @@ pub const DeclGen = struct { else => unreachable, } } - if (fn_val.getFunction(zcu)) |func| if (func.analysisUnordered(ip).is_cold) + if (fn_val.getFunction(zcu)) |func| if (func.analysisUnordered(ip).branch_hint == .cold) try w.writeAll("zig_cold "); if (fn_info.return_type == .noreturn_type) try w.writeAll("zig_noreturn "); @@ -3290,8 +3290,10 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .prefetch => try airPrefetch(f, inst), .addrspace_cast => return f.fail("TODO: C backend: implement addrspace_cast", .{}), - .@"try" => try airTry(f, inst), - .try_ptr => try airTryPtr(f, inst), + .@"try" => try airTry(f, inst), 
+ .try_cold => try airTry(f, inst), + .try_ptr => try airTryPtr(f, inst), + .try_ptr_cold => try airTryPtr(f, inst), .dbg_stmt => try airDbgStmt(f, inst), .dbg_inline_block => try airDbgInlineBlock(f, inst), @@ -4988,11 +4990,10 @@ fn airCondBr(f: *Function, inst: Air.Inst.Index) !CValue { fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue { const pt = f.object.dg.pt; const zcu = pt.zcu; - const pl_op = f.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; - const condition = try f.resolveInst(pl_op.operand); - try reap(f, inst, &.{pl_op.operand}); - const condition_ty = f.typeOf(pl_op.operand); - const switch_br = f.air.extraData(Air.SwitchBr, pl_op.payload); + const switch_br = f.air.unwrapSwitch(inst); + const condition = try f.resolveInst(switch_br.operand); + try reap(f, inst, &.{switch_br.operand}); + const condition_ty = f.typeOf(switch_br.operand); const writer = f.object.writer(); try writer.writeAll("switch ("); @@ -5013,22 +5014,16 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue { f.object.indent_writer.pushIndent(); const gpa = f.object.dg.gpa; - const liveness = try f.liveness.getSwitchBr(gpa, inst, switch_br.data.cases_len + 1); + const liveness = try f.liveness.getSwitchBr(gpa, inst, switch_br.cases_len + 1); defer gpa.free(liveness.deaths); // On the final iteration we do not need to fix any state. This is because, like in the `else` // branch of a `cond_br`, our parent has to do it for this entire body anyway. 
- const last_case_i = switch_br.data.cases_len - @intFromBool(switch_br.data.else_body_len == 0); + const last_case_i = switch_br.cases_len - @intFromBool(switch_br.else_body_len == 0); - var extra_index: usize = switch_br.end; - for (0..switch_br.data.cases_len) |case_i| { - const case = f.air.extraData(Air.SwitchBr.Case, extra_index); - const items = @as([]const Air.Inst.Ref, @ptrCast(f.air.extra[case.end..][0..case.data.items_len])); - const case_body: []const Air.Inst.Index = - @ptrCast(f.air.extra[case.end + items.len ..][0..case.data.body_len]); - extra_index = case.end + case.data.items_len + case_body.len; - - for (items) |item| { + var it = switch_br.iterateCases(); + while (it.next()) |case| { + for (case.items) |item| { try f.object.indent_writer.insertNewline(); try writer.writeAll("case "); const item_value = try f.air.value(item, pt); @@ -5046,19 +5041,19 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue { } try writer.writeByte(' '); - if (case_i != last_case_i) { - try genBodyResolveState(f, inst, liveness.deaths[case_i], case_body, false); + if (case.idx != last_case_i) { + try genBodyResolveState(f, inst, liveness.deaths[case.idx], case.body, false); } else { - for (liveness.deaths[case_i]) |death| { + for (liveness.deaths[case.idx]) |death| { try die(f, inst, death.toRef()); } - try genBody(f, case_body); + try genBody(f, case.body); } // The case body must be noreturn so we don't need to insert a break. } - const else_body: []const Air.Inst.Index = @ptrCast(f.air.extra[extra_index..][0..switch_br.data.else_body_len]); + const else_body = it.elseBody(); try f.object.indent_writer.insertNewline(); if (else_body.len > 0) { // Note that this must be the last case (i.e. 
the `last_case_i` case was not hit above) diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 1d8667ecb2..f49e1c333b 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -898,9 +898,9 @@ pub const Object = struct { const i32_2 = try builder.intConst(.i32, 2); const i32_3 = try builder.intConst(.i32, 3); const debug_info_version = try builder.debugModuleFlag( - try builder.debugConstant(i32_2), + try builder.metadataConstant(i32_2), try builder.metadataString("Debug Info Version"), - try builder.debugConstant(i32_3), + try builder.metadataConstant(i32_3), ); switch (comp.config.debug_format) { @@ -908,9 +908,9 @@ pub const Object = struct { .dwarf => |f| { const i32_4 = try builder.intConst(.i32, 4); const dwarf_version = try builder.debugModuleFlag( - try builder.debugConstant(i32_2), + try builder.metadataConstant(i32_2), try builder.metadataString("Dwarf Version"), - try builder.debugConstant(i32_4), + try builder.metadataConstant(i32_4), ); switch (f) { .@"32" => { @@ -921,9 +921,9 @@ pub const Object = struct { }, .@"64" => { const dwarf64 = try builder.debugModuleFlag( - try builder.debugConstant(i32_2), + try builder.metadataConstant(i32_2), try builder.metadataString("DWARF64"), - try builder.debugConstant(.@"1"), + try builder.metadataConstant(.@"1"), ); try builder.debugNamed(try builder.metadataString("llvm.module.flags"), &.{ debug_info_version, @@ -935,9 +935,9 @@ pub const Object = struct { }, .code_view => { const code_view = try builder.debugModuleFlag( - try builder.debugConstant(i32_2), + try builder.metadataConstant(i32_2), try builder.metadataString("CodeView"), - try builder.debugConstant(.@"1"), + try builder.metadataConstant(.@"1"), ); try builder.debugNamed(try builder.metadataString("llvm.module.flags"), &.{ debug_info_version, @@ -1122,12 +1122,12 @@ pub const Object = struct { self.builder.debugForwardReferenceSetType( self.debug_enums_fwd_ref, - try self.builder.debugTuple(self.debug_enums.items), + try 
self.builder.metadataTuple(self.debug_enums.items), ); self.builder.debugForwardReferenceSetType( self.debug_globals_fwd_ref, - try self.builder.debugTuple(self.debug_globals.items), + try self.builder.metadataTuple(self.debug_globals.items), ); } } @@ -1369,7 +1369,7 @@ pub const Object = struct { _ = try attributes.removeFnAttr(.alignstack); } - if (func_analysis.is_cold) { + if (func_analysis.branch_hint == .cold) { try attributes.addFnAttr(.cold, &o.builder); } else { _ = try attributes.removeFnAttr(.cold); @@ -1978,7 +1978,7 @@ pub const Object = struct { try o.lowerDebugType(int_ty), ty.abiSize(zcu) * 8, (ty.abiAlignment(zcu).toByteUnits() orelse 0) * 8, - try o.builder.debugTuple(enumerators), + try o.builder.metadataTuple(enumerators), ); try o.debug_type_map.put(gpa, ty, debug_enum_type); @@ -2087,7 +2087,7 @@ pub const Object = struct { .none, // Underlying type ty.abiSize(zcu) * 8, (ty.abiAlignment(zcu).toByteUnits() orelse 0) * 8, - try o.builder.debugTuple(&.{ + try o.builder.metadataTuple(&.{ debug_ptr_type, debug_len_type, }), @@ -2167,10 +2167,10 @@ pub const Object = struct { try o.lowerDebugType(ty.childType(zcu)), ty.abiSize(zcu) * 8, (ty.abiAlignment(zcu).toByteUnits() orelse 0) * 8, - try o.builder.debugTuple(&.{ + try o.builder.metadataTuple(&.{ try o.builder.debugSubrange( - try o.builder.debugConstant(try o.builder.intConst(.i64, 0)), - try o.builder.debugConstant(try o.builder.intConst(.i64, ty.arrayLen(zcu))), + try o.builder.metadataConstant(try o.builder.intConst(.i64, 0)), + try o.builder.metadataConstant(try o.builder.intConst(.i64, ty.arrayLen(zcu))), ), }), ); @@ -2210,10 +2210,10 @@ pub const Object = struct { debug_elem_type, ty.abiSize(zcu) * 8, (ty.abiAlignment(zcu).toByteUnits() orelse 0) * 8, - try o.builder.debugTuple(&.{ + try o.builder.metadataTuple(&.{ try o.builder.debugSubrange( - try o.builder.debugConstant(try o.builder.intConst(.i64, 0)), - try o.builder.debugConstant(try o.builder.intConst(.i64, ty.vectorLen(zcu))), + 
try o.builder.metadataConstant(try o.builder.intConst(.i64, 0)), + try o.builder.metadataConstant(try o.builder.intConst(.i64, ty.vectorLen(zcu))), ), }), ); @@ -2288,7 +2288,7 @@ pub const Object = struct { .none, // Underlying type ty.abiSize(zcu) * 8, (ty.abiAlignment(zcu).toByteUnits() orelse 0) * 8, - try o.builder.debugTuple(&.{ + try o.builder.metadataTuple(&.{ debug_data_type, debug_some_type, }), @@ -2367,7 +2367,7 @@ pub const Object = struct { .none, // Underlying type ty.abiSize(zcu) * 8, (ty.abiAlignment(zcu).toByteUnits() orelse 0) * 8, - try o.builder.debugTuple(&fields), + try o.builder.metadataTuple(&fields), ); o.builder.debugForwardReferenceSetType(debug_fwd_ref, debug_error_union_type); @@ -2447,7 +2447,7 @@ pub const Object = struct { .none, // Underlying type ty.abiSize(zcu) * 8, (ty.abiAlignment(zcu).toByteUnits() orelse 0) * 8, - try o.builder.debugTuple(fields.items), + try o.builder.metadataTuple(fields.items), ); o.builder.debugForwardReferenceSetType(debug_fwd_ref, debug_struct_type); @@ -2520,7 +2520,7 @@ pub const Object = struct { .none, // Underlying type ty.abiSize(zcu) * 8, (ty.abiAlignment(zcu).toByteUnits() orelse 0) * 8, - try o.builder.debugTuple(fields.items), + try o.builder.metadataTuple(fields.items), ); o.builder.debugForwardReferenceSetType(debug_fwd_ref, debug_struct_type); @@ -2561,7 +2561,7 @@ pub const Object = struct { .none, // Underlying type ty.abiSize(zcu) * 8, (ty.abiAlignment(zcu).toByteUnits() orelse 0) * 8, - try o.builder.debugTuple( + try o.builder.metadataTuple( &.{try o.lowerDebugType(Type.fromInterned(union_type.enum_tag_ty))}, ), ); @@ -2623,7 +2623,7 @@ pub const Object = struct { .none, // Underlying type ty.abiSize(zcu) * 8, (ty.abiAlignment(zcu).toByteUnits() orelse 0) * 8, - try o.builder.debugTuple(fields.items), + try o.builder.metadataTuple(fields.items), ); o.builder.debugForwardReferenceSetType(debug_union_fwd_ref, debug_union_type); @@ -2682,7 +2682,7 @@ pub const Object = struct { .none, // 
Underlying type ty.abiSize(zcu) * 8, (ty.abiAlignment(zcu).toByteUnits() orelse 0) * 8, - try o.builder.debugTuple(&full_fields), + try o.builder.metadataTuple(&full_fields), ); o.builder.debugForwardReferenceSetType(debug_fwd_ref, debug_tagged_union_type); @@ -2735,7 +2735,7 @@ pub const Object = struct { } const debug_function_type = try o.builder.debugSubroutineType( - try o.builder.debugTuple(debug_param_types.items), + try o.builder.metadataTuple(debug_param_types.items), ); try o.debug_type_map.put(gpa, ty, debug_function_type); @@ -4571,7 +4571,7 @@ pub const Object = struct { const bad_value_block = try wip.block(1, "BadValue"); const tag_int_value = wip.arg(0); var wip_switch = - try wip.@"switch"(tag_int_value, bad_value_block, @intCast(enum_type.names.len)); + try wip.@"switch"(tag_int_value, bad_value_block, @intCast(enum_type.names.len), .none); defer wip_switch.finish(&wip); for (0..enum_type.names.len) |field_index| { @@ -4958,8 +4958,10 @@ pub const FuncGen = struct { .ret_addr => try self.airRetAddr(inst), .frame_addr => try self.airFrameAddress(inst), .cond_br => try self.airCondBr(inst), - .@"try" => try self.airTry(body[i..]), - .try_ptr => try self.airTryPtr(inst), + .@"try" => try self.airTry(body[i..], false), + .try_cold => try self.airTry(body[i..], true), + .try_ptr => try self.airTryPtr(inst, false), + .try_ptr_cold => try self.airTryPtr(inst, true), .intcast => try self.airIntCast(inst), .trunc => try self.airTrunc(inst), .fptrunc => try self.airFptrunc(inst), @@ -5506,6 +5508,7 @@ pub const FuncGen = struct { const panic_nav = ip.getNav(panic_func.owner_nav); const fn_info = zcu.typeToFunc(Type.fromInterned(panic_nav.typeOf(ip))).?; const panic_global = try o.resolveLlvmFunction(panic_func.owner_nav); + _ = try fg.wip.callIntrinsicAssumeCold(); _ = try fg.wip.call( .normal, toLlvmCallConv(fn_info.cc, target), @@ -5794,7 +5797,7 @@ pub const FuncGen = struct { const mixed_block = try self.wip.block(1, "Mixed"); const both_pl_block = try 
self.wip.block(1, "BothNonNull"); const end_block = try self.wip.block(3, "End"); - var wip_switch = try self.wip.@"switch"(lhs_rhs_ored, mixed_block, 2); + var wip_switch = try self.wip.@"switch"(lhs_rhs_ored, mixed_block, 2, .none); defer wip_switch.finish(&self.wip); try wip_switch.addCase( try o.builder.intConst(llvm_i2, 0b00), @@ -5948,21 +5951,62 @@ pub const FuncGen = struct { const then_body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end..][0..extra.data.then_body_len]); const else_body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len]); + const Hint = enum { + none, + unpredictable, + then_likely, + else_likely, + then_cold, + else_cold, + }; + const hint: Hint = switch (extra.data.branch_hints.true) { + .none => switch (extra.data.branch_hints.false) { + .none => .none, + .likely => .else_likely, + .unlikely => .then_likely, + .cold => .else_cold, + .unpredictable => .unpredictable, + }, + .likely => switch (extra.data.branch_hints.false) { + .none => .then_likely, + .likely => .unpredictable, + .unlikely => .then_likely, + .cold => .else_cold, + .unpredictable => .unpredictable, + }, + .unlikely => switch (extra.data.branch_hints.false) { + .none => .else_likely, + .likely => .else_likely, + .unlikely => .unpredictable, + .cold => .else_cold, + .unpredictable => .unpredictable, + }, + .cold => .then_cold, + .unpredictable => .unpredictable, + }; + const then_block = try self.wip.block(1, "Then"); const else_block = try self.wip.block(1, "Else"); - _ = try self.wip.brCond(cond, then_block, else_block); + _ = try self.wip.brCond(cond, then_block, else_block, switch (hint) { + .none, .then_cold, .else_cold => .none, + .unpredictable => .unpredictable, + .then_likely => .then_likely, + .else_likely => .else_likely, + }); self.wip.cursor = .{ .block = then_block }; + if (hint == .then_cold) _ = try self.wip.callIntrinsicAssumeCold(); try self.genBodyDebugScope(null, then_body); 
self.wip.cursor = .{ .block = else_block }; + if (hint == .else_cold) _ = try self.wip.callIntrinsicAssumeCold(); try self.genBodyDebugScope(null, else_body); // No need to reset the insert cursor since this instruction is noreturn. return .none; } - fn airTry(self: *FuncGen, body_tail: []const Air.Inst.Index) !Builder.Value { + fn airTry(self: *FuncGen, body_tail: []const Air.Inst.Index, err_cold: bool) !Builder.Value { const o = self.ng.object; const pt = o.pt; const zcu = pt.zcu; @@ -5975,10 +6019,10 @@ pub const FuncGen = struct { const payload_ty = self.typeOfIndex(inst); const can_elide_load = if (isByRef(payload_ty, zcu)) self.canElideLoad(body_tail) else false; const is_unused = self.liveness.isUnused(inst); - return lowerTry(self, err_union, body, err_union_ty, false, can_elide_load, is_unused); + return lowerTry(self, err_union, body, err_union_ty, false, can_elide_load, is_unused, err_cold); } - fn airTryPtr(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { + fn airTryPtr(self: *FuncGen, inst: Air.Inst.Index, err_cold: bool) !Builder.Value { const o = self.ng.object; const zcu = o.pt.zcu; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; @@ -5987,7 +6031,7 @@ pub const FuncGen = struct { const body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]); const err_union_ty = self.typeOf(extra.data.ptr).childType(zcu); const is_unused = self.liveness.isUnused(inst); - return lowerTry(self, err_union_ptr, body, err_union_ty, true, true, is_unused); + return lowerTry(self, err_union_ptr, body, err_union_ty, true, true, is_unused, err_cold); } fn lowerTry( @@ -5998,6 +6042,7 @@ pub const FuncGen = struct { operand_is_ptr: bool, can_elide_load: bool, is_unused: bool, + err_cold: bool, ) !Builder.Value { const o = fg.ng.object; const pt = o.pt; @@ -6036,9 +6081,10 @@ pub const FuncGen = struct { const return_block = try fg.wip.block(1, "TryRet"); const continue_block = try fg.wip.block(1, 
"TryCont"); - _ = try fg.wip.brCond(is_err, return_block, continue_block); + _ = try fg.wip.brCond(is_err, return_block, continue_block, if (err_cold) .none else .else_likely); fg.wip.cursor = .{ .block = return_block }; + if (err_cold) _ = try fg.wip.callIntrinsicAssumeCold(); try fg.genBodyDebugScope(null, body); fg.wip.cursor = .{ .block = continue_block }; @@ -6065,9 +6111,11 @@ pub const FuncGen = struct { fn airSwitchBr(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { const o = self.ng.object; - const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; - const cond = try self.resolveInst(pl_op.operand); - const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload); + + const switch_br = self.air.unwrapSwitch(inst); + + const cond = try self.resolveInst(switch_br.operand); + const else_block = try self.wip.block(1, "Default"); const llvm_usize = try o.lowerType(Type.usize); const cond_int = if (cond.typeOfWip(&self.wip).isPointer(&o.builder)) @@ -6075,34 +6123,70 @@ pub const FuncGen = struct { else cond; - var extra_index: usize = switch_br.end; - var case_i: u32 = 0; - var llvm_cases_len: u32 = 0; - while (case_i < switch_br.data.cases_len) : (case_i += 1) { - const case = self.air.extraData(Air.SwitchBr.Case, extra_index); - const items: []const Air.Inst.Ref = - @ptrCast(self.air.extra[case.end..][0..case.data.items_len]); - const case_body = self.air.extra[case.end + items.len ..][0..case.data.body_len]; - extra_index = case.end + case.data.items_len + case_body.len; + const llvm_cases_len = llvm_cases_len: { + var len: u32 = 0; + var it = switch_br.iterateCases(); + while (it.next()) |case| len += @intCast(case.items.len); + break :llvm_cases_len len; + }; - llvm_cases_len += @intCast(items.len); - } + const weights: Builder.Function.Instruction.BrCond.Weights = weights: { + // First pass. If any weights are `.unpredictable`, unpredictable. + // If all are `.none` or `.cold`, none. 
+ var any_likely = false; + for (0..switch_br.cases_len) |case_idx| { + switch (switch_br.getHint(@intCast(case_idx))) { + .none, .cold => {}, + .likely, .unlikely => any_likely = true, + .unpredictable => break :weights .unpredictable, + } + } + switch (switch_br.getElseHint()) { + .none, .cold => {}, + .likely, .unlikely => any_likely = true, + .unpredictable => break :weights .unpredictable, + } + if (!any_likely) break :weights .none; - var wip_switch = try self.wip.@"switch"(cond_int, else_block, llvm_cases_len); + var weights = try self.gpa.alloc(Builder.Metadata, llvm_cases_len + 1); + defer self.gpa.free(weights); + + const else_weight: u32 = switch (switch_br.getElseHint()) { + .unpredictable => unreachable, + .none, .cold => 1000, + .likely => 2000, + .unlikely => 1, + }; + weights[0] = try o.builder.metadataConstant(try o.builder.intConst(.i32, else_weight)); + + var weight_idx: usize = 1; + var it = switch_br.iterateCases(); + while (it.next()) |case| { + const weight_val: u32 = switch (switch_br.getHint(case.idx)) { + .unpredictable => unreachable, + .none, .cold => 1000, + .likely => 2000, + .unlikely => 1, + }; + const weight_meta = try o.builder.metadataConstant(try o.builder.intConst(.i32, weight_val)); + @memset(weights[weight_idx..][0..case.items.len], weight_meta); + weight_idx += case.items.len; + } + + assert(weight_idx == weights.len); + + const branch_weights_str = try o.builder.metadataString("branch_weights"); + const tuple = try o.builder.strTuple(branch_weights_str, weights); + break :weights @enumFromInt(@intFromEnum(tuple)); + }; + + var wip_switch = try self.wip.@"switch"(cond_int, else_block, llvm_cases_len, weights); defer wip_switch.finish(&self.wip); - extra_index = switch_br.end; - case_i = 0; - while (case_i < switch_br.data.cases_len) : (case_i += 1) { - const case = self.air.extraData(Air.SwitchBr.Case, extra_index); - const items: []const Air.Inst.Ref = - @ptrCast(self.air.extra[case.end..][0..case.data.items_len]); - const 
case_body: []const Air.Inst.Index = @ptrCast(self.air.extra[case.end + items.len ..][0..case.data.body_len]); - extra_index = case.end + case.data.items_len + case_body.len; - - const case_block = try self.wip.block(@intCast(items.len), "Case"); - - for (items) |item| { + var it = switch_br.iterateCases(); + while (it.next()) |case| { + const case_block = try self.wip.block(@intCast(case.items.len), "Case"); + for (case.items) |item| { const llvm_item = (try self.resolveInst(item)).toConst().?; const llvm_int_item = if (llvm_item.typeOf(&o.builder).isPointer(&o.builder)) try o.builder.castConst(.ptrtoint, llvm_item, llvm_usize) @@ -6110,13 +6194,14 @@ pub const FuncGen = struct { llvm_item; try wip_switch.addCase(llvm_int_item, case_block, &self.wip); } - self.wip.cursor = .{ .block = case_block }; - try self.genBodyDebugScope(null, case_body); + if (switch_br.getHint(case.idx) == .cold) _ = try self.wip.callIntrinsicAssumeCold(); + try self.genBodyDebugScope(null, case.body); } + const else_body = it.elseBody(); self.wip.cursor = .{ .block = else_block }; - const else_body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra_index..][0..switch_br.data.else_body_len]); + if (switch_br.getElseHint() == .cold) _ = try self.wip.callIntrinsicAssumeCold(); if (else_body.len != 0) { try self.genBodyDebugScope(null, else_body); } else { @@ -7748,7 +7833,7 @@ pub const FuncGen = struct { const fail_block = try fg.wip.block(1, "OverflowFail"); const ok_block = try fg.wip.block(1, "OverflowOk"); - _ = try fg.wip.brCond(overflow_bit, fail_block, ok_block); + _ = try fg.wip.brCond(overflow_bit, fail_block, ok_block, .none); fg.wip.cursor = .{ .block = fail_block }; try fg.buildSimplePanic(.integer_overflow); @@ -9389,7 +9474,7 @@ pub const FuncGen = struct { self.wip.cursor = .{ .block = loop_block }; const it_ptr = try self.wip.phi(.ptr, ""); const end = try self.wip.icmp(.ne, it_ptr.toValue(), end_ptr, ""); - _ = try self.wip.brCond(end, body_block, end_block); + _ = try 
self.wip.brCond(end, body_block, end_block, .none); self.wip.cursor = .{ .block = body_block }; const elem_abi_align = elem_ty.abiAlignment(zcu); @@ -9427,7 +9512,7 @@ pub const FuncGen = struct { const cond = try self.cmp(.normal, .neq, Type.usize, len, usize_zero); const memset_block = try self.wip.block(1, "MemsetTrapSkip"); const end_block = try self.wip.block(2, "MemsetTrapEnd"); - _ = try self.wip.brCond(cond, memset_block, end_block); + _ = try self.wip.brCond(cond, memset_block, end_block, .none); self.wip.cursor = .{ .block = memset_block }; _ = try self.wip.callMemSet(dest_ptr, dest_ptr_align, fill_byte, len, access_kind); _ = try self.wip.br(end_block); @@ -9462,7 +9547,7 @@ pub const FuncGen = struct { const cond = try self.cmp(.normal, .neq, Type.usize, len, usize_zero); const memcpy_block = try self.wip.block(1, "MemcpyTrapSkip"); const end_block = try self.wip.block(2, "MemcpyTrapEnd"); - _ = try self.wip.brCond(cond, memcpy_block, end_block); + _ = try self.wip.brCond(cond, memcpy_block, end_block, .none); self.wip.cursor = .{ .block = memcpy_block }; _ = try self.wip.callMemCpy( dest_ptr, @@ -9632,7 +9717,7 @@ pub const FuncGen = struct { const valid_block = try self.wip.block(@intCast(names.len), "Valid"); const invalid_block = try self.wip.block(1, "Invalid"); const end_block = try self.wip.block(2, "End"); - var wip_switch = try self.wip.@"switch"(operand, invalid_block, @intCast(names.len)); + var wip_switch = try self.wip.@"switch"(operand, invalid_block, @intCast(names.len), .none); defer wip_switch.finish(&self.wip); for (0..names.len) |name_index| { @@ -9708,7 +9793,7 @@ pub const FuncGen = struct { const named_block = try wip.block(@intCast(enum_type.names.len), "Named"); const unnamed_block = try wip.block(1, "Unnamed"); const tag_int_value = wip.arg(0); - var wip_switch = try wip.@"switch"(tag_int_value, unnamed_block, @intCast(enum_type.names.len)); + var wip_switch = try wip.@"switch"(tag_int_value, unnamed_block, 
@intCast(enum_type.names.len), .none); defer wip_switch.finish(&wip); for (0..enum_type.names.len) |field_index| { @@ -9858,7 +9943,7 @@ pub const FuncGen = struct { const cond = try self.wip.icmp(.ult, i, llvm_vector_len, ""); const loop_then = try self.wip.block(1, "ReduceLoopThen"); - _ = try self.wip.brCond(cond, loop_then, loop_exit); + _ = try self.wip.brCond(cond, loop_then, loop_exit, .none); { self.wip.cursor = .{ .block = loop_then }; diff --git a/src/codegen/llvm/Builder.zig b/src/codegen/llvm/Builder.zig index f3acbd5224..9ada51acad 100644 --- a/src/codegen/llvm/Builder.zig +++ b/src/codegen/llvm/Builder.zig @@ -4817,12 +4817,22 @@ pub const Function = struct { cond: Value, then: Block.Index, @"else": Block.Index, + weights: Weights, + pub const Weights = enum(u32) { + // We can do this as metadata indices 0 and 1 are reserved. + none = 0, + unpredictable = 1, + /// These values should be converted to `Metadata` to be used + /// in a `prof` annotation providing branch weights. 
+ _, + }; }; pub const Switch = struct { val: Value, default: Block.Index, cases_len: u32, + weights: BrCond.Weights, //case_vals: [cases_len]Constant, //case_blocks: [cases_len]Block.Index, }; @@ -4969,7 +4979,8 @@ pub const Function = struct { }; pub const Info = packed struct(u32) { call_conv: CallConv, - _: u22 = undefined, + has_op_bundle_cold: bool, + _: u21 = undefined, }; }; @@ -5036,6 +5047,7 @@ pub const Function = struct { FunctionAttributes, Type, Value, + Instruction.BrCond.Weights, => @enumFromInt(value), MemoryAccessInfo, Instruction.Alloca.Info, @@ -5201,6 +5213,7 @@ pub const WipFunction = struct { cond: Value, then: Block.Index, @"else": Block.Index, + weights: enum { none, unpredictable, then_likely, else_likely }, ) Allocator.Error!Instruction.Index { assert(cond.typeOfWip(self) == .i1); try self.ensureUnusedExtraCapacity(1, Instruction.BrCond, 0); @@ -5210,6 +5223,22 @@ pub const WipFunction = struct { .cond = cond, .then = then, .@"else" = @"else", + .weights = switch (weights) { + .none => .none, + .unpredictable => .unpredictable, + .then_likely, .else_likely => w: { + const branch_weights_str = try self.builder.metadataString("branch_weights"); + const unlikely_const = try self.builder.metadataConstant(try self.builder.intConst(.i32, 1)); + const likely_const = try self.builder.metadataConstant(try self.builder.intConst(.i32, 2000)); + const weight_vals: [2]Metadata = switch (weights) { + .none, .unpredictable => unreachable, + .then_likely => .{ likely_const, unlikely_const }, + .else_likely => .{ unlikely_const, likely_const }, + }; + const tuple = try self.builder.strTuple(branch_weights_str, &weight_vals); + break :w @enumFromInt(@intFromEnum(tuple)); + }, + }, }), }); then.ptr(self).branches += 1; @@ -5248,6 +5277,7 @@ pub const WipFunction = struct { val: Value, default: Block.Index, cases_len: u32, + weights: Instruction.BrCond.Weights, ) Allocator.Error!WipSwitch { try self.ensureUnusedExtraCapacity(1, Instruction.Switch, cases_len 
* 2); const instruction = try self.addInst(null, .{ @@ -5256,6 +5286,7 @@ pub const WipFunction = struct { .val = val, .default = default, .cases_len = cases_len, + .weights = weights, }), }); _ = self.extra.addManyAsSliceAssumeCapacity(cases_len * 2); @@ -5895,6 +5926,20 @@ pub const WipFunction = struct { callee: Value, args: []const Value, name: []const u8, + ) Allocator.Error!Value { + return self.callInner(kind, call_conv, function_attributes, ty, callee, args, name, false); + } + + fn callInner( + self: *WipFunction, + kind: Instruction.Call.Kind, + call_conv: CallConv, + function_attributes: FunctionAttributes, + ty: Type, + callee: Value, + args: []const Value, + name: []const u8, + has_op_bundle_cold: bool, ) Allocator.Error!Value { const ret_ty = ty.functionReturn(self.builder); assert(ty.isFunction(self.builder)); @@ -5918,7 +5963,10 @@ pub const WipFunction = struct { .tail_fast => .@"tail call fast", }, .data = self.addExtraAssumeCapacity(Instruction.Call{ - .info = .{ .call_conv = call_conv }, + .info = .{ + .call_conv = call_conv, + .has_op_bundle_cold = has_op_bundle_cold, + }, .attributes = function_attributes, .ty = ty, .callee = callee, @@ -5964,6 +6012,20 @@ pub const WipFunction = struct { ); } + pub fn callIntrinsicAssumeCold(self: *WipFunction) Allocator.Error!Value { + const intrinsic = try self.builder.getIntrinsic(.assume, &.{}); + return self.callInner( + .normal, + CallConv.default, + .none, + intrinsic.typeOf(self.builder), + intrinsic.toValue(self.builder), + &.{try self.builder.intValue(.i1, 1)}, + "", + true, + ); + } + pub fn callMemCpy( self: *WipFunction, dst: Value, @@ -6040,7 +6102,7 @@ pub const WipFunction = struct { break :blk metadata; }, - .constant => |constant| try self.builder.debugConstant(constant), + .constant => |constant| try self.builder.metadataConstant(constant), .metadata => |metadata| metadata, }; } @@ -6099,6 +6161,7 @@ pub const WipFunction = struct { FunctionAttributes, Type, Value, + 
Instruction.BrCond.Weights, => @intFromEnum(value), MemoryAccessInfo, Instruction.Alloca.Info, @@ -6380,6 +6443,7 @@ pub const WipFunction = struct { .cond = instructions.map(extra.cond), .then = extra.then, .@"else" = extra.@"else", + .weights = extra.weights, }); }, .call, @@ -6522,6 +6586,7 @@ pub const WipFunction = struct { .val = instructions.map(extra.data.val), .default = extra.data.default, .cases_len = extra.data.cases_len, + .weights = extra.data.weights, }); wip_extra.appendSlice(case_vals); wip_extra.appendSlice(case_blocks); @@ -6744,6 +6809,7 @@ pub const WipFunction = struct { FunctionAttributes, Type, Value, + Instruction.BrCond.Weights, => @intFromEnum(value), MemoryAccessInfo, Instruction.Alloca.Info, @@ -6792,6 +6858,7 @@ pub const WipFunction = struct { FunctionAttributes, Type, Value, + Instruction.BrCond.Weights, => @enumFromInt(value), MemoryAccessInfo, Instruction.Alloca.Info, @@ -7735,6 +7802,7 @@ pub const Metadata = enum(u32) { enumerator_signed_negative, subrange, tuple, + str_tuple, module_flag, expression, local_var, @@ -7780,6 +7848,7 @@ pub const Metadata = enum(u32) { .enumerator_signed_negative, .subrange, .tuple, + .str_tuple, .module_flag, .local_var, .parameter, @@ -8044,6 +8113,13 @@ pub const Metadata = enum(u32) { // elements: [elements_len]Metadata }; + pub const StrTuple = struct { + str: MetadataString, + elements_len: u32, + + // elements: [elements_len]Metadata + }; + pub const ModuleFlag = struct { behavior: Metadata, name: MetadataString, @@ -8455,11 +8531,12 @@ pub fn init(options: Options) Allocator.Error!Builder { assert(try self.intConst(.i32, 0) == .@"0"); assert(try self.intConst(.i32, 1) == .@"1"); assert(try self.noneConst(.token) == .none); - if (!self.strip) assert(try self.debugNone() == .none); + + assert(try self.metadataNone() == .none); + assert(try self.metadataTuple(&.{}) == .empty_tuple); try self.metadata_string_indices.append(self.gpa, 0); assert(try self.metadataString("") == .none); - assert(try 
self.debugTuple(&.{}) == .empty_tuple); return self; } @@ -9685,6 +9762,13 @@ pub fn printUnbuffered( extra.then.toInst(&function).fmt(function_index, self), extra.@"else".toInst(&function).fmt(function_index, self), }); + switch (extra.weights) { + .none => {}, + .unpredictable => try writer.writeAll(", !unpredictable !{}"), + _ => try writer.print("{}", .{ + try metadata_formatter.fmt(", !prof ", @as(Metadata, @enumFromInt(@intFromEnum(extra.weights)))), + }), + } }, .call, .@"call fast", @@ -9729,6 +9813,9 @@ pub fn printUnbuffered( }); } try writer.writeByte(')'); + if (extra.data.info.has_op_bundle_cold) { + try writer.writeAll(" [ \"cold\"() ]"); + } const call_function_attributes = extra.data.attributes.func(self); if (call_function_attributes != .none) try writer.print(" #{d}", .{ (try attribute_groups.getOrPutValue( @@ -9939,6 +10026,13 @@ pub fn printUnbuffered( }, ); try writer.writeAll(" ]"); + switch (extra.data.weights) { + .none => {}, + .unpredictable => try writer.writeAll(", !unpredictable !{}"), + _ => try writer.print("{}", .{ + try metadata_formatter.fmt(", !prof ", @as(Metadata, @enumFromInt(@intFromEnum(extra.data.weights)))), + }), + } }, .va_arg => |tag| { const extra = function.extraData(Function.Instruction.VaArg, instruction.data); @@ -10287,6 +10381,17 @@ pub fn printUnbuffered( }); try writer.writeAll("}\n"); }, + .str_tuple => { + var extra = self.metadataExtraDataTrail(Metadata.StrTuple, metadata_item.data); + const elements = extra.trail.next(extra.data.elements_len, Metadata, self); + try writer.print("!{{{[str]%}", .{ + .str = try metadata_formatter.fmt("", extra.data.str), + }); + for (elements) |element| try writer.print("{[element]%}", .{ + .element = try metadata_formatter.fmt("", element), + }); + try writer.writeAll("}\n"); + }, .module_flag => { const extra = self.metadataExtraData(Metadata.ModuleFlag, metadata_item.data); try writer.print("!{{{[behavior]%}{[name]%}{[constant]%}}}\n", .{ @@ -11799,9 +11904,9 @@ pub fn 
debugNamed(self: *Builder, name: MetadataString, operands: []const Metada self.debugNamedAssumeCapacity(name, operands); } -fn debugNone(self: *Builder) Allocator.Error!Metadata { +fn metadataNone(self: *Builder) Allocator.Error!Metadata { try self.ensureUnusedMetadataCapacity(1, NoExtra, 0); - return self.debugNoneAssumeCapacity(); + return self.metadataNoneAssumeCapacity(); } pub fn debugFile( @@ -12090,12 +12195,21 @@ pub fn debugExpression( return self.debugExpressionAssumeCapacity(elements); } -pub fn debugTuple( +pub fn metadataTuple( self: *Builder, elements: []const Metadata, ) Allocator.Error!Metadata { try self.ensureUnusedMetadataCapacity(1, Metadata.Tuple, elements.len); - return self.debugTupleAssumeCapacity(elements); + return self.metadataTupleAssumeCapacity(elements); +} + +pub fn strTuple( + self: *Builder, + str: MetadataString, + elements: []const Metadata, +) Allocator.Error!Metadata { + try self.ensureUnusedMetadataCapacity(1, Metadata.StrTuple, elements.len); + return self.strTupleAssumeCapacity(str, elements); } pub fn debugModuleFlag( @@ -12166,9 +12280,9 @@ pub fn debugGlobalVarExpression( return self.debugGlobalVarExpressionAssumeCapacity(variable, expression); } -pub fn debugConstant(self: *Builder, value: Constant) Allocator.Error!Metadata { +pub fn metadataConstant(self: *Builder, value: Constant) Allocator.Error!Metadata { try self.ensureUnusedMetadataCapacity(1, NoExtra, 0); - return self.debugConstantAssumeCapacity(value); + return self.metadataConstantAssumeCapacity(value); } pub fn debugForwardReferenceSetType(self: *Builder, fwd_ref: Metadata, ty: Metadata) void { @@ -12263,8 +12377,7 @@ fn debugNamedAssumeCapacity(self: *Builder, name: MetadataString, operands: []co }; } -pub fn debugNoneAssumeCapacity(self: *Builder) Metadata { - assert(!self.strip); +pub fn metadataNoneAssumeCapacity(self: *Builder) Metadata { return self.metadataSimpleAssumeCapacity(.none, .{}); } @@ -12740,11 +12853,10 @@ fn debugExpressionAssumeCapacity( 
return @enumFromInt(gop.index); } -fn debugTupleAssumeCapacity( +fn metadataTupleAssumeCapacity( self: *Builder, elements: []const Metadata, ) Metadata { - assert(!self.strip); const Key = struct { elements: []const Metadata, }; @@ -12787,6 +12899,55 @@ fn debugTupleAssumeCapacity( return @enumFromInt(gop.index); } +fn strTupleAssumeCapacity( + self: *Builder, + str: MetadataString, + elements: []const Metadata, +) Metadata { + const Key = struct { + str: MetadataString, + elements: []const Metadata, + }; + const Adapter = struct { + builder: *const Builder, + pub fn hash(_: @This(), key: Key) u32 { + var hasher = comptime std.hash.Wyhash.init(std.hash.uint32(@intFromEnum(Metadata.Tag.tuple))); + hasher.update(std.mem.sliceAsBytes(key.elements)); + return @truncate(hasher.final()); + } + + pub fn eql(ctx: @This(), lhs_key: Key, _: void, rhs_index: usize) bool { + if (.str_tuple != ctx.builder.metadata_items.items(.tag)[rhs_index]) return false; + const rhs_data = ctx.builder.metadata_items.items(.data)[rhs_index]; + var rhs_extra = ctx.builder.metadataExtraDataTrail(Metadata.StrTuple, rhs_data); + return rhs_extra.data.str == lhs_key.str and std.mem.eql( + Metadata, + lhs_key.elements, + rhs_extra.trail.next(rhs_extra.data.elements_len, Metadata, ctx.builder), + ); + } + }; + + const gop = self.metadata_map.getOrPutAssumeCapacityAdapted( + Key{ .str = str, .elements = elements }, + Adapter{ .builder = self }, + ); + + if (!gop.found_existing) { + gop.key_ptr.* = {}; + gop.value_ptr.* = {}; + self.metadata_items.appendAssumeCapacity(.{ + .tag = .str_tuple, + .data = self.addMetadataExtraAssumeCapacity(Metadata.StrTuple{ + .str = str, + .elements_len = @intCast(elements.len), + }), + }); + self.metadata_extra.appendSliceAssumeCapacity(@ptrCast(elements)); + } + return @enumFromInt(gop.index); +} + fn debugModuleFlagAssumeCapacity( self: *Builder, behavior: Metadata, @@ -12877,8 +13038,7 @@ fn debugGlobalVarExpressionAssumeCapacity( }); } -fn 
debugConstantAssumeCapacity(self: *Builder, constant: Constant) Metadata { - assert(!self.strip); +fn metadataConstantAssumeCapacity(self: *Builder, constant: Constant) Metadata { const Adapter = struct { builder: *const Builder, pub fn hash(_: @This(), key: Constant) u32 { @@ -13757,15 +13917,18 @@ pub fn toBitcode(self: *Builder, allocator: Allocator) bitcode_writer.Error![]co } // METADATA_KIND_BLOCK - if (!self.strip) { + { const MetadataKindBlock = ir.MetadataKindBlock; var metadata_kind_block = try module_block.enterSubBlock(MetadataKindBlock, true); inline for (@typeInfo(ir.FixedMetadataKind).Enum.fields) |field| { - try metadata_kind_block.writeAbbrev(MetadataKindBlock.Kind{ - .id = field.value, - .name = field.name, - }); + // don't include `dbg` in stripped functions + if (!(self.strip and std.mem.eql(u8, field.name, "dbg"))) { + try metadata_kind_block.writeAbbrev(MetadataKindBlock.Kind{ + .id = field.value, + .name = field.name, + }); + } } try metadata_kind_block.end(); @@ -13810,14 +13973,14 @@ pub fn toBitcode(self: *Builder, allocator: Allocator) bitcode_writer.Error![]co const metadata_adapter = MetadataAdapter.init(self, constant_adapter); // METADATA_BLOCK - if (!self.strip) { + { const MetadataBlock = ir.MetadataBlock; var metadata_block = try module_block.enterSubBlock(MetadataBlock, true); const MetadataBlockWriter = @TypeOf(metadata_block); // Emit all MetadataStrings - { + if (self.metadata_string_map.count() > 1) { const strings_offset, const strings_size = blk: { var strings_offset: u32 = 0; var strings_size: u32 = 0; @@ -14087,6 +14250,22 @@ pub fn toBitcode(self: *Builder, allocator: Allocator) bitcode_writer.Error![]co .elements = elements, }, metadata_adapter); }, + .str_tuple => { + var extra = self.metadataExtraDataTrail(Metadata.StrTuple, data); + + const elements = extra.trail.next(extra.data.elements_len, Metadata, self); + + const all_elems = try self.gpa.alloc(Metadata, elements.len + 1); + defer self.gpa.free(all_elems); + 
all_elems[0] = @enumFromInt(metadata_adapter.getMetadataStringIndex(extra.data.str)); + for (elements, all_elems[1..]) |elem, *out_elem| { + out_elem.* = @enumFromInt(metadata_adapter.getMetadataIndex(elem)); + } + + try metadata_block.writeAbbrev(MetadataBlock.Node{ + .elements = all_elems, + }); + }, .module_flag => { const extra = self.metadataExtraData(Metadata.ModuleFlag, data); try metadata_block.writeAbbrev(MetadataBlock.Node{ @@ -14188,6 +14367,18 @@ pub fn toBitcode(self: *Builder, allocator: Allocator) bitcode_writer.Error![]co try metadata_block.end(); } + // OPERAND_BUNDLE_TAGS_BLOCK + { + const OperandBundleTags = ir.OperandBundleTags; + var operand_bundle_tags_block = try module_block.enterSubBlock(OperandBundleTags, true); + + try operand_bundle_tags_block.writeAbbrev(OperandBundleTags.OperandBundleTag{ + .tag = "cold", + }); + + try operand_bundle_tags_block.end(); + } + // Block info { const BlockInfo = ir.BlockInfo; @@ -14243,7 +14434,6 @@ pub fn toBitcode(self: *Builder, allocator: Allocator) bitcode_writer.Error![]co .instruction => |instruction| instruction.valueIndex(adapter.func) + adapter.firstInstr(), .constant => |constant| adapter.constant_adapter.getConstantIndex(constant), .metadata => |metadata| { - assert(!adapter.func.strip); const real_metadata = metadata.unwrap(adapter.metadata_adapter.builder); if (@intFromEnum(real_metadata) < Metadata.first_local_metadata) return adapter.metadata_adapter.getMetadataIndex(real_metadata) - 1; @@ -14335,6 +14525,10 @@ pub fn toBitcode(self: *Builder, allocator: Allocator) bitcode_writer.Error![]co => |kind| { var extra = func.extraDataTrail(Function.Instruction.Call, data); + if (extra.data.info.has_op_bundle_cold) { + try function_block.writeAbbrev(FunctionBlock.ColdOperandBundle{}); + } + const call_conv = extra.data.info.call_conv; const args = extra.trail.next(extra.data.args_len, Value, &func); try function_block.writeAbbrevAdapted(FunctionBlock.Call{ @@ -14358,6 +14552,10 @@ pub fn 
toBitcode(self: *Builder, allocator: Allocator) bitcode_writer.Error![]co => |kind| { var extra = func.extraDataTrail(Function.Instruction.Call, data); + if (extra.data.info.has_op_bundle_cold) { + try function_block.writeAbbrev(FunctionBlock.ColdOperandBundle{}); + } + const call_conv = extra.data.info.call_conv; const args = extra.trail.next(extra.data.args_len, Value, &func); try function_block.writeAbbrevAdapted(FunctionBlock.CallFast{ @@ -14837,14 +15035,14 @@ pub fn toBitcode(self: *Builder, allocator: Allocator) bitcode_writer.Error![]co } // METADATA_ATTACHMENT_BLOCK - const any_nosanitize = true; - if (!func.strip or any_nosanitize) { + { const MetadataAttachmentBlock = ir.MetadataAttachmentBlock; var metadata_attach_block = try function_block.enterSubBlock(MetadataAttachmentBlock, false); - if (!func.strip) blk: { + dbg: { + if (func.strip) break :dbg; const dbg = func.global.ptrConst(self).dbg; - if (dbg == .none) break :blk; + if (dbg == .none) break :dbg; try metadata_attach_block.writeAbbrev(MetadataAttachmentBlock.AttachmentGlobalSingle{ .kind = .dbg, .metadata = @enumFromInt(metadata_adapter.getMetadataIndex(dbg) - 1), @@ -14852,14 +15050,30 @@ pub fn toBitcode(self: *Builder, allocator: Allocator) bitcode_writer.Error![]co } var instr_index: u32 = 0; - for (func.instructions.items(.tag)) |instr_tag| switch (instr_tag) { - .arg, .block => {}, + for (func.instructions.items(.tag), func.instructions.items(.data)) |instr_tag, data| switch (instr_tag) { + .arg, .block => {}, // not an actual instruction else => { - try metadata_attach_block.writeAbbrev(MetadataAttachmentBlock.AttachmentInstructionSingle{ - .inst = instr_index, - .kind = .nosanitize, - .metadata = @enumFromInt(metadata_adapter.getMetadataIndex(.empty_tuple) - 1), - }); + instr_index += 1; + }, + .br_cond, .@"switch" => { + const weights = switch (instr_tag) { + .br_cond => func.extraData(Function.Instruction.BrCond, data).weights, + .@"switch" => 
func.extraData(Function.Instruction.Switch, data).weights, + else => unreachable, + }; + switch (weights) { + .none => {}, + .unpredictable => try metadata_attach_block.writeAbbrev(MetadataAttachmentBlock.AttachmentInstructionSingle{ + .inst = instr_index, + .kind = .unpredictable, + .metadata = @enumFromInt(metadata_adapter.getMetadataIndex(.empty_tuple) - 1), + }), + _ => try metadata_attach_block.writeAbbrev(MetadataAttachmentBlock.AttachmentInstructionSingle{ + .inst = instr_index, + .kind = .prof, + .metadata = @enumFromInt(metadata_adapter.getMetadataIndex(@enumFromInt(@intFromEnum(weights))) - 1), + }), + } instr_index += 1; }, }; diff --git a/src/codegen/llvm/ir.zig b/src/codegen/llvm/ir.zig index 3b8cc14fcf..4d7effdaaf 100644 --- a/src/codegen/llvm/ir.zig +++ b/src/codegen/llvm/ir.zig @@ -25,7 +25,7 @@ const BlockAbbrev = AbbrevOp{ .vbr = 6 }; pub const FixedMetadataKind = enum(u8) { dbg = 0, //tbaa = 1, - //prof = 2, + prof = 2, //fpmath = 3, //range = 4, //@"tbaa.struct" = 5, @@ -38,7 +38,7 @@ pub const FixedMetadataKind = enum(u8) { //dereferenceable = 12, //dereferenceable_or_null = 13, //@"make.implicit" = 14, - //unpredictable = 15, + unpredictable = 15, //@"invariant.group" = 16, //@"align" = 17, //@"llvm.loop" = 18, @@ -54,7 +54,7 @@ pub const FixedMetadataKind = enum(u8) { //vcall_visibility = 28, //noundef = 29, //annotation = 30, - nosanitize = 31, + //nosanitize = 31, //func_sanitize = 32, //exclude = 33, //memprof = 34, @@ -1220,6 +1220,20 @@ pub const MetadataBlock = struct { }; }; +pub const OperandBundleTags = struct { + pub const id = 21; + + pub const abbrevs = [_]type{OperandBundleTag}; + + pub const OperandBundleTag = struct { + pub const ops = [_]AbbrevOp{ + .{ .literal = 1 }, + .array_char6, + }; + tag: []const u8, + }; +}; + pub const FunctionMetadataBlock = struct { pub const id = 15; @@ -1279,6 +1293,7 @@ pub const FunctionBlock = struct { Fence, DebugLoc, DebugLocAgain, + ColdOperandBundle, }; pub const DeclareBlocks = struct { @@ 
-1791,6 +1806,13 @@ pub const FunctionBlock = struct { .{ .literal = 33 }, }; }; + + pub const ColdOperandBundle = struct { + pub const ops = [_]AbbrevOp{ + .{ .literal = 55 }, + .{ .literal = 0 }, + }; + }; }; pub const FunctionValueSymbolTable = struct { diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig index adab565508..68961d2241 100644 --- a/src/codegen/spirv.zig +++ b/src/codegen/spirv.zig @@ -6173,11 +6173,10 @@ const NavGen = struct { const pt = self.pt; const zcu = pt.zcu; const target = self.getTarget(); - const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; - const cond_ty = self.typeOf(pl_op.operand); - const cond = try self.resolve(pl_op.operand); + const switch_br = self.air.unwrapSwitch(inst); + const cond_ty = self.typeOf(switch_br.operand); + const cond = try self.resolve(switch_br.operand); var cond_indirect = try self.convertToIndirect(cond_ty, cond); - const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload); const cond_words: u32 = switch (cond_ty.zigTypeTag(zcu)) { .Bool, .ErrorSet => 1, @@ -6204,18 +6203,15 @@ const NavGen = struct { else => return self.todo("implement switch for type {s}", .{@tagName(cond_ty.zigTypeTag(zcu))}), }; - const num_cases = switch_br.data.cases_len; + const num_cases = switch_br.cases_len; // Compute the total number of arms that we need. 
// Zig switches are grouped by condition, so we need to loop through all of them const num_conditions = blk: { - var extra_index: usize = switch_br.end; var num_conditions: u32 = 0; - for (0..num_cases) |_| { - const case = self.air.extraData(Air.SwitchBr.Case, extra_index); - const case_body = self.air.extra[case.end + case.data.items_len ..][0..case.data.body_len]; - extra_index = case.end + case.data.items_len + case_body.len; - num_conditions += case.data.items_len; + var it = switch_br.iterateCases(); + while (it.next()) |case| { + num_conditions += @intCast(case.items.len); } break :blk num_conditions; }; @@ -6244,17 +6240,12 @@ const NavGen = struct { // Emit each of the cases { - var extra_index: usize = switch_br.end; - for (0..num_cases) |case_i| { + var it = switch_br.iterateCases(); + while (it.next()) |case| { // SPIR-V needs a literal here, which' width depends on the case condition. - const case = self.air.extraData(Air.SwitchBr.Case, extra_index); - const items: []const Air.Inst.Ref = @ptrCast(self.air.extra[case.end..][0..case.data.items_len]); - const case_body = self.air.extra[case.end + items.len ..][0..case.data.body_len]; - extra_index = case.end + case.data.items_len + case_body.len; + const label = case_labels.at(case.idx); - const label = case_labels.at(case_i); - - for (items) |item| { + for (case.items) |item| { const value = (try self.air.value(item, pt)) orelse unreachable; const int_val: u64 = switch (cond_ty.zigTypeTag(zcu)) { .Bool, .Int => if (cond_ty.isSignedInt(zcu)) @bitCast(value.toSignedInt(zcu)) else value.toUnsignedInt(zcu), @@ -6285,20 +6276,15 @@ const NavGen = struct { } // Now, finally, we can start emitting each of the cases. 
- var extra_index: usize = switch_br.end; - for (0..num_cases) |case_i| { - const case = self.air.extraData(Air.SwitchBr.Case, extra_index); - const items: []const Air.Inst.Ref = @ptrCast(self.air.extra[case.end..][0..case.data.items_len]); - const case_body: []const Air.Inst.Index = @ptrCast(self.air.extra[case.end + items.len ..][0..case.data.body_len]); - extra_index = case.end + case.data.items_len + case_body.len; - - const label = case_labels.at(case_i); + var it = switch_br.iterateCases(); + while (it.next()) |case| { + const label = case_labels.at(case.idx); try self.beginSpvBlock(label); switch (self.control_flow) { .structured => { - const next_block = try self.genStructuredBody(.selection, case_body); + const next_block = try self.genStructuredBody(.selection, case.body); incoming_structured_blocks.appendAssumeCapacity(.{ .src_label = self.current_block_label, .next_block = next_block, @@ -6306,12 +6292,12 @@ const NavGen = struct { try self.func.body.emitBranch(self.spv.gpa, merge_label.?); }, .unstructured => { - try self.genBody(case_body); + try self.genBody(case.body); }, } } - const else_body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra_index..][0..switch_br.data.else_body_len]); + const else_body = it.elseBody(); try self.beginSpvBlock(default); if (else_body.len != 0) { switch (self.control_flow) { diff --git a/src/print_air.zig b/src/print_air.zig index b7ae53c5cc..8e6e21801c 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -297,8 +297,8 @@ const Writer = struct { .union_init => try w.writeUnionInit(s, inst), .br => try w.writeBr(s, inst), .cond_br => try w.writeCondBr(s, inst), - .@"try" => try w.writeTry(s, inst), - .try_ptr => try w.writeTryPtr(s, inst), + .@"try", .try_cold => try w.writeTry(s, inst), + .try_ptr, .try_ptr_cold => try w.writeTryPtr(s, inst), .switch_br => try w.writeSwitchBr(s, inst), .cmpxchg_weak, .cmpxchg_strong => try w.writeCmpxchg(s, inst), .fence => try w.writeFence(s, inst), @@ -825,41 +825,40 @@ 
const Writer = struct { } fn writeSwitchBr(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { - const pl_op = w.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; - const switch_br = w.air.extraData(Air.SwitchBr, pl_op.payload); + const switch_br = w.air.unwrapSwitch(inst); + const liveness = if (w.liveness) |liveness| - liveness.getSwitchBr(w.gpa, inst, switch_br.data.cases_len + 1) catch + liveness.getSwitchBr(w.gpa, inst, switch_br.cases_len + 1) catch @panic("out of memory") else blk: { - const slice = w.gpa.alloc([]const Air.Inst.Index, switch_br.data.cases_len + 1) catch + const slice = w.gpa.alloc([]const Air.Inst.Index, switch_br.cases_len + 1) catch @panic("out of memory"); @memset(slice, &.{}); break :blk Liveness.SwitchBrTable{ .deaths = slice }; }; defer w.gpa.free(liveness.deaths); - var extra_index: usize = switch_br.end; - var case_i: u32 = 0; - try w.writeOperand(s, inst, 0, pl_op.operand); + try w.writeOperand(s, inst, 0, switch_br.operand); if (w.skip_body) return s.writeAll(", ..."); const old_indent = w.indent; w.indent += 2; - while (case_i < switch_br.data.cases_len) : (case_i += 1) { - const case = w.air.extraData(Air.SwitchBr.Case, extra_index); - const items = @as([]const Air.Inst.Ref, @ptrCast(w.air.extra[case.end..][0..case.data.items_len])); - const case_body: []const Air.Inst.Index = @ptrCast(w.air.extra[case.end + items.len ..][0..case.data.body_len]); - extra_index = case.end + case.data.items_len + case_body.len; - + var it = switch_br.iterateCases(); + while (it.next()) |case| { try s.writeAll(", ["); - for (items, 0..) |item, item_i| { + for (case.items, 0..) 
|item, item_i| { if (item_i != 0) try s.writeAll(", "); try w.writeInstRef(s, item, false); } - try s.writeAll("] => {\n"); + try s.writeAll("] "); + const hint = switch_br.getHint(case.idx); + if (hint != .none) { + try s.print(".{s} ", .{@tagName(hint)}); + } + try s.writeAll("=> {\n"); w.indent += 2; - const deaths = liveness.deaths[case_i]; + const deaths = liveness.deaths[case.idx]; if (deaths.len != 0) { try s.writeByteNTimes(' ', w.indent); for (deaths, 0..) |operand, i| { @@ -869,15 +868,20 @@ const Writer = struct { try s.writeAll("\n"); } - try w.writeBody(s, case_body); + try w.writeBody(s, case.body); w.indent -= 2; try s.writeByteNTimes(' ', w.indent); try s.writeAll("}"); } - const else_body: []const Air.Inst.Index = @ptrCast(w.air.extra[extra_index..][0..switch_br.data.else_body_len]); + const else_body = it.elseBody(); if (else_body.len != 0) { - try s.writeAll(", else => {\n"); + try s.writeAll(", else "); + const hint = switch_br.getElseHint(); + if (hint != .none) { + try s.print(".{s} ", .{@tagName(hint)}); + } + try s.writeAll("=> {\n"); w.indent += 2; const deaths = liveness.deaths[liveness.deaths.len - 1]; diff --git a/src/print_zir.zig b/src/print_zir.zig index 71e051af5a..80f1500d0e 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -564,7 +564,6 @@ const Writer = struct { .fence, .set_float_mode, .set_align_stack, - .set_cold, .wasm_memory_size, .int_from_error, .error_from_int, @@ -573,6 +572,7 @@ const Writer = struct { .work_item_id, .work_group_size, .work_group_id, + .branch_hint, => { const inst_data = self.code.extraData(Zir.Inst.UnNode, extended.operand).data; try self.writeInstRef(stream, inst_data.operand);