From 1059b57898ec929e9d6ebd8c35660acffe0bac99 Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 6 Apr 2023 05:26:20 +0100 Subject: [PATCH] Liveness: defer deaths of externally-scoped instructions in loop bodies --- src/Liveness.zig | 620 +++++++++++++++++++++++++++- src/arch/aarch64/CodeGen.zig | 8 + src/arch/arm/CodeGen.zig | 8 + src/arch/sparc64/CodeGen.zig | 8 + src/arch/wasm/CodeGen.zig | 6 + src/arch/x86_64/CodeGen.zig | 7 + src/codegen/c.zig | 87 ++-- src/print_air.zig | 31 +- test/behavior/const_slice_child.zig | 1 - test/behavior/for.zig | 2 - test/behavior/slice.zig | 1 - 11 files changed, 713 insertions(+), 66 deletions(-) diff --git a/src/Liveness.zig b/src/Liveness.zig index 1d57b80097..6412792037 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -25,6 +25,7 @@ tomb_bits: []usize, /// array. The meaning of the data depends on the AIR tag. /// * `cond_br` - points to a `CondBr` in `extra` at this index. /// * `switch_br` - points to a `SwitchBr` in `extra` at this index. +/// * `loop` - points to a `Loop` in `extra` at this index. /// * `asm`, `call`, `aggregate_init` - the value is a set of bits which are the extra tomb /// bits of operands. /// The main tomb bits are still used and the extra ones are starting with the lsb of the @@ -51,6 +52,11 @@ pub const SwitchBr = struct { else_death_count: u32, }; +/// Trailing is the set of instructions whose lifetimes end at the end of the loop body. +pub const Loop = struct { + death_count: u32, +}; + pub fn analyze(gpa: Allocator, air: Air) Allocator.Error!Liveness { const tracy = trace(@src()); defer tracy.end(); @@ -76,6 +82,11 @@ pub fn analyze(gpa: Allocator, air: Air) Allocator.Error!Liveness { const main_body = air.getMainBody(); try a.table.ensureTotalCapacity(gpa, @intCast(u32, main_body.len)); try analyzeWithContext(&a, null, main_body); + { + var to_remove: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}; + defer to_remove.deinit(gpa); + try removeDeaths(&a, &to_remove, main_body); + } return Liveness{ .tomb_bits = a.tomb_bits, .special = a.special, @@ -650,6 +661,18 @@ pub fn getSwitchBr(l: Liveness, gpa: Allocator, inst: Air.Inst.Index, cases_len: }; } +pub const LoopSlice = struct { + deaths: []const Air.Inst.Index, +}; + +pub fn getLoop(l: Liveness, inst: Air.Inst.Index) LoopSlice { + const index: usize = l.special.get(inst) orelse return .{ + .deaths = &.{}, + }; + const death_count = l.extra[index]; + return .{ .deaths = l.extra[index + 1 ..][0..death_count] }; +} + pub fn deinit(l: *Liveness, gpa: Allocator) void { gpa.free(l.tomb_bits); gpa.free(l.extra); @@ -1138,7 +1161,39 @@ fn analyzeInst( .loop => { const extra = a.air.extraData(Air.Block, inst_datas[inst].ty_pl.payload); const body = a.air.extra[extra.end..][0..extra.data.body_len]; - try analyzeWithContext(a, new_set, body); + + var body_table: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}; + defer body_table.deinit(gpa); + + // Instructions outside the loop body cannot die within the loop, since further loop + // iterations may occur. Track deaths from the loop body - we'll remove all of these + // retroactively, and add them to our extra data. 
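+            // For example, if %1 is defined before the loop and its last use is a
+            // comparison inside the body, the body analysis records that use as a
+            // death, yet %1 must stay alive for the next iteration; such deaths are
+            // deferred into this loop's `Loop` extra data instead.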
+ + try analyzeWithContext(a, &body_table, body); + + if (new_set) |ns| { + try ns.ensureUnusedCapacity(gpa, body_table.count()); + var it = body_table.keyIterator(); + while (it.next()) |key| { + _ = ns.putAssumeCapacity(key.*, {}); + } + } + + try a.extra.ensureUnusedCapacity(gpa, std.meta.fields(Loop).len + body_table.count()); + const extra_index = a.addExtraAssumeCapacity(Loop{ + .death_count = body_table.count(), + }); + { + var it = body_table.keyIterator(); + while (it.next()) |key| { + a.extra.appendAssumeCapacity(key.*); + } + } + try a.special.put(gpa, inst, extra_index); + + // We'll remove invalid deaths in a separate pass after main liveness analysis. See + // removeDeaths for more details. + return; // Loop has no operands and it is always unreferenced. }, .@"try" => { @@ -1412,3 +1467,566 @@ const ExtraTombs = struct { et.big_tomb_bits_extra.deinit(et.analysis.gpa); } }; + +/// Remove any deaths invalidated by the deaths from an enclosing `loop`. Reshuffling deaths stored +/// in `extra` causes it to become non-dense, but that's fine - we won't remove too much data. +/// Making it dense would be a lot more work - it'd require recomputing every index in `special`. +fn removeDeaths( + a: *Analysis, + to_remove: *std.AutoHashMapUnmanaged(Air.Inst.Index, void), + body: []const Air.Inst.Index, +) error{OutOfMemory}!void { + for (body) |inst| { + try removeInstDeaths(a, to_remove, inst); + } +} + +fn removeInstDeaths( + a: *Analysis, + to_remove: *std.AutoHashMapUnmanaged(Air.Inst.Index, void), + inst: Air.Inst.Index, +) !void { + const inst_tags = a.air.instructions.items(.tag); + const inst_datas = a.air.instructions.items(.data); + + switch (inst_tags[inst]) { + .add, + .add_optimized, + .addwrap, + .addwrap_optimized, + .add_sat, + .sub, + .sub_optimized, + .subwrap, + .subwrap_optimized, + .sub_sat, + .mul, + .mul_optimized, + .mulwrap, + .mulwrap_optimized, + .mul_sat, + .div_float, + .div_float_optimized, + .div_trunc, + .div_trunc_optimized, + .div_floor, + .div_floor_optimized, + .div_exact, + .div_exact_optimized, + .rem, + .rem_optimized, + .mod, + .mod_optimized, + .bit_and, + .bit_or, + .xor, + .cmp_lt, + .cmp_lt_optimized, + .cmp_lte, + .cmp_lte_optimized, + .cmp_eq, + .cmp_eq_optimized, + .cmp_gte, + .cmp_gte_optimized, + .cmp_gt, + .cmp_gt_optimized, + .cmp_neq, + .cmp_neq_optimized, + .bool_and, + .bool_or, + .store, + .array_elem_val, + .slice_elem_val, + .ptr_elem_val, + .shl, + .shl_exact, + .shl_sat, + .shr, + .shr_exact, + .atomic_store_unordered, + .atomic_store_monotonic, + .atomic_store_release, + .atomic_store_seq_cst, + .set_union_tag, + .min, + .max, + => { + const o = inst_datas[inst].bin_op; + removeOperandDeaths(a, to_remove, inst, .{ o.lhs, o.rhs, .none }); + }, + + .vector_store_elem => { + const o = inst_datas[inst].vector_store_elem; + const extra = a.air.extraData(Air.Bin, o.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ o.vector_ptr, extra.lhs, extra.rhs }); + }, + + .arg, + .alloc, + .ret_ptr, + .constant, + .const_ty, + .trap, + .breakpoint, + .dbg_stmt, + .dbg_inline_begin, + .dbg_inline_end, + .dbg_block_begin, + .dbg_block_end, + .unreach, + .fence, + .ret_addr, + .frame_addr, + .wasm_memory_size, + .err_return_trace, + .save_err_return_trace_index, + .c_va_start, + .work_item_id, + .work_group_size, + .work_group_id, + => {}, + + .not, + .bitcast, + .load, + .fpext, + .fptrunc, + .intcast, + .trunc, + .optional_payload, + .optional_payload_ptr, + .optional_payload_ptr_set, + .errunion_payload_ptr_set, + .wrap_optional, + 
.unwrap_errunion_payload, + .unwrap_errunion_err, + .unwrap_errunion_payload_ptr, + .unwrap_errunion_err_ptr, + .wrap_errunion_payload, + .wrap_errunion_err, + .slice_ptr, + .slice_len, + .ptr_slice_len_ptr, + .ptr_slice_ptr_ptr, + .struct_field_ptr_index_0, + .struct_field_ptr_index_1, + .struct_field_ptr_index_2, + .struct_field_ptr_index_3, + .array_to_slice, + .float_to_int, + .float_to_int_optimized, + .int_to_float, + .get_union_tag, + .clz, + .ctz, + .popcount, + .byte_swap, + .bit_reverse, + .splat, + .error_set_has_value, + .addrspace_cast, + .c_va_arg, + .c_va_copy, + => { + const o = inst_datas[inst].ty_op; + removeOperandDeaths(a, to_remove, inst, .{ o.operand, .none, .none }); + }, + + .is_null, + .is_non_null, + .is_null_ptr, + .is_non_null_ptr, + .is_err, + .is_non_err, + .is_err_ptr, + .is_non_err_ptr, + .ptrtoint, + .bool_to_int, + .ret, + .ret_load, + .is_named_enum_value, + .tag_name, + .error_name, + .sqrt, + .sin, + .cos, + .tan, + .exp, + .exp2, + .log, + .log2, + .log10, + .fabs, + .floor, + .ceil, + .round, + .trunc_float, + .neg, + .neg_optimized, + .cmp_lt_errors_len, + .set_err_return_trace, + .c_va_end, + => { + const operand = inst_datas[inst].un_op; + removeOperandDeaths(a, to_remove, inst, .{ operand, .none, .none }); + }, + + .add_with_overflow, + .sub_with_overflow, + .mul_with_overflow, + .shl_with_overflow, + .ptr_add, + .ptr_sub, + .ptr_elem_ptr, + .slice_elem_ptr, + .slice, + => { + const ty_pl = inst_datas[inst].ty_pl; + const extra = a.air.extraData(Air.Bin, ty_pl.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ extra.lhs, extra.rhs, .none }); + }, + + .dbg_var_ptr, + .dbg_var_val, + => { + const operand = inst_datas[inst].pl_op.operand; + removeOperandDeaths(a, to_remove, inst, .{ operand, .none, .none }); + }, + + .prefetch => { + const prefetch = inst_datas[inst].prefetch; + removeOperandDeaths(a, to_remove, inst, .{ prefetch.ptr, .none, .none }); + }, + + .call, .call_always_tail, .call_never_tail, .call_never_inline => { + const inst_data = inst_datas[inst].pl_op; + const callee = inst_data.operand; + const extra = a.air.extraData(Air.Call, inst_data.payload); + const args = @ptrCast([]const Air.Inst.Ref, a.air.extra[extra.end..][0..extra.data.args_len]); + + var death_remover = BigTombDeathRemover.init(a, to_remove, inst); + death_remover.feed(callee); + for (args) |operand| { + death_remover.feed(operand); + } + death_remover.finish(); + }, + .select => { + const pl_op = inst_datas[inst].pl_op; + const extra = a.air.extraData(Air.Bin, pl_op.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ pl_op.operand, extra.lhs, extra.rhs }); + }, + .shuffle => { + const extra = a.air.extraData(Air.Shuffle, inst_datas[inst].ty_pl.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ extra.a, extra.b, .none }); + }, + .reduce, .reduce_optimized => { + const reduce = inst_datas[inst].reduce; + removeOperandDeaths(a, to_remove, inst, .{ reduce.operand, .none, .none }); + }, + .cmp_vector, .cmp_vector_optimized => { + const extra = a.air.extraData(Air.VectorCmp, inst_datas[inst].ty_pl.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ extra.lhs, extra.rhs, .none }); + }, + .aggregate_init => { + const ty_pl = inst_datas[inst].ty_pl; + const aggregate_ty = a.air.getRefType(ty_pl.ty); + const len = @intCast(usize, aggregate_ty.arrayLen()); + const elements = @ptrCast([]const Air.Inst.Ref, a.air.extra[ty_pl.payload..][0..len]); + + var death_remover = BigTombDeathRemover.init(a, to_remove, inst); + for (elements) |elem| { + 
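+                // Feed operands in the same order as during analysis so that the
+                // big-tomb bit indices line up with the recorded tomb bits.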
death_remover.feed(elem); + } + death_remover.finish(); + }, + .union_init => { + const extra = a.air.extraData(Air.UnionInit, inst_datas[inst].ty_pl.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ extra.init, .none, .none }); + }, + .struct_field_ptr, .struct_field_val => { + const extra = a.air.extraData(Air.StructField, inst_datas[inst].ty_pl.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ extra.struct_operand, .none, .none }); + }, + .field_parent_ptr => { + const extra = a.air.extraData(Air.FieldParentPtr, inst_datas[inst].ty_pl.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ extra.field_ptr, .none, .none }); + }, + .cmpxchg_strong, .cmpxchg_weak => { + const extra = a.air.extraData(Air.Cmpxchg, inst_datas[inst].ty_pl.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ extra.ptr, extra.expected_value, extra.new_value }); + }, + .mul_add => { + const pl_op = inst_datas[inst].pl_op; + const extra = a.air.extraData(Air.Bin, pl_op.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ extra.lhs, extra.rhs, pl_op.operand }); + }, + .atomic_load => { + const ptr = inst_datas[inst].atomic_load.ptr; + removeOperandDeaths(a, to_remove, inst, .{ ptr, .none, .none }); + }, + .atomic_rmw => { + const pl_op = inst_datas[inst].pl_op; + const extra = a.air.extraData(Air.AtomicRmw, pl_op.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ pl_op.operand, extra.operand, .none }); + }, + .memset, + .memcpy, + => { + const pl_op = inst_datas[inst].pl_op; + const extra = a.air.extraData(Air.Bin, pl_op.payload).data; + removeOperandDeaths(a, to_remove, inst, .{ pl_op.operand, extra.lhs, extra.rhs }); + }, + + .br => { + const br = inst_datas[inst].br; + removeOperandDeaths(a, to_remove, inst, .{ br.operand, .none, .none }); + }, + .assembly => { + const extra = a.air.extraData(Air.Asm, inst_datas[inst].ty_pl.payload); + var extra_i: usize = extra.end; + const outputs = @ptrCast([]const Air.Inst.Ref, a.air.extra[extra_i..][0..extra.data.outputs_len]); + extra_i += outputs.len; + const inputs = @ptrCast([]const Air.Inst.Ref, a.air.extra[extra_i..][0..extra.data.inputs_len]); + extra_i += inputs.len; + + var death_remover = BigTombDeathRemover.init(a, to_remove, inst); + for (outputs) |output| { + if (output != .none) { + death_remover.feed(output); + } + } + for (inputs) |input| { + death_remover.feed(input); + } + death_remover.finish(); + }, + .block => { + const extra = a.air.extraData(Air.Block, inst_datas[inst].ty_pl.payload); + const body = a.air.extra[extra.end..][0..extra.data.body_len]; + try removeDeaths(a, to_remove, body); + }, + .loop => { + const extra = a.air.extraData(Air.Block, inst_datas[inst].ty_pl.payload); + const body = a.air.extra[extra.end..][0..extra.data.body_len]; + + const liveness_extra_idx = a.special.get(inst) orelse { + try removeDeaths(a, to_remove, body); + return; + }; + + const death_count = a.extra.items[liveness_extra_idx]; + var deaths = a.extra.items[liveness_extra_idx + 1 ..][0..death_count]; + + // Remove any deaths in `to_remove` from this loop's deaths + deaths.len = removeExtraDeaths(to_remove, deaths); + a.extra.items[liveness_extra_idx] = @intCast(u32, deaths.len); + + // Temporarily add any deaths of ours to `to_remove` + try to_remove.ensureUnusedCapacity(a.gpa, @intCast(u32, deaths.len)); + for (deaths) |d| { + to_remove.putAssumeCapacity(d, {}); + } + try removeDeaths(a, to_remove, body); + for (deaths) |d| { + _ = to_remove.remove(d); + } + }, + .@"try" => { + const pl_op = inst_datas[inst].pl_op; + 
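+            // A `try` has both a body and an operand: recurse into the body first,
+            // then strip any invalidated death of the operand itself.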
const extra = a.air.extraData(Air.Try, pl_op.payload); + const body = a.air.extra[extra.end..][0..extra.data.body_len]; + try removeDeaths(a, to_remove, body); + removeOperandDeaths(a, to_remove, inst, .{ pl_op.operand, .none, .none }); + }, + .try_ptr => { + const extra = a.air.extraData(Air.TryPtr, inst_datas[inst].ty_pl.payload); + const body = a.air.extra[extra.end..][0..extra.data.body_len]; + try removeDeaths(a, to_remove, body); + removeOperandDeaths(a, to_remove, inst, .{ extra.data.ptr, .none, .none }); + }, + .cond_br => { + const inst_data = inst_datas[inst].pl_op; + const condition = inst_data.operand; + const extra = a.air.extraData(Air.CondBr, inst_data.payload); + const then_body = a.air.extra[extra.end..][0..extra.data.then_body_len]; + const else_body = a.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len]; + + if (a.special.get(inst)) |liveness_extra_idx| { + const then_death_count = a.extra.items[liveness_extra_idx + 0]; + const else_death_count = a.extra.items[liveness_extra_idx + 1]; + var then_deaths = a.extra.items[liveness_extra_idx + 2 ..][0..then_death_count]; + var else_deaths = a.extra.items[liveness_extra_idx + 2 + then_death_count ..][0..else_death_count]; + + const new_then_death_count = removeExtraDeaths(to_remove, then_deaths); + const new_else_death_count = removeExtraDeaths(to_remove, else_deaths); + + a.extra.items[liveness_extra_idx + 0] = new_then_death_count; + a.extra.items[liveness_extra_idx + 1] = new_else_death_count; + + if (new_then_death_count < then_death_count) { + // `else` deaths need to be moved earlier in `extra` + const src = a.extra.items[liveness_extra_idx + 2 + then_death_count ..]; + const dest = a.extra.items[liveness_extra_idx + 2 + new_then_death_count ..]; + std.mem.copy(u32, dest, src[0..new_else_death_count]); + } + } + + try removeDeaths(a, to_remove, then_body); + try removeDeaths(a, to_remove, else_body); + + removeOperandDeaths(a, to_remove, inst, .{ condition, .none, .none }); + }, + .switch_br => { + const pl_op = inst_datas[inst].pl_op; + const condition = pl_op.operand; + const switch_br = a.air.extraData(Air.SwitchBr, pl_op.payload); + + var air_extra_index: usize = switch_br.end; + for (0..switch_br.data.cases_len) |_| { + const case = a.air.extraData(Air.SwitchBr.Case, air_extra_index); + const case_body = a.air.extra[case.end + case.data.items_len ..][0..case.data.body_len]; + air_extra_index = case.end + case.data.items_len + case_body.len; + try removeDeaths(a, to_remove, case_body); + } + { // else + const else_body = a.air.extra[air_extra_index..][0..switch_br.data.else_body_len]; + try removeDeaths(a, to_remove, else_body); + } + + if (a.special.get(inst)) |liveness_extra_idx| { + const else_death_count = a.extra.items[liveness_extra_idx]; + var read_idx = liveness_extra_idx + 1; + var write_idx = read_idx; // write_idx <= read_idx always + for (0..switch_br.data.cases_len) |_| { + const case_death_count = a.extra.items[read_idx]; + const case_deaths = a.extra.items[read_idx + 1 ..][0..case_death_count]; + const new_death_count = removeExtraDeaths(to_remove, case_deaths); + a.extra.items[write_idx] = new_death_count; + if (write_idx < read_idx) { + std.mem.copy(u32, a.extra.items[write_idx + 1 ..], a.extra.items[read_idx + 1 ..][0..new_death_count]); + } + read_idx += 1 + case_death_count; + write_idx += 1 + new_death_count; + } + const else_deaths = a.extra.items[read_idx..][0..else_death_count]; + const new_else_death_count = removeExtraDeaths(to_remove, else_deaths); + 
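+                // The case deaths were compacted in place above (write_idx <= read_idx),
+                // so the surviving else deaths may also need shifting left to stay
+                // contiguous with them.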
a.extra.items[liveness_extra_idx] = new_else_death_count; + if (write_idx < read_idx) { + std.mem.copy(u32, a.extra.items[write_idx..], a.extra.items[read_idx..][0..new_else_death_count]); + } + } + + removeOperandDeaths(a, to_remove, inst, .{ condition, .none, .none }); + }, + .wasm_memory_grow => { + const pl_op = inst_datas[inst].pl_op; + removeOperandDeaths(a, to_remove, inst, .{ pl_op.operand, .none, .none }); + }, + } +} + +fn removeOperandDeaths( + a: *Analysis, + to_remove: *const std.AutoHashMapUnmanaged(Air.Inst.Index, void), + inst: Air.Inst.Index, + operands: [bpi - 1]Air.Inst.Ref, +) void { + const usize_index = (inst * bpi) / @bitSizeOf(usize); + + const cur_tomb = @truncate(Bpi, a.tomb_bits[usize_index] >> + @intCast(Log2Int(usize), (inst % (@bitSizeOf(usize) / bpi)) * bpi)); + + var toggle_bits: Bpi = 0; + + for (operands, 0..) |op_ref, i| { + const mask = @as(Bpi, 1) << @intCast(OperandInt, i); + const op_int = @enumToInt(op_ref); + if (op_int < Air.Inst.Ref.typed_value_map.len) continue; + const operand: Air.Inst.Index = op_int - @intCast(u32, Air.Inst.Ref.typed_value_map.len); + if ((cur_tomb & mask) != 0 and to_remove.contains(operand)) { + log.debug("remove death of %{} in %{}", .{ operand, inst }); + toggle_bits ^= mask; + } + } + + a.tomb_bits[usize_index] ^= @as(usize, toggle_bits) << + @intCast(Log2Int(usize), (inst % (@bitSizeOf(usize) / bpi)) * bpi); +} + +fn removeExtraDeaths( + to_remove: *const std.AutoHashMapUnmanaged(Air.Inst.Index, void), + deaths: []Air.Inst.Index, +) u32 { + var new_len = @intCast(u32, deaths.len); + var i: usize = 0; + while (i < new_len) { + if (to_remove.contains(deaths[i])) { + log.debug("remove extra death of %{}", .{deaths[i]}); + deaths[i] = deaths[new_len - 1]; + new_len -= 1; + } else { + i += 1; + } + } + return new_len; +} + +const BigTombDeathRemover = struct { + a: *Analysis, + to_remove: *const std.AutoHashMapUnmanaged(Air.Inst.Index, void), + inst: Air.Inst.Index, + + operands: [bpi - 1]Air.Inst.Ref = .{.none} ** (bpi - 1), + next_oper: OperandInt = 0, + + bit_index: u32 = 0, + // Initialized once we finish the small tomb operands: see `feed` + extra_start: u32 = undefined, + extra_offset: u32 = 0, + + fn init(a: *Analysis, to_remove: *const std.AutoHashMapUnmanaged(Air.Inst.Index, void), inst: Air.Inst.Index) BigTombDeathRemover { + return .{ + .a = a, + .to_remove = to_remove, + .inst = inst, + }; + } + + fn feed(dr: *BigTombDeathRemover, operand: Air.Inst.Ref) void { + if (dr.next_oper < bpi - 1) { + dr.operands[dr.next_oper] = operand; + dr.next_oper += 1; + if (dr.next_oper == bpi - 1) { + removeOperandDeaths(dr.a, dr.to_remove, dr.inst, dr.operands); + if (dr.a.special.get(dr.inst)) |idx| dr.extra_start = idx; + } + return; + } + + defer dr.bit_index += 1; + + const op_int = @enumToInt(operand); + if (op_int < Air.Inst.Ref.typed_value_map.len) return; + + const op_inst: Air.Inst.Index = op_int - @intCast(u32, Air.Inst.Ref.typed_value_map.len); + + while (dr.bit_index - dr.extra_offset * 31 >= 31) { + dr.extra_offset += 1; + } + const dies = @truncate(u1, dr.a.extra.items[dr.extra_start + dr.extra_offset] >> + @intCast(u5, dr.bit_index - dr.extra_offset * 31)) != 0; + + if (dies and dr.to_remove.contains(op_inst)) { + log.debug("remove big death of %{}", .{op_inst}); + dr.a.extra.items[dr.extra_start + dr.extra_offset] ^= + (@as(u32, 1) << @intCast(u5, dr.bit_index - dr.extra_offset * 31)); + } + } + + fn finish(dr: *BigTombDeathRemover) void { + if (dr.next_oper < bpi) { + removeOperandDeaths(dr.a, dr.to_remove, 
dr.inst, dr.operands); + } + } +}; diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 1e07e7e719..3a70144407 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -5000,9 +5000,17 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const loop = self.air.extraData(Air.Block, ty_pl.payload); const body = self.air.extra[loop.end..][0..loop.data.body_len]; + const liveness_loop = self.liveness.getLoop(inst); const start_index = @intCast(u32, self.mir_instructions.len); + try self.genBody(body); try self.jump(start_index); + + try self.ensureProcessDeathCapacity(liveness_loop.deaths.len); + for (liveness_loop.deaths) |operand| { + self.processDeath(operand); + } + return self.finishAirBookkeeping(); } diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 1cf0e2981e..ed94b895ed 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -4923,9 +4923,17 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const loop = self.air.extraData(Air.Block, ty_pl.payload); const body = self.air.extra[loop.end..][0..loop.data.body_len]; + const liveness_loop = self.liveness.getLoop(inst); const start_index = @intCast(Mir.Inst.Index, self.mir_instructions.len); + try self.genBody(body); try self.jump(start_index); + + try self.ensureProcessDeathCapacity(liveness_loop.deaths.len); + for (liveness_loop.deaths) |operand| { + self.processDeath(operand); + } + return self.finishAirBookkeeping(); } diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index a1b1be3a76..3816b63ab6 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -1750,9 +1750,17 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const loop = self.air.extraData(Air.Block, ty_pl.payload); const body = self.air.extra[loop.end .. loop.end + loop.data.body_len]; + const liveness_loop = self.liveness.getLoop(inst); const start = @intCast(u32, self.mir_instructions.len); + try self.genBody(body); try self.jump(start); + + try self.ensureProcessDeathCapacity(liveness_loop.deaths.len); + for (liveness_loop.deaths) |operand| { + self.processDeath(operand); + } + return self.finishAirBookkeeping(); } diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index fc5d13e5a4..da66ab29f4 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -3041,6 +3041,7 @@ fn airLoop(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const loop = func.air.extraData(Air.Block, ty_pl.payload); const body = func.air.extra[loop.end..][0..loop.data.body_len]; + const liveness_loop = func.liveness.getLoop(inst); // result type of loop is always 'noreturn', meaning we can always // emit the wasm type 'block_empty'. 
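
(Motivating sketch, hypothetical user code rather than part of this patch: an
operand defined outside a loop whose last use is inside the body must stay
alive for every iteration, so its death now belongs to the loop itself.)

    fn sum(limit: u32) u32 {
        var total: u32 = 0;
        var i: u32 = 0;
        // `limit`'s last use is the comparison in the loop body. With eagerly
        // processed deaths, a backend could reuse its register or local after
        // the first iteration; deferring the death to the loop instruction
        // keeps it live until the loop as a whole is done.
        while (i < limit) : (i += 1) total += i;
        return total;
    }
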
@@ -3051,6 +3052,11 @@ fn airLoop(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.addLabel(.br, 0); try func.endBlock(); + try func.currentBranch().values.ensureUnusedCapacity(func.gpa, @intCast(u32, liveness_loop.deaths.len)); + for (liveness_loop.deaths) |death| { + func.processDeath(Air.indexToRef(death)); + } + func.finishAir(inst, .none, &.{}); } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 8a3e0852a2..cff63fa6e1 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -6185,6 +6185,7 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { const loop = self.air.extraData(Air.Block, ty_pl.payload); const body = self.air.extra[loop.end..][0..loop.data.body_len]; const jmp_target = @intCast(u32, self.mir_instructions.len); + const liveness_loop = self.liveness.getLoop(inst); { try self.branch_stack.append(.{}); @@ -6208,6 +6209,12 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { try self.canonicaliseBranches(true, &dummy_branch, &branch, true, false); _ = try self.asmJmpReloc(jmp_target); + + try self.ensureProcessDeathCapacity(liveness_loop.deaths.len); + for (liveness_loop.deaths) |operand| { + self.processDeath(operand); + } + return self.finishAirBookkeeping(); } diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 6577089806..5818655d0a 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -77,9 +77,8 @@ pub const LazyFnMap = std.AutoArrayHashMapUnmanaged(LazyFnKey, LazyFnValue); const LoopDepth = u16; const Local = struct { cty_idx: CType.Index, - /// How many loops the last definition was nested in. - loop_depth: LoopDepth, alignas: CType.AlignAs, + is_in_clone: bool, pub fn getType(local: Local) LocalType { return .{ .cty_idx = local.cty_idx, .alignas = local.alignas }; @@ -90,7 +89,6 @@ const LocalIndex = u16; const LocalType = struct { cty_idx: CType.Index, alignas: CType.AlignAs }; const LocalsList = std.AutoArrayHashMapUnmanaged(LocalIndex, void); const LocalsMap = std.AutoArrayHashMapUnmanaged(LocalType, LocalsList); -const LocalsStack = std.ArrayListUnmanaged(LocalsMap); const ValueRenderLocation = enum { FunctionArgument, @@ -279,16 +277,16 @@ pub const Function = struct { /// Which locals are available for reuse, based on Type. /// Only locals in the last stack entry are available for reuse, /// other entries will become available on loop exit. - free_locals_stack: LocalsStack = .{}, - free_locals_clone_depth: LoopDepth = 0, + free_locals_map: LocalsMap = .{}, + is_in_clone: bool = false, /// Locals which will not be freed by Liveness. This is used after a - /// Function body is lowered in order to make `free_locals_stack` have + /// Function body is lowered in order to make `free_locals_map` have /// 100% of the locals within so that it can be used to render the block /// of variable declarations at the top of a function, sorted descending /// by type alignment. /// The value is whether the alloc is static or not. allocs: std.AutoArrayHashMapUnmanaged(LocalIndex, bool) = .{}, - /// Needed for memory used by the keys of free_locals_stack entries. + /// Needed for memory used by the keys of free_locals_map entries. arena: std.heap.ArenaAllocator, fn resolveInst(f: *Function, inst: Air.Inst.Ref) !CValue { @@ -323,18 +321,14 @@ pub const Function = struct { }; } - fn getFreeLocals(f: *Function) *LocalsMap { - return &f.free_locals_stack.items[f.free_locals_stack.items.len - 1]; - } - /// Skips the reuse logic. 
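+    /// The local is tagged with the current `is_in_clone` state; `freeLocal`
+    /// compares against it to decide whether the local may be returned to the
+    /// free list.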
fn allocLocalValue(f: *Function, ty: Type, alignment: u32) !CValue { const gpa = f.object.dg.gpa; const target = f.object.dg.module.getTarget(); try f.locals.append(gpa, .{ .cty_idx = try f.typeToIndex(ty, .complete), - .loop_depth = @intCast(LoopDepth, f.free_locals_stack.items.len - 1), .alignas = CType.AlignAs.init(alignment, ty.abiAlignment(target)), + .is_in_clone = f.is_in_clone, }); return .{ .new_local = @intCast(LocalIndex, f.locals.items.len - 1) }; } @@ -348,13 +342,11 @@ pub const Function = struct { /// Only allocates the local; does not print anything. fn allocAlignedLocal(f: *Function, ty: Type, _: CQualifiers, alignment: u32) !CValue { const target = f.object.dg.module.getTarget(); - if (f.getFreeLocals().getPtr(.{ + if (f.free_locals_map.getPtr(.{ .cty_idx = try f.typeToIndex(ty, .complete), .alignas = CType.AlignAs.init(alignment, ty.abiAlignment(target)), })) |locals_list| { if (locals_list.popOrNull()) |local_entry| { - const local = &f.locals.items[local_entry.key]; - local.loop_depth = @intCast(LoopDepth, f.free_locals_stack.items.len - 1); return .{ .new_local = local_entry.key }; } } @@ -485,10 +477,7 @@ pub const Function = struct { const gpa = f.object.dg.gpa; f.allocs.deinit(gpa); f.locals.deinit(gpa); - for (f.free_locals_stack.items) |*free_locals| { - deinitFreeLocalsMap(gpa, free_locals); - } - f.free_locals_stack.deinit(gpa); + deinitFreeLocalsMap(gpa, &f.free_locals_map); f.blocks.deinit(gpa); f.value_map.deinit(); f.lazy_fns.deinit(gpa); @@ -2592,8 +2581,7 @@ pub fn genFunc(f: *Function) !void { o.code_header.appendSliceAssumeCapacity("{\n "); const empty_header_len = o.code_header.items.len; - f.free_locals_stack.clearRetainingCapacity(); - try f.free_locals_stack.append(gpa, .{}); + f.free_locals_map.clearRetainingCapacity(); const main_body = f.air.getMainBody(); try genBody(f, main_body); @@ -2605,7 +2593,7 @@ pub fn genFunc(f: *Function) !void { // Liveness analysis, however, locals from alloc instructions will be // missing. These are added now to complete the map. Then we can sort by // alignment, descending. 
- const free_locals = f.getFreeLocals(); + const free_locals = &f.free_locals_map; for (f.allocs.keys(), f.allocs.values()) |local_index, value| { if (value) continue; // static const local = f.locals.items[local_index]; @@ -4630,30 +4618,16 @@ fn airLoop(f: *Function, inst: Air.Inst.Index) !CValue { const ty_pl = f.air.instructions.items(.data)[inst].ty_pl; const loop = f.air.extraData(Air.Block, ty_pl.payload); const body = f.air.extra[loop.end..][0..loop.data.body_len]; + const liveness_loop = f.liveness.getLoop(inst); const writer = f.object.writer(); - const gpa = f.object.dg.gpa; - try f.free_locals_stack.insert(gpa, f.free_locals_stack.items.len - 1, .{}); - try writer.writeAll("for (;;) "); try genBody(f, body); try writer.writeByte('\n'); - var old_free_locals = f.free_locals_stack.pop(); - defer deinitFreeLocalsMap(gpa, &old_free_locals); - const new_free_locals = f.getFreeLocals(); - var it = new_free_locals.iterator(); - while (it.next()) |entry| { - const gop = try old_free_locals.getOrPut(gpa, entry.key_ptr.*); - if (gop.found_existing) { - try gop.value_ptr.ensureUnusedCapacity(gpa, entry.value_ptr.count()); - for (entry.value_ptr.keys()) |local_index| { - gop.value_ptr.putAssumeCapacityNoClobber(local_index, {}); - } - } else gop.value_ptr.* = entry.value_ptr.move(); + for (liveness_loop.deaths) |operand| { + try die(f, inst, Air.indexToRef(operand)); } - deinitFreeLocalsMap(gpa, new_free_locals); - new_free_locals.* = old_free_locals.move(); return .none; } @@ -4673,7 +4647,7 @@ fn airCondBr(f: *Function, inst: Air.Inst.Index) !CValue { const gpa = f.object.dg.gpa; var cloned_map = try f.value_map.clone(); defer cloned_map.deinit(); - var cloned_frees = try cloneFreeLocalsMap(gpa, f.getFreeLocals()); + var cloned_frees = try cloneFreeLocalsMap(gpa, &f.free_locals_map); defer deinitFreeLocalsMap(gpa, &cloned_frees); // Remember how many locals there were before entering the then branch so @@ -4684,8 +4658,8 @@ fn airCondBr(f: *Function, inst: Air.Inst.Index) !CValue { // that we can notice and make sure not to use them in the else branch. // Any new allocs must be removed from the free list. const pre_allocs_len = @intCast(LocalIndex, f.allocs.count()); - const pre_clone_depth = f.free_locals_clone_depth; - f.free_locals_clone_depth = @intCast(LoopDepth, f.free_locals_stack.items.len); + const was_in_clone = f.is_in_clone; + f.is_in_clone = true; for (liveness_condbr.then_deaths) |operand| { try die(f, inst, Air.indexToRef(operand)); @@ -4706,10 +4680,10 @@ fn airCondBr(f: *Function, inst: Air.Inst.Index) !CValue { f.value_map.deinit(); f.value_map = cloned_map.move(); - const free_locals = f.getFreeLocals(); + const free_locals = &f.free_locals_map; deinitFreeLocalsMap(gpa, free_locals); free_locals.* = cloned_frees.move(); - f.free_locals_clone_depth = pre_clone_depth; + f.is_in_clone = was_in_clone; for (liveness_condbr.else_deaths) |operand| { try die(f, inst, Air.indexToRef(operand)); } @@ -4781,7 +4755,7 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue { if (case_i != last_case_i) { const old_value_map = f.value_map; f.value_map = try old_value_map.clone(); - var free_locals = f.getFreeLocals(); + var free_locals = &f.free_locals_map; const old_free_locals = free_locals.*; free_locals.* = try cloneFreeLocalsMap(gpa, free_locals); @@ -4793,14 +4767,13 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index) !CValue { // we can notice and make sure not to use them in subsequent branches. // Any new allocs must be removed from the free list. 
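+            // Locals created outside a cloned branch must not be returned to the
+            // free list while lowering the clone (and vice versa); the `is_in_clone`
+            // flag set below is what `freeLocal` checks to enforce this.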
const pre_allocs_len = @intCast(LocalIndex, f.allocs.count()); - const pre_clone_depth = f.free_locals_clone_depth; - f.free_locals_clone_depth = @intCast(LoopDepth, f.free_locals_stack.items.len); + const was_in_clone = f.is_in_clone; + f.is_in_clone = true; { defer { - f.free_locals_clone_depth = pre_clone_depth; + f.is_in_clone = was_in_clone; f.value_map.deinit(); - free_locals = f.getFreeLocals(); deinitFreeLocalsMap(gpa, free_locals); f.value_map = old_value_map; free_locals.* = old_free_locals; @@ -7870,8 +7843,8 @@ fn freeLocal(f: *Function, inst: Air.Inst.Index, local_index: LocalIndex, ref_in const gpa = f.object.dg.gpa; const local = &f.locals.items[local_index]; log.debug("%{d}: freeing t{d} (operand %{d})", .{ inst, local_index, ref_inst }); - if (local.loop_depth < f.free_locals_clone_depth) return; - const gop = try f.free_locals_stack.items[local.loop_depth].getOrPut(gpa, local.getType()); + if (f.is_in_clone != local.is_in_clone) return; + const gop = try f.free_locals_map.getOrPut(gpa, local.getType()); if (!gop.found_existing) gop.value_ptr.* = .{}; if (std.debug.runtime_safety) { // If this trips, an unfreeable allocation was attempted to be freed. @@ -7935,23 +7908,21 @@ fn noticeBranchFrees( pre_allocs_len: LocalIndex, inst: Air.Inst.Index, ) !void { - const free_locals = f.getFreeLocals(); - for (f.locals.items[pre_locals_len..], pre_locals_len..) |*local, local_i| { const local_index = @intCast(LocalIndex, local_i); if (f.allocs.contains(local_index)) { if (std.debug.runtime_safety) { // new allocs are no longer freeable, so make sure they aren't in the free list - if (free_locals.getPtr(local.getType())) |locals_list| { + if (f.free_locals_map.getPtr(local.getType())) |locals_list| { assert(!locals_list.contains(local_index)); } } continue; } - // free more deeply nested locals from other branches at current depth - assert(local.loop_depth >= f.free_locals_stack.items.len - 1); - local.loop_depth = @intCast(LoopDepth, f.free_locals_stack.items.len - 1); + // free cloned locals from other branches at current cloned-ness + std.debug.assert(local.is_in_clone or !f.is_in_clone); + local.is_in_clone = f.is_in_clone; try freeLocal(f, inst, local_index, 0); } @@ -7959,6 +7930,6 @@ fn noticeBranchFrees( const local_index = @intCast(LocalIndex, local_i); const local = &f.locals.items[local_index]; // new allocs are no longer freeable, so remove them from the free list - if (free_locals.getPtr(local.getType())) |locals_list| _ = locals_list.swapRemove(local_index); + if (f.free_locals_map.getPtr(local.getType())) |locals_list| _ = locals_list.swapRemove(local_index); } } diff --git a/src/print_air.zig b/src/print_air.zig index 2493d234eb..2970d43925 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -267,9 +267,9 @@ const Writer = struct { .c_va_copy, => try w.writeTyOp(s, inst), - .block, - .loop, - => try w.writeBlock(s, inst), + .block => try w.writeBlock(s, inst), + + .loop => try w.writeLoop(s, inst), .slice, .slice_elem_ptr, @@ -401,6 +401,31 @@ const Writer = struct { try s.writeAll("}"); } + fn writeLoop(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { + const ty_pl = w.air.instructions.items(.data)[inst].ty_pl; + const extra = w.air.extraData(Air.Block, ty_pl.payload); + const body = w.air.extra[extra.end..][0..extra.data.body_len]; + const liveness_loop = w.liveness.getLoop(inst); + + try w.writeType(s, w.air.getRefType(ty_pl.ty)); + if (w.skip_body) return s.writeAll(", ..."); + try s.writeAll(", {\n"); + const old_indent = w.indent; + 
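+        // Print the body indented one level, then any deferred loop deaths as
+        // `%N!` markers on their own line before the closing brace.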
w.indent += 2; + try w.writeBody(s, body); + if (liveness_loop.deaths.len != 0) { + try s.writeByteNTimes(' ', w.indent); + for (liveness_loop.deaths, 0..) |operand, i| { + if (i != 0) try s.writeAll(" "); + try s.print("%{d}!", .{operand}); + } + try s.writeAll("\n"); + } + w.indent = old_indent; + try s.writeByteNTimes(' ', w.indent); + try s.writeAll("}"); + } + fn writeAggregateInit(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { const ty_pl = w.air.instructions.items(.data)[inst].ty_pl; const vector_ty = w.air.getRefType(ty_pl.ty); diff --git a/test/behavior/const_slice_child.zig b/test/behavior/const_slice_child.zig index 9ce526562c..35bc007d84 100644 --- a/test/behavior/const_slice_child.zig +++ b/test/behavior/const_slice_child.zig @@ -7,7 +7,6 @@ const expect = testing.expect; var argv: [*]const [*]const u8 = undefined; test "const slice child" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/for.zig b/test/behavior/for.zig index 8ab378c6d4..cdef0c040e 100644 --- a/test/behavior/for.zig +++ b/test/behavior/for.zig @@ -227,7 +227,6 @@ test "else continue outer for" { test "for loop with else branch" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO { var x = [_]u32{ 1, 2 }; @@ -377,7 +376,6 @@ test "raw pointer and slice" { test "raw pointer and counter" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO var buf: [10]u8 = undefined; diff --git a/test/behavior/slice.zig b/test/behavior/slice.zig index a9aa9e50e1..237ddd195b 100644 --- a/test/behavior/slice.zig +++ b/test/behavior/slice.zig @@ -238,7 +238,6 @@ test "C pointer" { test "C pointer slice access" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO var buf: [10]u32 = [1]u32{42} ** 10;
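
(For reference, the writeLoop change above renders each deferred death after
the loop body with a trailing "!". Schematic AIR dump with hypothetical
instruction numbers, where %3 and %5 are defined outside the loop but last
used inside it:)

    %12 = loop(noreturn, {
      ...
      %3! %5!
    })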