From 8c8dfb35f398407319764f0f8998de34c5247ed6 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Tue, 14 Jan 2025 17:56:25 -0500 Subject: [PATCH] x86_64: fix crashes compiling the compiler and tests --- lib/std/Thread.zig | 4 +- lib/std/Thread/Pool.zig | 6 +- lib/std/crypto/aes/aesni.zig | 2 +- src/arch/x86_64/CodeGen.zig | 720 ++++++++++++++++++---------------- src/arch/x86_64/abi.zig | 8 +- src/main.zig | 8 + src/register_manager.zig | 78 ++-- test/behavior/x86_64/math.zig | 10 + 8 files changed, 446 insertions(+), 390 deletions(-) diff --git a/lib/std/Thread.zig b/lib/std/Thread.zig index 69dbcf3947..9650bf8373 100644 --- a/lib/std/Thread.zig +++ b/lib/std/Thread.zig @@ -372,9 +372,11 @@ pub const SpawnConfig = struct { // https://github.com/ziglang/zig/issues/157 /// Size in bytes of the Thread's stack - stack_size: usize = 16 * 1024 * 1024, + stack_size: usize = default_stack_size, /// The allocator to be used to allocate memory for the to-be-spawned thread allocator: ?std.mem.Allocator = null, + + pub const default_stack_size = 16 * 1024 * 1024; }; pub const SpawnError = error{ diff --git a/lib/std/Thread/Pool.zig b/lib/std/Thread/Pool.zig index 86bac7ce46..874050a35f 100644 --- a/lib/std/Thread/Pool.zig +++ b/lib/std/Thread/Pool.zig @@ -27,6 +27,7 @@ pub const Options = struct { allocator: std.mem.Allocator, n_jobs: ?usize = null, track_ids: bool = false, + stack_size: usize = std.Thread.SpawnConfig.default_stack_size, }; pub fn init(pool: *Pool, options: Options) !void { @@ -54,7 +55,10 @@ pub fn init(pool: *Pool, options: Options) !void { errdefer pool.join(spawned); for (pool.threads) |*thread| { - thread.* = try std.Thread.spawn(.{}, worker, .{pool}); + thread.* = try std.Thread.spawn(.{ + .stack_size = options.stack_size, + .allocator = allocator, + }, worker, .{pool}); spawned += 1; } } diff --git a/lib/std/crypto/aes/aesni.zig b/lib/std/crypto/aes/aesni.zig index 2793ff4184..fbf3e37300 100644 --- a/lib/std/crypto/aes/aesni.zig +++ b/lib/std/crypto/aes/aesni.zig @@ -4,7 +4,7 @@ const mem = std.mem; const debug = std.debug; const has_vaes = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .vaes); -const has_avx512f = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f); +const has_avx512f = builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f); /// A single AES block. 
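// The aesni.zig hunk above widens the `has_avx512f` guard: the AVX-512 AES
// paths are now also skipped when this code is being compiled by the
// self-hosted x86_64 backend (`builtin.zig_backend == .stage2_x86_64`), which
// presumably cannot yet lower the 512-bit vector operations those paths use.
// A minimal self-contained sketch of the same gating pattern, using only APIs
// that already appear in this patch; the helper name
// `backendSupportsX86Feature` is hypothetical, not part of the change:

const builtin = @import("builtin");
const std = @import("std");

fn backendSupportsX86Feature(comptime feature: std.Target.x86.Feature) bool {
    // Gate on the backend as well as the target CPU: a feature can be
    // present on the target yet unsupported by the code generator that is
    // compiling this very code.
    return builtin.cpu.arch == .x86_64 and
        builtin.zig_backend != .stage2_x86_64 and
        std.Target.x86.featureSetHas(builtin.cpu.features, feature);
}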
pub const Block = struct { diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index d291ec7da9..f30ecf34df 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -634,42 +634,14 @@ const InstTracking = struct { } fn reuseFrame(self: *InstTracking) void { - switch (self.long) { - .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } }, - else => {}, - } - self.short = switch (self.long) { - .none, - .unreach, - .undef, - .immediate, - .memory, - .load_direct, - .lea_direct, - .load_got, - .lea_got, - .load_tlv, - .lea_tlv, - .load_frame, - .lea_frame, - .load_symbol, - .lea_symbol, - => self.long, - .dead, - .eflags, - .register, - .register_pair, - .register_triple, - .register_quadruple, - .register_offset, - .register_overflow, - .register_mask, - .indirect, - .elementwise_regs_then_frame, - .reserved_frame, - .air_ref, - => unreachable, - }; + self.* = .init(switch (self.long) { + .none => switch (self.short) { + .dead => .none, + else => |short| short, + }, + .reserved_frame => |index| .{ .load_frame = .{ .index = index } }, + else => |long| long, + }); } fn trackSpill(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void { @@ -681,6 +653,15 @@ const InstTracking = struct { fn verifyMaterialize(self: InstTracking, target: InstTracking) void { switch (self.long) { .none, + .load_frame, + .reserved_frame, + => switch (target.long) { + .none, + .load_frame, + .reserved_frame, + => {}, + else => unreachable, + }, .unreach, .undef, .immediate, @@ -695,15 +676,6 @@ const InstTracking = struct { .load_symbol, .lea_symbol, => assert(std.meta.eql(self.long, target.long)), - .load_frame, - .reserved_frame, - => switch (target.long) { - .none, - .load_frame, - .reserved_frame, - => {}, - else => unreachable, - }, .dead, .eflags, .register, @@ -754,10 +726,11 @@ const InstTracking = struct { tracking_log.debug("{} => {} (materialize)", .{ inst, self.* }); } - fn resurrect(self: *InstTracking, inst: Air.Inst.Index, scope_generation: u32) void { + fn resurrect(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index, scope_generation: u32) !void { switch (self.short) { .dead => |die_generation| if (die_generation >= scope_generation) { self.reuseFrame(); + try function.getValue(self.short, inst); tracking_log.debug("{} => {} (resurrect)", .{ inst, self.* }); }, else => {}, @@ -767,6 +740,7 @@ const InstTracking = struct { fn die(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void { if (self.short == .dead) return; try function.freeValue(self.short); + if (self.long == .none) self.long = self.short; self.short = .{ .dead = function.scope_generation }; tracking_log.debug("{} => {} (death)", .{ inst, self.* }); } @@ -2359,7 +2333,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { - @setEvalBranchQuota(1_600); + @setEvalBranchQuota(1_700); const pt = cg.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -2520,7 +2494,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); try ops[0].toSlicePtr(cg); var res: [1]Temp = undefined; - cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ + if (ty_pl.ty.toType().elemType2(zcu).hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, }, @@ -2625,7 +2599,9 @@ fn 
genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { ops[1].tracking(cg), }), else => |e| return e, - }; + } else { // hack around Sema OPV bugs + res[0] = ops[0]; + } for (ops) |op| for (res) |r| { if (op.index == r.index) break; } else try op.die(cg); @@ -2637,7 +2613,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); try ops[0].toSlicePtr(cg); var res: [1]Temp = undefined; - cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ + if (ty_pl.ty.toType().elemType2(zcu).hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, }, @@ -2757,7 +2733,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { ops[1].tracking(cg), }), else => |e| return e, - }; + } else { + // hack around Sema OPV bugs + res[0] = ops[0]; + } for (ops) |op| for (res) |r| { if (op.index == r.index) break; } else try op.die(cg); @@ -2799,79 +2778,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { })) { else => unreachable, inline .@"and", .@"or", .xor => |mir_tag| comptime &.{ .{ - .required_features = .{ .avx2, null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } }, - .patterns = &.{ - .{ .src = .{ .to_ymm, .mem } }, - .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_ymm, .to_ymm } }, - }, - .dst_temps = .{.{ .rc = .sse }}, - .each = .{ .once = &.{ - .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } }, - .patterns = &.{ - .{ .src = .{ .to_ymm, .mem } }, - .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_ymm, .to_ymm } }, - }, - .dst_temps = .{.{ .rc = .sse }}, - .each = .{ .once = &.{ - .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, - .patterns = &.{ - .{ .src = .{ .to_xmm, .mem } }, - .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_xmm, .to_xmm } }, - }, - .dst_temps = .{.{ .rc = .sse }}, - .each = .{ .once = &.{ - .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ }, - } }, - }, .{ - .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, - .patterns = &.{ - .{ .src = .{ .to_mut_xmm, .mem } }, - .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_xmm, .to_xmm } }, - }, - .dst_temps = .{.{ .ref = .src0 }}, - .each = .{ .once = &.{ - .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, - .patterns = &.{ - .{ .src = .{ .to_mut_xmm, .mem } }, - .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_xmm, .to_xmm } }, - }, - .dst_temps = .{.{ .ref = .src0 }}, - .each = .{ .once = &.{ - .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .mmx, null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_mut_mm, .mem } }, - .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, - .{ 
.src = .{ .to_mut_mm, .to_mm } }, - }, - .dst_temps = .{.{ .ref = .src0 }}, - .each = .{ .once = &.{ - .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .int_or_vec = .byte }, .{ .int_or_vec = .byte } }, + .src_constraints = .{ .{ .size = .byte }, .{ .size = .byte } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm8 } }, .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2889,7 +2796,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, mir_tag, .dst0b, .src1b, ._, ._ }, } }, }, .{ - .src_constraints = .{ .{ .int_or_vec = .word }, .{ .int_or_vec = .word } }, + .src_constraints = .{ .{ .size = .word }, .{ .size = .word } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm16 } }, .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2907,7 +2814,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, mir_tag, .dst0w, .src1w, ._, ._ }, } }, }, .{ - .src_constraints = .{ .{ .int_or_vec = .dword }, .{ .int_or_vec = .dword } }, + .src_constraints = .{ .{ .size = .dword }, .{ .size = .dword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm32 } }, .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2926,7 +2833,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_features = .{ .@"64bit", null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } }, + .src_constraints = .{ .{ .size = .qword }, .{ .size = .qword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .simm32 } }, .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2944,11 +2851,80 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, mir_tag, .dst0q, .src1q, ._, ._ }, } }, }, .{ - .required_features = .{ .avx2, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .size = .qword }, .{ .size = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .size = .xword }, .{ .size = .xword } }, + .patterns = &.{ + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .size = .xword }, .{ .size = .xword } }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .size = .xword }, .{ .size = .xword } }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each 
= .{ .once = &.{ + .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .size = .yword }, .{ .size = .yword } }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .size = .yword }, .{ .size = .yword } }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .multiple_size = .yword }, .{ .multiple_size = .yword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2972,10 +2948,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, - }, + .src_constraints = .{ .{ .multiple_size = .yword }, .{ .multiple_size = .yword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2999,10 +2972,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - }, + .src_constraints = .{ .{ .multiple_size = .xword }, .{ .multiple_size = .xword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3026,10 +2996,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - }, + .src_constraints = .{ .{ .multiple_size = .xword }, .{ .multiple_size = .xword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3053,10 +3020,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - }, + .src_constraints = .{ .{ .multiple_size = .xword }, .{ .multiple_size = .xword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3080,10 +3044,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_features = .{ .mmx, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, - }, + .src_constraints = .{ .{ .multiple_size = .qword }, .{ .multiple_size = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3106,10 +3067,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - 
.src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, - }, + .src_constraints = .{ .{ .multiple_size = .qword }, .{ .multiple_size = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -6983,7 +6941,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data; switch (extra.compareOperator()) { - .eq, .neq => {}, + .eq, .neq => if (cg.typeOf(extra.lhs).scalarType(zcu).isRuntimeFloat()) + break :fallback try cg.airCmpVector(inst), else => break :fallback try cg.airCmpVector(inst), } var ops = try cg.tempsFromOperands(inst, .{ extra.lhs, extra.rhs }); @@ -9763,22 +9722,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .auto, .@"extern" => @intCast(agg_ty.structFieldOffset(extra.field_index, zcu)), .@"packed" => break :fallback try cg.airStructFieldVal(inst), }; - if (field_ty.hasRuntimeBitsIgnoreComptime(zcu)) { - var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand}); - var res = try ops[0].read(field_ty, .{ .disp = field_off }, cg); - for (ops) |op| if (op.index != res.index) try op.die(cg); - try res.moveTo(inst, cg); - } else { - // hack around Sema OPV bugs - const res = try cg.tempInit(field_ty, .none); - try res.moveTo(inst, cg); - } + var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand}); + // hack around Sema OPV bugs + var res = if (field_ty.hasRuntimeBitsIgnoreComptime(zcu)) + try ops[0].read(field_ty, .{ .disp = field_off }, cg) + else + try cg.tempInit(field_ty, .none); + for (ops) |op| if (op.index != res.index) try op.die(cg); + try res.moveTo(inst, cg); }, .set_union_tag => if (use_old) try cg.airSetUnionTag(inst) else { const bin_op = air_datas[@intFromEnum(inst)].bin_op; const union_ty = cg.typeOf(bin_op.lhs).childType(zcu); - var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); const union_layout = union_ty.unionGetLayout(zcu); + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); // hack around Sema OPV bugs if (union_layout.tag_size > 0) try ops[0].store(&ops[1], .{ .disp = @intCast(union_layout.tagOffset()), @@ -9834,11 +9791,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .ptr_elem_val => try cg.airPtrElemVal(inst), } else { const bin_op = air_datas[@intFromEnum(inst)].bin_op; + const res_ty = cg.typeOf(bin_op.lhs).elemType2(zcu); var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); try ops[0].toSlicePtr(cg); var res: [1]Temp = undefined; - const res_ty = cg.typeOf(bin_op.lhs).elemType2(zcu); - cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{ + if (res_ty.hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{ .dst_constraints = .{.{ .int = .byte }}, .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, @@ -9912,51 +9869,51 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .dst0q, .leasi(.qword, .src0, .@"8", .src1), ._, ._ }, } }, } }) catch |err| switch (err) { - error.SelectFailed => switch (res_ty.abiSize(zcu)) { - // hack around Sema OPV bugs - 0 => res[0] = try cg.tempInit(res_ty, .none), - else => |elem_size| { - while (true) for (&ops) |*op| { - if (try op.toRegClass(true, .general_purpose, cg)) break; - } else break; - const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64(); - const 
rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64(); - if (!std.math.isPowerOfTwo(elem_size)) { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegisterImmediate( - .{ .i_, .mul }, - rhs_reg, - rhs_reg, - .u(elem_size), - ); - try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ - .base = .{ .reg = lhs_reg }, - .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, - }); - } else if (elem_size > 8) { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterImmediate( - .{ ._l, .sh }, - rhs_reg, - .u(std.math.log2_int(u64, elem_size)), - ); - try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ - .base = .{ .reg = lhs_reg }, - .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, - }); - } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + error.SelectFailed => { + const elem_size = res_ty.abiSize(zcu); + while (true) for (&ops) |*op| { + if (try op.toRegClass(true, .general_purpose, cg)) break; + } else break; + const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64(); + const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64(); + if (!std.math.isPowerOfTwo(elem_size)) { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegisterImmediate( + .{ .i_, .mul }, + rhs_reg, + rhs_reg, + .u(elem_size), + ); + try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ .base = .{ .reg = lhs_reg }, - .mod = .{ .rm = .{ - .size = .qword, - .index = rhs_reg, - .scale = .fromFactor(@intCast(elem_size)), - } }, + .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, }); - res[0] = try ops[0].load(res_ty, .{}, cg); - }, + } else if (elem_size > 8) { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate( + .{ ._l, .sh }, + rhs_reg, + .u(std.math.log2_int(u64, elem_size)), + ); + try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + .base = .{ .reg = lhs_reg }, + .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, + }); + } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + .base = .{ .reg = lhs_reg }, + .mod = .{ .rm = .{ + .size = .qword, + .index = rhs_reg, + .scale = .fromFactor(@intCast(elem_size)), + } }, + }); + res[0] = try ops[0].load(res_ty, .{}, cg); }, else => |e| return e, - }; + } else { + // hack around Sema OPV bugs + res[0] = try cg.tempInit(res_ty, .none); + } for (ops) |op| for (res) |r| { if (op.index == r.index) break; } else try op.die(cg); @@ -10499,7 +10456,7 @@ fn restoreState(self: *CodeGen, state: State, deaths: []const Air.Inst.Index, co if (opts.resurrect) for ( self.inst_tracking.keys()[Temp.Index.max..state.inst_tracking_len], self.inst_tracking.values()[Temp.Index.max..state.inst_tracking_len], - ) |inst, *tracking| tracking.resurrect(inst, state.scope_generation); + ) |inst, *tracking| try tracking.resurrect(self, inst, state.scope_generation); for (deaths) |death| try self.processDeath(death); const ExpectedContents = [@typeInfo(RegisterManager.TrackedRegisters).array.len]RegisterLock; @@ -10879,7 +10836,8 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_ty = self.typeOfIndex(inst); const result = @as(?MCValue, result: { - const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); + const src_abi_size: u31 = @intCast(src_ty.abiSize(zcu)); + const dst_abi_size: u31 = @intCast(dst_ty.abiSize(zcu)); const src_int_info = src_ty.intInfo(zcu); const dst_int_info = dst_ty.intInfo(zcu); @@ -10890,7 +10848,6 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { const src_mcv = try self.resolveInst(ty_op.operand); if (dst_ty.isVector(zcu)) { - const src_abi_size: u32 = 
@intCast(src_ty.abiSize(zcu)); const max_abi_size = @max(dst_abi_size, src_abi_size); if (max_abi_size > self.vectorSize(.int)) break :result null; const has_avx = self.hasFeature(.avx); @@ -11060,7 +11017,8 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { else => src_int_info.bits, }; - const dst_mcv = if (dst_int_info.bits <= src_storage_bits and + const dst_mcv = if ((if (src_mcv.getReg()) |src_reg| src_reg.class() == .general_purpose else src_abi_size > 8) and + dst_int_info.bits <= src_storage_bits and std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable == std.math.divCeil(u32, src_storage_bits, 64) catch unreachable and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: { @@ -11079,8 +11037,8 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { break :result .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) }; } - const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable; - const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable; + const src_limbs_len = std.math.divCeil(u31, src_abi_size, 8) catch unreachable; + const dst_limbs_len = @divExact(dst_abi_size, 8); const high_mcv: MCValue = if (dst_mcv.isBase()) dst_mcv.address().offset((src_limbs_len - 1) * 8).deref() @@ -12067,6 +12025,7 @@ fn genSetFrameTruncatedOverflowCompare( defer if (src_lock) |lock| self.register_manager.unlockReg(lock); const ty = tuple_ty.fieldType(0, zcu); + const ty_size = ty.abiSize(zcu); const int_info = ty.intInfo(zcu); const hi_bits = (int_info.bits - 1) % 64 + 1; @@ -12100,6 +12059,13 @@ fn genSetFrameTruncatedOverflowCompare( try self.asmSetccRegister(.ne, eq_reg.to8()); try self.genBinOpMir(.{ ._, .@"or" }, .u8, .{ .register = overflow_reg }, .{ .register = eq_reg }); } + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(tuple_ty.structFieldOffset(1, zcu)), + tuple_ty.fieldType(1, zcu), + if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne }, + .{}, + ); const payload_off: i32 = @intCast(tuple_ty.structFieldOffset(0, zcu)); if (hi_limb_off > 0) try self.genSetMem( @@ -12116,13 +12082,20 @@ fn genSetFrameTruncatedOverflowCompare( .{ .register = scratch_reg }, .{}, ); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(tuple_ty.structFieldOffset(1, zcu)), - tuple_ty.fieldType(1, zcu), - if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne }, - .{}, - ); + var ext_off: i32 = hi_limb_off + 8; + if (ext_off < ty_size) { + switch (int_info.signedness) { + .signed => try self.asmRegisterImmediate(.{ ._r, .sa }, scratch_reg.to64(), .s(63)), + .unsigned => try self.asmRegisterRegister(.{ ._, .xor }, scratch_reg.to32(), scratch_reg.to32()), + } + while (ext_off < ty_size) : (ext_off += 8) try self.genSetMem( + .{ .frame = frame_index }, + payload_off + ext_off, + limb_ty, + .{ .register = scratch_reg }, + .{}, + ); + } } fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { @@ -13581,9 +13554,12 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { .{ ._, .bt }, .{ .base = .{ .frame = frame_index }, - .mod = .{ .rm = .{ .size = .qword } }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = @intCast(index_imm / 64 * 8), + } }, }, - .u(index_imm), + .u(index_imm % 64), ), else => try self.asmMemoryRegister( .{ ._, .bt }, @@ -13603,8 +13579,11 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { .load_frame => switch (index_mcv) { .immediate => |index_imm| try 
self.asmMemoryImmediate( .{ ._, .bt }, - try array_mat_mcv.mem(self, .{ .size = .qword }), - .u(index_imm), + try array_mat_mcv.mem(self, .{ + .size = .qword, + .disp = @intCast(index_imm / 64 * 8), + }), + .u(index_imm % 64), ), else => try self.asmMemoryRegister( .{ ._, .bt }, @@ -13622,9 +13601,12 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { .base = .{ .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()), }, - .mod = .{ .rm = .{ .size = .qword } }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = @intCast(index_imm / 64 * 8), + } }, }, - .u(index_imm), + .u(index_imm % 64), ), else => try self.asmMemoryRegister( .{ ._, .bt }, @@ -14451,13 +14433,18 @@ fn genByteSwap( return src_mcv; }, 9...16 => { - switch (src_mcv) { + const mat_src_mcv: MCValue = mat_src_mcv: switch (src_mcv) { + .register => { + const frame_index = try self.allocFrameIndex(.initSpill(src_ty, zcu)); + try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, src_mcv, .{}); + break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } }; + }, .register_pair => |src_regs| if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { for (src_regs) |src_reg| try self.asmRegister(.{ ._, .bswap }, src_reg.to64()); return .{ .register_pair = .{ src_regs[1], src_regs[0] } }; - }, - else => {}, - } + } else src_mcv, + else => src_mcv, + }; const dst_regs = try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp); @@ -14465,18 +14452,18 @@ fn genByteSwap( defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); for (dst_regs, 0..) |dst_reg, limb_index| { - if (src_mcv.isBase()) { + if (mat_src_mcv.isBase()) { try self.asmRegisterMemory( .{ ._, if (has_movbe) .movbe else .mov }, dst_reg.to64(), - try src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }), + try mat_src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }), ); if (!has_movbe) try self.asmRegister(.{ ._, .bswap }, dst_reg.to64()); } else { try self.asmRegisterRegister( .{ ._, .mov }, dst_reg.to64(), - src_mcv.register_pair[limb_index].to64(), + mat_src_mcv.register_pair[limb_index].to64(), ); try self.asmRegister(.{ ._, .bswap }, dst_reg.to64()); } @@ -15680,6 +15667,15 @@ fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg); defer self.register_manager.unlockReg(ptr_lock); + const mat_src_mcv: MCValue = mat_src_mcv: switch (src_mcv) { + .register => if (src_bit_size > 64) { + const frame_index = try self.allocFrameIndex(.initSpill(src_ty, self.pt.zcu)); + try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, src_mcv, .{}); + break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } }; + } else src_mcv, + else => src_mcv, + }; + var limb_i: u16 = 0; while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) { const part_bit_off = if (limb_i == 0) src_bit_off else 0; @@ -15712,7 +15708,7 @@ fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.genSetReg(tmp_reg, limb_ty, src_mcv, .{}); + try self.genSetReg(tmp_reg, limb_ty, mat_src_mcv, .{}); switch (limb_i) { 0 => try self.genShiftBinOpMir( .{ ._l, .sh }, @@ -15743,8 +15739,8 @@ fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) defer self.register_manager.unlockReg(tmp_lock); try 
self.genSetReg(tmp_reg, limb_ty, switch (limb_i) { - 0 => src_mcv, - else => src_mcv.address().offset(limb_i * limb_abi_size).deref(), + 0 => mat_src_mcv, + else => mat_src_mcv.address().offset(limb_i * limb_abi_size).deref(), }, .{}); try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask }); try self.asmMemoryRegister( @@ -17228,7 +17224,7 @@ fn genMulDivBinOp( dst_mcv.address(), lhs_mcv.address(), rhs_mcv.address(), - .{ .immediate = src_info.bits }, + .{ .immediate = 8 * src_abi_size }, }, .{}); return dst_mcv; }, @@ -17246,7 +17242,8 @@ fn genMulDivBinOp( const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx }); defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock); - const signedness = ty.intInfo(zcu).signedness; + const int_info = ty.intInfo(zcu); + const signedness = int_info.signedness; switch (tag) { .mul, .mul_wrap, @@ -17279,6 +17276,15 @@ fn genMulDivBinOp( }, }, ty, lhs_mcv, rhs_mcv); + switch (tag) { + .mul, .rem, .div_trunc, .div_exact => {}, + .mul_wrap => if (dst_ty.intInfo(zcu).bits < 8 * dst_abi_size) try self.truncateRegister( + dst_ty, + if (dst_abi_size <= 8) .rax else .rdx, + ), + else => unreachable, + } + if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) { .mul, .mul_wrap, .div_trunc, .div_exact => .rax, .rem => .rdx, @@ -21954,8 +21960,9 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { break :arg .{ .indirect = .{ .reg = try self.copyToTmpRegister(.usize, ptr_mcv) } }; }; }; - if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| { - _ = self.register_manager.lockReg(reg); + if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |tracked_index| { + try self.register_manager.getRegIndex(tracked_index, if (output == .none) inst else null); + _ = self.register_manager.lockRegIndexAssumeUnused(tracked_index); }; if (!std.mem.eql(u8, name, "_")) arg_map.putAssumeCapacityNoClobber(name, @intCast(args.items.len)); @@ -22881,7 +22888,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128()); try self.asmRegisterRegister(.{ ._ps, .movhl }, tmp_reg.to128(), src_reg.to128()); - try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[1].to64(), src_reg.to128()); + try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[1].to64(), tmp_reg.to128()); } return; } else unreachable, @@ -23831,10 +23838,12 @@ fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_rc = self.regSetForType(dst_ty); const src_rc = self.regSetForType(src_ty); - const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; + const src_lock = if (src_mcv.getReg()) |src_reg| self.register_manager.lockReg(src_reg) else null; defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - const dst_mcv = if (dst_rc.supersetOf(src_rc) and dst_ty.abiSize(zcu) <= src_ty.abiSize(zcu) and + const dst_mcv = if ((if (src_mcv.getReg()) |src_reg| src_reg.class() == .general_purpose else true) and + dst_rc.supersetOf(src_rc) and dst_ty.abiSize(zcu) <= src_ty.abiSize(zcu) and + dst_ty.abiAlignment(zcu).order(src_ty.abiAlignment(zcu)).compare(.lte) and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: { const dst_mcv = try self.allocRegOrMem(inst, true); try self.genCopy(switch (std.math.order(dst_ty.abiSize(zcu), src_ty.abiSize(zcu))) { @@ -27702,7 +27711,7 @@ fn registerAlias(reg: Register, 
size_bytes: u32) Register { reg else unreachable, - .x87 => if (size_bytes == 16) + .x87 => if (size_bytes >= 10 and size_bytes <= 16) reg else unreachable, @@ -28574,23 +28583,19 @@ const Temp = struct { try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }), ); }, - .register => |val_reg| try ptr.storeReg(val_ty, registerAlias( + .register => |val_reg| try ptr.storeRegs(val_ty, &.{registerAlias( val_reg, @intCast(val_ty.abiSize(cg.pt.zcu)), - ), cg), + )}, cg), inline .register_pair, .register_triple, .register_quadruple, - => |val_regs| for (val_regs) |val_reg| { - try ptr.storeReg(val_ty, val_reg, cg); - try ptr.toOffset(@divExact(val_reg.bitSize(), 8), cg); - while (try ptr.toLea(cg)) {} - }, + => |val_regs| try ptr.storeRegs(val_ty, &val_regs, cg), .register_offset => |val_reg_off| switch (val_reg_off.off) { - 0 => try ptr.storeReg(val_ty, registerAlias( + 0 => try ptr.storeRegs(val_ty, &.{registerAlias( val_reg_off.reg, @intCast(val_ty.abiSize(cg.pt.zcu)), - ), cg), + )}, cg), else => continue :val_to_gpr, }, .register_overflow => |val_reg_ov| { @@ -28608,7 +28613,7 @@ const Temp = struct { else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }), }); const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu)); - try ptr.storeReg(first_ty, registerAlias(val_reg_ov.reg, first_size), cg); + try ptr.storeRegs(first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg); try ptr.toOffset(first_size, cg); try cg.asmSetccMemory( val_reg_ov.eflags, @@ -28675,6 +28680,15 @@ const Temp = struct { const val_mcv = val.tracking(cg).short; switch (val_mcv) { else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), + .undef => if (opts.safe) { + var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(opts.disp)); + var pat = try cg.tempInit(.u8, .{ .immediate = 0xaa }); + var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); + try dst_ptr.memset(&pat, &len, cg); + try dst_ptr.die(cg); + try pat.die(cg); + try len.die(cg); + }, .immediate => |val_imm| { const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31| .u(val_uimm31) @@ -28691,24 +28705,52 @@ const Temp = struct { val_op, ); }, - .register => |val_reg| try dst.writeReg(opts.disp, val_ty, registerAlias( + .eflags => |cc| try cg.asmSetccMemory( + cc, + try dst.tracking(cg).short.mem(cg, .{ + .size = .byte, + .disp = opts.disp, + }), + ), + .register => |val_reg| try dst.writeRegs(opts.disp, val_ty, &.{registerAlias( val_reg, @intCast(val_ty.abiSize(cg.pt.zcu)), - ), cg), - inline .register_pair, .register_triple, .register_quadruple => |val_regs| { - var disp = opts.disp; - for (val_regs) |val_reg| { - try dst.writeReg(disp, val_ty, val_reg, cg); - disp += @divExact(val_reg.bitSize(), 8); - } - }, + )}, cg), + inline .register_pair, + .register_triple, + .register_quadruple, + => |val_regs| try dst.writeRegs(opts.disp, val_ty, &val_regs, cg), .register_offset => |val_reg_off| switch (val_reg_off.off) { - 0 => try dst.writeReg(opts.disp, val_ty, registerAlias( + 0 => try dst.writeRegs(opts.disp, val_ty, &.{registerAlias( val_reg_off.reg, @intCast(val_ty.abiSize(cg.pt.zcu)), - ), cg), + )}, cg), else => continue :val_to_gpr, }, + .register_overflow => |val_reg_ov| { + const ip = &cg.pt.zcu.intern_pool; + const first_ty: Type = .fromInterned(first_ty: switch (ip.indexToKey(val_ty.toIntern())) { + .tuple_type => |tuple_type| { + const tuple_field_types = tuple_type.types.get(ip); + assert(tuple_field_types.len == 2 and tuple_field_types[1] == 
.u1_type); + break :first_ty tuple_field_types[0]; + }, + .opt_type => |opt_child| { + assert(!val_ty.optionalReprIsPayload(cg.pt.zcu)); + break :first_ty opt_child; + }, + else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }), + }); + const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu)); + try dst.writeRegs(opts.disp, first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg); + try cg.asmSetccMemory( + val_reg_ov.eflags, + try dst.tracking(cg).short.mem(cg, .{ + .size = .byte, + .disp = opts.disp + first_size, + }), + ); + }, .lea_frame, .lea_symbol => continue :val_to_gpr, .memory, .indirect, .load_frame, .load_symbol => { var dst_ptr = @@ -28739,33 +28781,47 @@ const Temp = struct { })); } - fn storeReg(ptr: *Temp, src_ty: Type, src_reg: Register, cg: *CodeGen) !void { - const src_rc = src_reg.class(); - const src_abi_size = src_ty.abiSize(cg.pt.zcu); - const strat = try cg.moveStrategy(src_ty, src_rc, false); - // hack around linker relocation bugs - switch (ptr.tracking(cg).short) { - else => {}, - .lea_symbol => |sym_off| if (src_rc != .general_purpose or sym_off.off != 0) - while (try ptr.toRegClass(false, .general_purpose, cg)) {}, - } - if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) { - try strat.write(cg, try ptr.tracking(cg).short.deref().mem(cg, .{ - .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())), - }), src_reg); - } else { - const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu); - const frame_index = try cg.allocFrameIndex(frame_alloc); - const frame_size: Memory.Size = .fromSize(frame_alloc.abi_size); - try strat.write(cg, .{ - .base = .{ .frame = frame_index }, - .mod = .{ .rm = .{ .size = frame_size } }, - }, src_reg); - var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } }); - var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); - try ptr.memcpy(&src_ptr, &len, cg); - try src_ptr.die(cg); - try len.die(cg); + fn storeRegs(ptr: *Temp, src_ty: Type, src_regs: []const Register, cg: *CodeGen) !void { + var part_disp: u31 = 0; + var deferred_disp: u31 = 0; + var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu)); + for (src_regs) |src_reg| { + const src_rc = src_reg.class(); + const part_bit_size = @min(8 * src_abi_size, src_reg.bitSize()); + const part_size = @divExact(part_bit_size, 8); + if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) { + // hack around linker relocation bugs + switch (ptr.tracking(cg).short) { + else => {}, + .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {}, + } + const strat = try cg.moveStrategy(src_ty, src_rc, false); + try strat.write(cg, try ptr.tracking(cg).short.deref().mem(cg, .{ + .size = .fromBitSize(part_bit_size), + .disp = part_disp, + }), registerAlias(src_reg, part_size)); + } else { + const frame_size = std.math.ceilPowerOfTwoAssert(u32, part_size); + const frame_index = try cg.allocFrameIndex(.init(.{ + .size = frame_size, + .alignment = .fromNonzeroByteUnits(frame_size), + })); + const strat = try cg.moveStrategy(src_ty, src_rc, true); + try strat.write(cg, .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = .fromSize(frame_size) } }, + }, registerAlias(src_reg, frame_size)); + try ptr.toOffset(deferred_disp, cg); + deferred_disp = 0; + var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } }); + var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); + try ptr.memcpy(&src_ptr, &len, cg); + try src_ptr.die(cg); + try len.die(cg); + } + 
part_disp += part_size; + deferred_disp += part_size; + src_abi_size -= part_size; } } @@ -28777,30 +28833,41 @@ const Temp = struct { })); } - fn writeReg(dst: Temp, disp: i32, src_ty: Type, src_reg: Register, cg: *CodeGen) !void { - const src_rc = src_reg.class(); - const src_abi_size = src_ty.abiSize(cg.pt.zcu); - const strat = try cg.moveStrategy(src_ty, src_rc, false); - if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) { - try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{ - .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())), - .disp = disp, - }), src_reg); - } else { - const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu); - const frame_index = try cg.allocFrameIndex(frame_alloc); - const frame_size: Memory.Size = .fromSize(frame_alloc.abi_size); - try strat.write(cg, .{ - .base = .{ .frame = frame_index }, - .mod = .{ .rm = .{ .size = frame_size } }, - }, src_reg); - var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address()); - var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } }); - var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); - try dst_ptr.memcpy(&src_ptr, &len, cg); - try dst_ptr.die(cg); - try src_ptr.die(cg); - try len.die(cg); + fn writeRegs(dst: Temp, disp: i32, src_ty: Type, src_regs: []const Register, cg: *CodeGen) !void { + var part_disp = disp; + var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu)); + for (src_regs) |src_reg| { + const src_rc = src_reg.class(); + const part_bit_size = @min(8 * src_abi_size, src_reg.bitSize()); + const part_size = @divExact(part_bit_size, 8); + if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) { + const strat = try cg.moveStrategy(src_ty, src_rc, false); + try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{ + .size = .fromBitSize(part_bit_size), + .disp = part_disp, + }), registerAlias(src_reg, part_size)); + } else { + const frame_size = std.math.ceilPowerOfTwoAssert(u32, part_size); + const frame_index = try cg.allocFrameIndex(.init(.{ + .size = frame_size, + .alignment = .fromNonzeroByteUnits(frame_size), + })); + const strat = try cg.moveStrategy(src_ty, src_rc, true); + try strat.write(cg, .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = .fromSize(frame_size) } }, + }, registerAlias(src_reg, frame_size)); + var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address()); + try dst_ptr.toOffset(part_disp, cg); + var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } }); + var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); + try dst_ptr.memcpy(&src_ptr, &len, cg); + try dst_ptr.die(cg); + try src_ptr.die(cg); + try len.die(cg); + } + part_disp += part_size; + src_abi_size -= part_size; } } @@ -29123,8 +29190,8 @@ const Select = struct { signed_int_vec: Memory.Size, signed_int_or_full_vec: Memory.Size, unsigned_int_vec: Memory.Size, - int_or_vec: Memory.Size, - exact_remainder_int_or_vec: struct { of: Memory.Size, is: Memory.Size }, + size: Memory.Size, + multiple_size: Memory.Size, int: Memory.Size, scalar_int: Memory.Size, scalar_signed_int: Memory.Size, @@ -29170,15 +29237,8 @@ const Select = struct { } else false, .unsigned_int_vec => |size| ty.isVector(zcu) and size.bitSize(cg.target) >= 8 * ty.abiSize(zcu) and if (intInfo(ty.childType(zcu), cg)) |int_info| int_info.signedness == .unsigned else false, - .int_or_vec => |size| if (intInfo(ty, cg)) |int_info| - size.bitSize(cg.target) >= int_info.bits - else - ty.isVector(zcu) and 
size.bitSize(cg.target) >= 8 * ty.abiSize(zcu), - .exact_remainder_int_or_vec => |of_is| if (intInfo(ty, cg)) |int_info| - of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 - else - ty.isVector(zcu) and ty.childType(zcu).toIntern() != .bool_type and - of_is.is.bitSize(cg.target) == (8 * ty.abiSize(zcu) - 1) % of_is.of.bitSize(cg.target) + 1, + .size => |size| size.bitSize(cg.target) >= 8 * ty.abiSize(zcu), + .multiple_size => |size| size.bitSize(cg.target) % 8 * ty.abiSize(zcu) == 0, .int => |size| if (intInfo(ty, cg)) |int_info| size.bitSize(cg.target) >= int_info.bits else false, .scalar_int => |size| if (intInfo(ty.scalarType(zcu), cg)) |int_info| size.bitSize(cg.target) >= int_info.bits diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index 750ea99706..0be0833ff0 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -540,8 +540,12 @@ pub fn getCAbiSseReturnRegs(cc: std.builtin.CallingConvention.Tag) []const Regis } pub fn getCAbiLinkerScratchReg(cc: std.builtin.CallingConvention.Tag) Register { - const int_return_regs = getCAbiIntReturnRegs(cc); - return int_return_regs[int_return_regs.len - 1]; + return switch (cc) { + .auto => zigcc.int_return_regs[zigcc.int_return_regs.len - 1], + .x86_64_sysv => SysV.c_abi_int_return_regs[0], + .x86_64_win => Win64.c_abi_int_return_regs[0], + else => unreachable, + }; } const gp_regs = [_]Register{ diff --git a/src/main.zig b/src/main.zig index b17a753b2b..7d035ab135 100644 --- a/src/main.zig +++ b/src/main.zig @@ -39,6 +39,11 @@ test { _ = Package; } +const thread_stack_size = switch (builtin.zig_backend) { + else => std.Thread.SpawnConfig.default_stack_size, + .stage2_x86_64 => 32 << 20, +}; + pub const std_options: std.Options = .{ .wasiCwd = wasi_cwd, .logFn = log, @@ -3320,6 +3325,7 @@ fn buildOutputType( .allocator = gpa, .n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)), .track_ids = true, + .stack_size = thread_stack_size, }); defer thread_pool.deinit(); @@ -5024,6 +5030,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .allocator = gpa, .n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)), .track_ids = true, + .stack_size = thread_stack_size, }); defer thread_pool.deinit(); @@ -5460,6 +5467,7 @@ fn jitCmd( .allocator = gpa, .n_jobs = @min(@max(std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)), .track_ids = true, + .stack_size = thread_stack_size, }); defer thread_pool.deinit(); diff --git a/src/register_manager.zig b/src/register_manager.zig index b9d3b6db0b..5621c8f750 100644 --- a/src/register_manager.zig +++ b/src/register_manager.zig @@ -58,11 +58,6 @@ pub fn RegisterManager( return @alignCast(@fieldParentPtr("register_manager", self)); } - fn excludeRegister(reg: Register, register_class: RegisterBitSet) bool { - const index = indexOfRegIntoTracked(reg) orelse return true; - return !register_class.isSet(index); - } - fn markRegIndexAllocated(self: *Self, tracked_index: TrackedIndex) void { self.allocated_registers.set(tracked_index); } @@ -234,28 +229,20 @@ pub fn RegisterManager( ) ?[count]Register { comptime assert(count > 0 and count <= tracked_registers.len); - var free_and_not_locked_registers = self.free_registers; - free_and_not_locked_registers.setIntersection(register_class); - - var unlocked_registers = self.locked_registers; - unlocked_registers.toggleAll(); - - 
free_and_not_locked_registers.setIntersection(unlocked_registers); - - if (free_and_not_locked_registers.count() < count) return null; + var free_and_unlocked_registers = self.locked_registers; + free_and_unlocked_registers.toggleAll(); + free_and_unlocked_registers.setIntersection(self.free_registers); + free_and_unlocked_registers.setIntersection(register_class); var regs: [count]Register = undefined; var i: usize = 0; - for (tracked_registers) |reg| { - if (i >= count) break; - if (excludeRegister(reg, register_class)) continue; - if (self.isRegLocked(reg)) continue; - if (!self.isRegFree(reg)) continue; - - regs[i] = reg; + var it = free_and_unlocked_registers.iterator(.{}); + while (it.next()) |reg_index| { + regs[i] = regAtTrackedIndex(@intCast(reg_index)); i += 1; + if (i >= count) break; } - assert(i == count); + if (i < count) return null; for (regs, insts) |reg, inst| { log.debug("tryAllocReg {} for inst {?}", .{ reg, inst }); @@ -290,46 +277,27 @@ pub fn RegisterManager( ) AllocationError![count]Register { comptime assert(count > 0 and count <= tracked_registers.len); - var locked_registers = self.locked_registers; - locked_registers.setIntersection(register_class); - - if (count > register_class.count() - locked_registers.count()) return error.OutOfRegisters; - const result = self.tryAllocRegs(count, insts, register_class) orelse blk: { + var unlocked_registers = self.locked_registers; + unlocked_registers.toggleAll(); + unlocked_registers.setIntersection(register_class); + // We'll take over the first count registers. Spill // the instructions that were previously there to a // stack allocations. var regs: [count]Register = undefined; var i: usize = 0; - for (tracked_registers) |reg| { - if (i >= count) break; - if (excludeRegister(reg, register_class)) break; - if (self.isRegLocked(reg)) continue; - - log.debug("allocReg {} for inst {?}", .{ reg, insts[i] }); - regs[i] = reg; - self.markRegAllocated(reg); - const index = indexOfRegIntoTracked(reg).?; // indexOfReg() on a callee-preserved reg should never return null - if (insts[i]) |inst| { - // Track the register - if (self.isRegFree(reg)) { - self.markRegUsed(reg); - } else { - const spilled_inst = self.registers[index]; - try self.getFunction().spillInstruction(reg, spilled_inst); - } - self.registers[index] = inst; - } else { - // Don't track the register - if (!self.isRegFree(reg)) { - const spilled_inst = self.registers[index]; - try self.getFunction().spillInstruction(reg, spilled_inst); - self.freeReg(reg); - } - } - + var it = unlocked_registers.iterator(.{}); + while (it.next()) |reg_index| { + const tracked_index: TrackedIndex = @intCast(reg_index); + if (!self.isRegIndexFree(tracked_index) and + self.registers[tracked_index].unwrap() == .target) continue; + try self.getRegIndex(tracked_index, insts[i]); + regs[i] = regAtTrackedIndex(tracked_index); i += 1; + if (i >= count) break; } + if (i < count) return error.OutOfRegisters; break :blk regs; }; @@ -351,7 +319,7 @@ pub fn RegisterManager( /// Spills the register if it is currently allocated. If a /// corresponding instruction is passed, will also track this /// register. 
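// The rewritten tryAllocRegs/allocRegs above replace the linear scan of
// `tracked_registers` (and the removed `excludeRegister` helper) with bitset
// algebra: the candidate set is computed once, then iterated directly. A
// self-contained sketch of that selection idea using std.bit_set; the
// function and parameter names here are illustrative, not from the patch:

const std = @import("std");

fn pickUnlockedFree(
    comptime n: usize,
    locked: std.StaticBitSet(n),
    free: std.StaticBitSet(n),
    class: std.StaticBitSet(n),
    out: []usize,
) bool {
    var candidates = locked;
    candidates.toggleAll(); // a set bit now means "unlocked"
    candidates.setIntersection(free); // keep only free registers
    candidates.setIntersection(class); // keep only the requested class
    var picked: usize = 0;
    var it = candidates.iterator(.{});
    while (it.next()) |reg_index| {
        if (picked == out.len) break;
        out[picked] = reg_index;
        picked += 1;
    }
    return picked == out.len; // false: not enough free, unlocked registers
}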
- fn getRegIndex( + pub fn getRegIndex( self: *Self, tracked_index: TrackedIndex, inst: ?Air.Inst.Index, diff --git a/test/behavior/x86_64/math.zig b/test/behavior/x86_64/math.zig index 5bb257a5f4..6e36473b17 100644 --- a/test/behavior/x86_64/math.zig +++ b/test/behavior/x86_64/math.zig @@ -742,6 +742,16 @@ fn testBinary(comptime op: anytype) !void { 0xb7935f5c2f3b1ae7a422c0a7c446884294b7d5370bada307d2fe5a4c4284a999, 0x310e6e196ba4f143b8d285ca6addf7f3bb3344224aff221b27607a31e148be08, ); + try testType( + u258, + 0x186d5ddaab8cb8cb04e5b41e36f812e039d008baf49f12894c39e29a07796d800, + 0x2072daba6ffad168826163eb136f6d28ca4360c8e7e5e41e29755e19e4753a4f5, + ); + try testType( + u495, + 0x6eaf4e252b3bf74b75bac59e0b43ca5326bad2a25b3fdb74a67ef132ac5e47d72eebc3316fb2351ee66c50dc5afb92a75cea9b0e35160652c7db39eeb158, + 0x49fbed744a92b549d8c05bb3512c617d24dd824f3f69bdf3923bc326a75674b85f5b828d2566fab9c86f571d12c2a63c9164feb0d191d27905533d09622a, + ); try testType( u512, 0xe5b1fedca3c77db765e517aabd05ffc524a3a8aff1784bbf67c45b894447ede32b65b9940e78173c591e56e078932d465f235aece7ad47b7f229df7ba8f12295,
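// The added u258 and u495 cases pick bit-widths that are not multiples of 64,
// presumably to exercise the partial top-limb handling introduced above (the
// sign/zero extension loop in genSetFrameTruncatedOverflowCompare and the
// multi-register storeRegs/writeRegs rewrite). A further case in the same
// style would pair another odd width with operands spanning several limbs;
// the width and values below are illustrative, not from the patch:
//
//     try testType(
//         u321,
//         0x1ffffffffffffffffffffffffffffffff,
//         0x2,
//     );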