x86_64: rewrite vector element pointer access
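
The old airLoad/airStore (and packedLoad/packedStore) fallbacks for pointers to packed
bool-vector elements are replaced by Select table entries built around the x86 bt/btr/bts
bit-test instructions; the generic displaced load/store is kept only as the SelectFailed
fallback. A minimal, hypothetical Zig example (not part of this commit) of code that takes
this path:

    const std = @import("std");

    test "bool vector element pointer access" {
        var v: @Vector(16, bool) = @splat(false);
        const elem_ptr = &v[5]; // element pointer; its type records vector_index = 5
        elem_ptr.* = true; // store through the element pointer
        try std.testing.expect(elem_ptr.*); // load through the element pointer
        try std.testing.expect(v[5]);
    }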

Jacob Young, 2025-09-11 08:40:17 -04:00 (committed by Alex Rønne Petersen)
parent 92b0ec989c
commit e647d1a570

@@ -2291,7 +2291,7 @@ fn genBodyBlock(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}
fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
@setEvalBranchQuota(29_400);
@setEvalBranchQuota(29_500);
const pt = cg.pt;
const zcu = pt.zcu;
const ip = &zcu.intern_pool;
@@ -86774,52 +86774,313 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
const is_non_err = try cg.tempInit(.bool, .{ .eflags = .e });
try is_non_err.finish(inst, &.{un_op}, &ops, cg);
},
.load => fallback: {
.load => {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
const val_ty = ty_op.ty.toType();
const ptr_ty = cg.typeOf(ty_op.operand);
const ptr_info = ptr_ty.ptrInfo(zcu);
if (ptr_info.packed_offset.host_size > 0 and
(ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type))
break :fallback try cg.airLoad(inst);
var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
const res = try ops[0].load(val_ty, .{
.disp = switch (ptr_info.flags.vector_index) {
.none => 0,
.runtime => unreachable,
else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)),
var res: [1]Temp = undefined;
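// Roughly, the select cases below read the element with a bit test whose result
// lands in the carry flag (.cc = .c): a byte-sized host is loaded with movzx and
// bt tests bit `vector_index`; a word-sized host is bit-tested directly in memory;
// any larger host is bit-tested as the dword at byte offset (vector_index / 8)
// rounded down to a multiple of 4, bit (vector_index % 32). If no case matches,
// SelectFailed falls back to the plain displaced load further down.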
cg.select(&res, &.{val_ty}, &ops, comptime &.{ .{
.src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .none, .none } },
},
}, cg);
try res.finish(inst, &.{ty_op.operand}, &ops, cg);
.extra_temps = .{
.{ .type = .u8, .kind = .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
.{ ._, ._, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .any, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .none, .none } },
},
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
} },
}, .{
.src_constraints = .{ .ptr_any_bool_vec_elem, .any, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .none, .none } },
},
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
} },
} }) catch |err| switch (err) {
error.SelectFailed => res[0] = try ops[0].load(val_ty, .{
.disp = switch (cg.typeOf(ty_op.operand).ptrInfo(zcu).flags.vector_index) {
.none => 0,
.runtime => unreachable,
else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)),
},
}, cg),
else => |e| return e,
};
try res[0].finish(inst, &.{ty_op.operand}, &ops, cg);
},
.ret => try cg.airRet(inst, false),
.ret_safe => try cg.airRet(inst, true),
.ret_load => try cg.airRetLoad(inst),
.store, .store_safe => |air_tag| fallback: {
.store, .store_safe => |air_tag| {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
const ptr_ty = cg.typeOf(bin_op.lhs);
const ptr_info = ptr_ty.ptrInfo(zcu);
const val_ty = cg.typeOf(bin_op.rhs);
if (ptr_info.packed_offset.host_size > 0 and
(ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type))
break :fallback try cg.airStore(inst, switch (air_tag) {
else => unreachable,
.store => false,
.store_safe => true,
});
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
try ops[0].store(&ops[1], .{
.disp = switch (ptr_info.flags.vector_index) {
.none => 0,
.runtime => unreachable,
else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)),
cg.select(&.{}, &.{}, &ops, comptime &.{ .{
.src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .{ .imm = 0 }, .none } },
},
.safe = switch (air_tag) {
else => unreachable,
.store => false,
.store_safe => true,
.extra_temps = .{
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
}, cg);
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
.{ ._, ._r, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
.{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .{ .imm = 1 }, .none } },
},
.extra_temps = .{
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
.{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
.{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
} },
}, .{
.required_features = .{ .cmov, null, null, null },
.src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.extra_temps = .{
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
.{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
.{ ._, ._r, .bt, .tmp1d, .ua(.src0, .add_vector_index), ._, ._ },
.{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
.{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
.{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
.{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.extra_temps = .{
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
.{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
.{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
.{ ._, ._r, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
.{ .@"0:", ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
.{ .@"1:", ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .{ .imm = 0 }, .none } },
},
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .{ .imm = 1 }, .none } },
},
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._s, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
} },
}, .{
.required_features = .{ .cmov, null, null, null },
.src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.extra_temps = .{
.{ .type = .u16, .kind = .{ .rc = .general_purpose } },
.{ .type = .u16, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .lea(.src0w), ._, ._ },
.{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
.{ ._, ._r, .bt, .tmp1d, .ua(.src0, .add_vector_index), ._, ._ },
.{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
.{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
.{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
.{ ._, ._, .mov, .lea(.src0w), .tmp0w, ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
.{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
.{ ._, ._r, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
.{ ._, ._mp, .j, .@"0f", ._, ._, ._ },
.{ .@"1:", ._s, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
} },
}, .{
.src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .{ .imm = 0 }, .none } },
},
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
} },
}, .{
.src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .{ .imm = 1 }, .none } },
},
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._s, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
} },
}, .{
.required_features = .{ .cmov, null, null, null },
.src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .leaa(.src0d, .add_vector_index_div_8_down_4), ._, ._ },
.{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
.{ ._, ._r, .bt, .tmp1d, .ua(.src0, .add_vector_index_rem_32), ._, ._ },
.{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index_rem_32), ._, ._ },
.{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
.{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
.{ ._, ._, .mov, .leaa(.src0d, .add_vector_index_div_8_down_4), .tmp0d, ._, ._ },
} },
}, .{
.src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
.{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
.{ ._, ._r, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
.{ ._, ._mp, .j, .@"0f", ._, ._, ._ },
.{ .@"1:", ._s, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
} },
} }) catch |err| switch (err) {
error.SelectFailed => try ops[0].store(&ops[1], .{
.disp = switch (cg.typeOf(bin_op.lhs).ptrInfo(zcu).flags.vector_index) {
.none => 0,
.runtime => unreachable,
else => |vector_index| @intCast(cg.typeOf(bin_op.rhs).abiSize(zcu) * @intFromEnum(vector_index)),
},
.safe = switch (air_tag) {
else => unreachable,
.store => false,
.store_safe => true,
},
}, cg),
else => |e| return e,
};
for (ops) |op| try op.die(cg);
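// Roughly, the store cases above map a comptime-known false/true operand onto
// btr/bts against the host (via a byte-sized temporary when the host is a single
// byte); a runtime bool either picks between the btr and bts results with cmov
// when available, or branches on `test src1b, 1`. Anything the table cannot
// handle reaches the displaced ops[0].store fallback via error.SelectFailed,
// and the operands are released with die() in every path.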
},
.unreach => {},
@@ -100863,7 +101124,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .bt, .src0d, .ua(.none, .add_src1_rem_32), ._, ._ },
.{ ._, ._, .bt, .src0d, .ua(.none, .add_src1), ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .bool_vec = .dword }, .any, .any },
@@ -100884,7 +101145,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.dst_temps = .{ .{ .cc = .c }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .bt, .src0q, .ua(.none, .add_src1_rem_64), ._, ._ },
.{ ._, ._, .bt, .src0q, .ua(.none, .add_src1), ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
@@ -174481,114 +174742,6 @@ fn reuseOperandAdvanced(
return true;
}
fn packedLoad(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
const pt = self.pt;
const zcu = pt.zcu;
const ptr_info = ptr_ty.ptrInfo(zcu);
const val_ty: Type = .fromInterned(ptr_info.child);
if (!val_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
const val_abi_size: u32 = @intCast(val_ty.abiSize(zcu));
const val_bit_size: u32 = @intCast(val_ty.bitSize(zcu));
const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
.none => 0,
.runtime => unreachable,
else => |vector_index| @intFromEnum(vector_index) * val_bit_size,
};
if (ptr_bit_off % 8 == 0) {
{
const mat_ptr_mcv: MCValue = switch (ptr_mcv) {
.immediate, .register, .register_offset, .lea_frame => ptr_mcv,
else => .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
};
const mat_ptr_lock = switch (mat_ptr_mcv) {
.register => |mat_ptr_reg| self.register_manager.lockReg(mat_ptr_reg),
else => null,
};
defer if (mat_ptr_lock) |lock| self.register_manager.unlockReg(lock);
try self.load(dst_mcv, ptr_ty, mat_ptr_mcv.offset(@intCast(@divExact(ptr_bit_off, 8))));
}
if (val_abi_size * 8 > val_bit_size) {
if (dst_mcv.isRegister()) {
try self.truncateRegister(val_ty, dst_mcv.getReg().?);
} else {
const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
const hi_mcv = dst_mcv.address().offset(@intCast(val_bit_size / 64 * 8)).deref();
try self.genSetReg(tmp_reg, .usize, hi_mcv, .{});
try self.truncateRegister(val_ty, tmp_reg);
try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{});
}
}
return;
}
if (val_abi_size > 8) return self.fail("TODO implement packed load of {f}", .{val_ty.fmt(pt)});
const limb_abi_size: u31 = @min(val_abi_size, 8);
const limb_abi_bits = limb_abi_size * 8;
const val_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
const val_bit_off = ptr_bit_off % limb_abi_bits;
const val_extra_bits = self.regExtraBits(val_ty);
const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
defer self.register_manager.unlockReg(ptr_lock);
const dst_reg = switch (dst_mcv) {
.register => |reg| reg,
else => try self.register_manager.allocReg(null, abi.RegisterClass.gp),
};
const dst_lock = self.register_manager.lockReg(dst_reg);
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
const load_abi_size =
if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2;
if (load_abi_size <= 8) {
const load_reg = registerAlias(dst_reg, load_abi_size);
try self.asmRegisterMemory(.{ ._, .mov }, load_reg, .{
.base = .{ .reg = ptr_reg },
.mod = .{ .rm = .{
.size = .fromSize(load_abi_size),
.disp = val_byte_off,
} },
});
try self.spillEflagsIfOccupied();
try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, .u(val_bit_off));
} else {
const tmp_reg =
registerAlias(try self.register_manager.allocReg(null, abi.RegisterClass.gp), val_abi_size);
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
const dst_alias = registerAlias(dst_reg, val_abi_size);
try self.asmRegisterMemory(.{ ._, .mov }, dst_alias, .{
.base = .{ .reg = ptr_reg },
.mod = .{ .rm = .{
.size = .fromSize(val_abi_size),
.disp = val_byte_off,
} },
});
try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg, .{
.base = .{ .reg = ptr_reg },
.mod = .{ .rm = .{
.size = .fromSize(val_abi_size),
.disp = val_byte_off + limb_abi_size,
} },
});
try self.spillEflagsIfOccupied();
try self.asmRegisterRegisterImmediate(.{ ._rd, .sh }, dst_alias, tmp_reg, .u(val_bit_off));
}
if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg);
try self.genCopy(val_ty, dst_mcv, .{ .register = dst_reg }, .{});
}
fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
const pt = self.pt;
const zcu = pt.zcu;
@@ -174636,174 +174789,6 @@ fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerE
}
}
fn airLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
const pt = self.pt;
const zcu = pt.zcu;
const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
const elem_ty = self.typeOfIndex(inst);
const result: MCValue = result: {
if (!elem_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
const ptr_ty = self.typeOf(ty_op.operand);
const elem_size = elem_ty.abiSize(zcu);
const elem_rs = self.regSetForType(elem_ty);
const ptr_rs = self.regSetForType(ptr_ty);
const ptr_mcv = try self.resolveInst(ty_op.operand);
const dst_mcv = if (elem_size <= 8 and std.math.isPowerOfTwo(elem_size) and
elem_rs.supersetOf(ptr_rs) and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
// The MCValue that holds the pointer can be re-used as the value.
ptr_mcv
else
try self.allocRegOrMem(inst, true);
const ptr_info = ptr_ty.ptrInfo(zcu);
if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) {
try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv);
} else {
try self.load(dst_mcv, ptr_ty, ptr_mcv);
}
if (elem_ty.isAbiInt(zcu) and elem_size * 8 > elem_ty.bitSize(zcu)) {
const high_mcv: MCValue = switch (dst_mcv) {
.register => |dst_reg| .{ .register = dst_reg },
.register_pair => |dst_regs| .{ .register = dst_regs[1] },
else => dst_mcv.address().offset(@intCast((elem_size - 1) / 8 * 8)).deref(),
};
const high_reg = if (high_mcv.isRegister())
high_mcv.getReg().?
else
try self.copyToTmpRegister(.usize, high_mcv);
const high_lock = self.register_manager.lockReg(high_reg);
defer if (high_lock) |lock| self.register_manager.unlockReg(lock);
try self.truncateRegister(elem_ty, high_reg);
if (!high_mcv.isRegister()) try self.genCopy(
if (elem_size <= 8) elem_ty else .usize,
high_mcv,
.{ .register = high_reg },
.{},
);
}
break :result dst_mcv;
};
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
const pt = self.pt;
const zcu = pt.zcu;
const ptr_info = ptr_ty.ptrInfo(zcu);
const src_ty: Type = .fromInterned(ptr_info.child);
if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
const limb_abi_size: u16 = @min(ptr_info.packed_offset.host_size, 8);
const limb_abi_bits = limb_abi_size * 8;
const limb_ty = try pt.intType(.unsigned, limb_abi_bits);
const src_bit_size = src_ty.bitSize(zcu);
const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
.none => 0,
.runtime => unreachable,
else => |vector_index| @intFromEnum(vector_index) * src_bit_size,
};
const src_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
const src_bit_off = ptr_bit_off % limb_abi_bits;
const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
defer self.register_manager.unlockReg(ptr_lock);
const mat_src_mcv: MCValue = mat_src_mcv: switch (src_mcv) {
.register => if (src_bit_size > 64) {
const frame_index = try self.allocFrameIndex(.initSpill(src_ty, self.pt.zcu));
try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, src_mcv, .{});
break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } };
} else src_mcv,
else => src_mcv,
};
var limb_i: u16 = 0;
while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) {
const part_bit_off = if (limb_i == 0) src_bit_off else 0;
const part_bit_size =
@min(src_bit_off + src_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off;
const limb_mem: Memory = .{
.base = .{ .reg = ptr_reg },
.mod = .{ .rm = .{
.size = .fromSize(limb_abi_size),
.disp = src_byte_off + limb_i * limb_abi_size,
} },
};
const part_mask = (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - part_bit_size)) <<
@intCast(part_bit_off);
const part_mask_not = part_mask ^ (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_abi_bits));
if (limb_abi_size <= 4) {
try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .u(part_mask_not));
} else if (std.math.cast(i32, @as(i64, @bitCast(part_mask_not)))) |small| {
try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .s(small));
} else {
const part_mask_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, .u(part_mask_not));
try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg);
}
if (src_bit_size <= 64) {
const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
const tmp_mcv = MCValue{ .register = tmp_reg };
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
try self.genSetReg(tmp_reg, limb_ty, mat_src_mcv, .{});
switch (limb_i) {
0 => try self.genShiftBinOpMir(
.{ ._l, .sh },
limb_ty,
tmp_mcv,
.u8,
.{ .immediate = src_bit_off },
),
1 => try self.genShiftBinOpMir(
.{ ._r, .sh },
limb_ty,
tmp_mcv,
.u8,
.{ .immediate = limb_abi_bits - src_bit_off },
),
else => unreachable,
}
try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
try self.asmMemoryRegister(
.{ ._, .@"or" },
limb_mem,
registerAlias(tmp_reg, limb_abi_size),
);
} else if (src_bit_size <= 128 and src_bit_off == 0) {
const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
const tmp_mcv = MCValue{ .register = tmp_reg };
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
try self.genSetReg(tmp_reg, limb_ty, switch (limb_i) {
0 => mat_src_mcv,
else => mat_src_mcv.address().offset(limb_i * limb_abi_size).deref(),
}, .{});
try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
try self.asmMemoryRegister(
.{ ._, .@"or" },
limb_mem,
registerAlias(tmp_reg, limb_abi_size),
);
} else return self.fail("TODO: implement packed store of {f}", .{src_ty.fmt(pt)});
}
}
fn store(
self: *CodeGen,
ptr_ty: Type,
@@ -174857,35 +174842,6 @@ fn store(
}
}
fn airStore(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
const pt = self.pt;
const zcu = pt.zcu;
const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
result: {
if (!safety and (try self.resolveInst(bin_op.rhs)) == .undef) break :result;
try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
const ptr_ty = self.typeOf(bin_op.lhs);
const ptr_info = ptr_ty.ptrInfo(zcu);
const is_packed = ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0;
if (is_packed) try self.spillEflagsIfOccupied();
const src_mcv = try self.resolveInst(bin_op.rhs);
const ptr_mcv = try self.resolveInst(bin_op.lhs);
if (is_packed) {
try self.packedStore(ptr_ty, ptr_mcv, src_mcv);
} else {
try self.store(ptr_ty, ptr_mcv, src_mcv, .{ .safety = safety });
}
}
return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
}
fn genUnOp(self: *CodeGen, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue {
const pt = self.pt;
const zcu = pt.zcu;
@@ -192171,6 +192127,8 @@ const Select = struct {
exact_bool_vec: u16,
ptr_any_bool_vec,
ptr_bool_vec: Memory.Size,
ptr_any_bool_vec_elem,
ptr_bool_vec_elem: Memory.Size,
remainder_bool_vec: OfIsSizes,
exact_remainder_bool_vec: struct { of: Memory.Size, is: u16 },
signed_int_vec: Memory.Size,
@@ -192273,6 +192231,22 @@ const Select = struct {
.vector_type => |vector_type| vector_type.child == .bool_type and size.bitSize(cg.target) >= vector_type.len,
else => false,
},
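// The two element constraints below require a comptime vector_index (.none fails
// the constraint, .runtime is asserted unreachable) with a bool child type; the
// sized variant additionally requires the packed host_size to be no larger than
// the given memory size.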
.ptr_any_bool_vec_elem => {
const ptr_info = ty.ptrInfo(zcu);
return switch (ptr_info.flags.vector_index) {
.none => false,
.runtime => unreachable,
else => ptr_info.child == .bool_type,
};
},
.ptr_bool_vec_elem => |size| {
const ptr_info = ty.ptrInfo(zcu);
return switch (ptr_info.flags.vector_index) {
.none => false,
.runtime => unreachable,
else => ptr_info.child == .bool_type and size.bitSize(cg.target) >= ptr_info.packed_offset.host_size,
};
},
.remainder_bool_vec => |of_is| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
of_is.is.bitSize(cg.target) >= (ty.vectorLen(zcu) - 1) % of_is.of.bitSize(cg.target) + 1,
.exact_remainder_bool_vec => |of_is| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
@@ -193266,7 +193240,7 @@ const Select = struct {
ref: Ref,
scale: Memory.Scale = .@"1",
} = .{ .ref = .none },
unused: u3 = 0,
unused: u2 = 0,
},
imm: i32 = 0,
@@ -193279,9 +193253,9 @@ const Select = struct {
lea,
mem,
};
const Adjust = packed struct(u10) {
const Adjust = packed struct(u11) {
sign: enum(u1) { neg, pos },
lhs: enum(u5) {
lhs: enum(u6) {
none,
ptr_size,
ptr_bit_size,
@@ -193303,6 +193277,7 @@ const Select = struct {
src0_elem_size,
dst0_elem_size,
src0_elem_size_mul_src1,
vector_index,
src1,
src1_sub_bit_size,
log2_src0_elem_size,
@@ -193373,9 +193348,13 @@ const Select = struct {
const sub_src0_elem_size: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"1" };
const add_src0_elem_size_mul_src1: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" };
const sub_src0_elem_size_mul_src1: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" };
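// The vector_index adjusts below feed the new element-pointer cases: roughly,
// add_vector_index contributes the raw comptime element index,
// add_vector_index_rem_32 its bit position within a dword (index % 32), and
// add_vector_index_div_8_down_4 the containing byte offset rounded down to a
// 4-byte boundary.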
const add_vector_index: Adjust = .{ .sign = .pos, .lhs = .vector_index, .op = .mul, .rhs = .@"1" };
const add_vector_index_rem_32: Adjust = .{ .sign = .pos, .lhs = .vector_index, .op = .rem_8_mul, .rhs = .@"4" };
const add_vector_index_div_8_down_4: Adjust = .{ .sign = .pos, .lhs = .vector_index, .op = .div_8_down, .rhs = .@"4" };
const add_dst0_elem_size: Adjust = .{ .sign = .pos, .lhs = .dst0_elem_size, .op = .mul, .rhs = .@"1" };
const sub_dst0_elem_size: Adjust = .{ .sign = .neg, .lhs = .dst0_elem_size, .op = .mul, .rhs = .@"1" };
const add_src1_div_8_down_4: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .div_8_down, .rhs = .@"4" };
const add_src1: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .mul, .rhs = .@"1" };
const add_src1_rem_32: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"4" };
const add_src1_rem_64: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"8" };
const add_src1_sub_bit_size: Adjust = .{ .sign = .pos, .lhs = .src1_sub_bit_size, .op = .mul, .rhs = .@"1" };
@@ -194258,6 +194237,10 @@ const Select = struct {
.dst0_elem_size => @intCast(Select.Operand.Ref.dst0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)),
.src0_elem_size_mul_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) *
Select.Operand.Ref.src1.valueOf(s).immediate),
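// .vector_index resolves to the comptime element index recorded in the base
// operand's pointer type (.none and .runtime are unreachable here).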
.vector_index => switch (op.flags.base.ref.typeOf(s).ptrInfo(s.cg.pt.zcu).flags.vector_index) {
.none, .runtime => unreachable,
else => |vector_index| @intFromEnum(vector_index),
},
.src1 => @intCast(Select.Operand.Ref.src1.valueOf(s).immediate),
.src1_sub_bit_size => @as(SignedImm, @intCast(Select.Operand.Ref.src1.valueOf(s).immediate)) -
@as(SignedImm, @intCast(s.cg.nonBoolScalarBitSize(op.flags.base.ref.typeOf(s)))),