mirror of
https://github.com/ziglang/zig.git
synced 2026-02-13 04:48:20 +00:00
x86_64: implement binary operations for float vectors
This commit is contained in:
parent
ea957c4cff
commit
057139fda5
@ -1176,6 +1176,21 @@ fn asmRegisterRegisterRegister(
|
||||
});
|
||||
}
|
||||
|
||||
/// Appends one MIR instruction that takes three register operands and one
/// immediate operand (`.ops = .rrri`) by calling `self.addInst`.
///
/// `tag` selects the instruction; `reg1`, `reg2` and `reg3` are stored as
/// `.r1`, `.r2` and `.r3` of the `.rrri` payload, and `imm` supplies `.i`.
/// The value returned by `addInst` is discarded, and any error it returns is
/// propagated to the caller.
///
/// NOTE(review): the immediate is read through `imm.unsigned`, so this assumes
/// callers always pass an `Immediate` whose value lives in that field, and that
/// the value fits in 8 bits — confirm against `Immediate` and the call sites.
fn asmRegisterRegisterRegisterImmediate(
|
||||
self: *Self,
|
||||
tag: Mir.Inst.Tag,
|
||||
reg1: Register,
|
||||
reg2: Register,
|
||||
reg3: Register,
|
||||
imm: Immediate,
|
||||
) !void {
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.ops = .rrri,
|
||||
// `@intCast` narrows the immediate to `u8` before it is stored in `.i`.
.data = .{ .rrri = .{ .r1 = reg1, .r2 = reg2, .r3 = reg3, .i = @intCast(u8, imm.unsigned) } },
|
||||
});
|
||||
}
|
||||
|
||||
fn asmRegisterRegisterImmediate(
|
||||
self: *Self,
|
||||
tag: Mir.Inst.Tag,
|
||||
@ -2310,20 +2325,31 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
|
||||
}),
|
||||
}
|
||||
} else if (src_bits == 64 and dst_bits == 32) {
|
||||
if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister(
|
||||
.vcvtsd2ss,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.getReg().?.to128(),
|
||||
) else try self.asmRegisterRegisterMemory(
|
||||
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
|
||||
.vcvtsd2ss,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.mem(.qword),
|
||||
) else if (src_mcv.isRegister())
|
||||
try self.asmRegisterRegister(.cvtsd2ss, dst_reg, src_mcv.getReg().?.to128())
|
||||
else
|
||||
try self.asmRegisterMemory(.cvtsd2ss, dst_reg, src_mcv.mem(.qword));
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
.vcvtsd2ss,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
|
||||
) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
|
||||
.cvtsd2ss,
|
||||
dst_reg,
|
||||
src_mcv.mem(.qword),
|
||||
) else try self.asmRegisterRegister(
|
||||
.cvtsd2ss,
|
||||
dst_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
|
||||
);
|
||||
} else return self.fail("TODO implement airFptrunc from {} to {}", .{
|
||||
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
|
||||
});
|
||||
@ -2360,20 +2386,31 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
|
||||
}),
|
||||
}
|
||||
} else if (src_bits == 32 and dst_bits == 64) {
|
||||
if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister(
|
||||
.vcvtss2sd,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.getReg().?.to128(),
|
||||
) else try self.asmRegisterRegisterMemory(
|
||||
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
|
||||
.vcvtss2sd,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.mem(.dword),
|
||||
) else if (src_mcv.isRegister())
|
||||
try self.asmRegisterRegister(.cvtss2sd, dst_reg, src_mcv.getReg().?.to128())
|
||||
else
|
||||
try self.asmRegisterMemory(.cvtss2sd, dst_reg, src_mcv.mem(.dword));
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
.vcvtss2sd,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
|
||||
) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
|
||||
.cvtss2sd,
|
||||
dst_reg,
|
||||
src_mcv.mem(.dword),
|
||||
) else try self.asmRegisterRegister(
|
||||
.cvtss2sd,
|
||||
dst_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
|
||||
);
|
||||
} else return self.fail("TODO implement airFpext from {} to {}", .{
|
||||
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
|
||||
});
|
||||
@ -4532,7 +4569,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
|
||||
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const result: MCValue = result: {
|
||||
const tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) {
|
||||
const mir_tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) {
|
||||
.Float => switch (ty.floatBits(self.target.*)) {
|
||||
16 => if (self.hasFeature(.f16c)) {
|
||||
const mat_src_reg = if (src_mcv.isRegister())
|
||||
@ -4558,11 +4595,14 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
|
||||
.Float => switch (ty.childType().floatBits(self.target.*)) {
|
||||
16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) {
|
||||
1 => {
|
||||
const mat_src_reg = if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(ty, src_mcv);
|
||||
try self.asmRegisterRegister(.vcvtph2ps, dst_reg, mat_src_reg.to128());
|
||||
try self.asmRegisterRegister(
|
||||
.vcvtph2ps,
|
||||
dst_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(ty, src_mcv)).to128(),
|
||||
);
|
||||
try self.asmRegisterRegisterRegister(.vsqrtss, dst_reg, dst_reg, dst_reg);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.vcvtps2ph,
|
||||
@ -4574,16 +4614,19 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
|
||||
},
|
||||
2...8 => {
|
||||
const wide_reg = registerAlias(dst_reg, abi_size * 2);
|
||||
if (src_mcv.isRegister()) try self.asmRegisterRegister(
|
||||
.vcvtph2ps,
|
||||
wide_reg,
|
||||
src_mcv.getReg().?.to128(),
|
||||
) else try self.asmRegisterMemory(
|
||||
if (src_mcv.isMemory()) try self.asmRegisterMemory(
|
||||
.vcvtph2ps,
|
||||
wide_reg,
|
||||
src_mcv.mem(Memory.PtrSize.fromSize(
|
||||
@intCast(u32, @divExact(wide_reg.bitSize(), 16)),
|
||||
)),
|
||||
) else try self.asmRegisterRegister(
|
||||
.vcvtph2ps,
|
||||
wide_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(ty, src_mcv)).to128(),
|
||||
);
|
||||
try self.asmRegisterRegister(.vsqrtps, wide_reg, wide_reg);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
@ -4617,26 +4660,32 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
|
||||
})) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{
|
||||
ty.fmt(self.bin_file.options.module.?),
|
||||
});
|
||||
switch (tag) {
|
||||
.vsqrtss, .vsqrtsd => if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister(
|
||||
tag,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
registerAlias(src_mcv.getReg().?, abi_size),
|
||||
) else try self.asmRegisterRegisterMemory(
|
||||
tag,
|
||||
switch (mir_tag) {
|
||||
.vsqrtss, .vsqrtsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
|
||||
mir_tag,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
mir_tag,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
registerAlias(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(ty, src_mcv), abi_size),
|
||||
),
|
||||
else => if (src_mcv.isRegister()) try self.asmRegisterRegister(
|
||||
tag,
|
||||
dst_reg,
|
||||
registerAlias(src_mcv.getReg().?, abi_size),
|
||||
) else try self.asmRegisterMemory(
|
||||
tag,
|
||||
else => if (src_mcv.isMemory()) try self.asmRegisterMemory(
|
||||
mir_tag,
|
||||
dst_reg,
|
||||
src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
|
||||
) else try self.asmRegisterRegister(
|
||||
mir_tag,
|
||||
dst_reg,
|
||||
registerAlias(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(ty, src_mcv), abi_size),
|
||||
),
|
||||
}
|
||||
break :result dst_mcv;
|
||||
@ -5800,25 +5849,22 @@ fn genMulDivBinOp(
|
||||
}
|
||||
}
|
||||
|
||||
/// Result is always a register.
|
||||
fn genBinOp(
|
||||
self: *Self,
|
||||
maybe_inst: ?Air.Inst.Index,
|
||||
tag: Air.Inst.Tag,
|
||||
air_tag: Air.Inst.Tag,
|
||||
lhs_air: Air.Inst.Ref,
|
||||
rhs_air: Air.Inst.Ref,
|
||||
) !MCValue {
|
||||
const lhs = try self.resolveInst(lhs_air);
|
||||
const rhs = try self.resolveInst(rhs_air);
|
||||
const lhs_mcv = try self.resolveInst(lhs_air);
|
||||
const rhs_mcv = try self.resolveInst(rhs_air);
|
||||
const lhs_ty = self.air.typeOf(lhs_air);
|
||||
const rhs_ty = self.air.typeOf(rhs_air);
|
||||
if (lhs_ty.zigTypeTag() == .Vector) {
|
||||
return self.fail("TODO implement genBinOp for {}", .{lhs_ty.fmt(self.bin_file.options.module.?)});
|
||||
}
|
||||
const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
|
||||
|
||||
switch (lhs) {
|
||||
switch (lhs_mcv) {
|
||||
.immediate => |imm| switch (imm) {
|
||||
0 => switch (tag) {
|
||||
0 => switch (air_tag) {
|
||||
.sub, .subwrap => return self.genUnOp(maybe_inst, .neg, rhs_air),
|
||||
else => {},
|
||||
},
|
||||
@ -5827,9 +5873,10 @@ fn genBinOp(
|
||||
else => {},
|
||||
}
|
||||
|
||||
const is_commutative = switch (tag) {
|
||||
const is_commutative = switch (air_tag) {
|
||||
.add,
|
||||
.addwrap,
|
||||
.mul,
|
||||
.bool_or,
|
||||
.bit_or,
|
||||
.bool_and,
|
||||
@ -5841,48 +5888,42 @@ fn genBinOp(
|
||||
|
||||
else => false,
|
||||
};
|
||||
const dst_mem_ok = switch (tag) {
|
||||
.add,
|
||||
.addwrap,
|
||||
.sub,
|
||||
.subwrap,
|
||||
.mul,
|
||||
.div_float,
|
||||
.div_exact,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
=> !lhs_ty.isRuntimeFloat(),
|
||||
|
||||
else => true,
|
||||
const vec_op = switch (lhs_ty.zigTypeTag()) {
|
||||
else => false,
|
||||
.Float, .Vector => true,
|
||||
};
|
||||
|
||||
const lhs_lock: ?RegisterLock = switch (lhs) {
|
||||
const lhs_lock: ?RegisterLock = switch (lhs_mcv) {
|
||||
.register => |reg| self.register_manager.lockRegAssumeUnused(reg),
|
||||
else => null,
|
||||
};
|
||||
defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const rhs_lock: ?RegisterLock = switch (rhs) {
|
||||
const rhs_lock: ?RegisterLock = switch (rhs_mcv) {
|
||||
.register => |reg| self.register_manager.lockReg(reg),
|
||||
else => null,
|
||||
};
|
||||
defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
var flipped: bool = false;
|
||||
var flipped = false;
|
||||
var copied_to_dst = true;
|
||||
const dst_mcv: MCValue = dst: {
|
||||
if (maybe_inst) |inst| {
|
||||
if ((dst_mem_ok or lhs.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs)) {
|
||||
break :dst lhs;
|
||||
if ((!vec_op or lhs_mcv.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs_mcv)) {
|
||||
break :dst lhs_mcv;
|
||||
}
|
||||
if (is_commutative and (dst_mem_ok or rhs.isRegister()) and
|
||||
self.reuseOperand(inst, rhs_air, 1, rhs))
|
||||
if (is_commutative and (!vec_op or rhs_mcv.isRegister()) and
|
||||
self.reuseOperand(inst, rhs_air, 1, rhs_mcv))
|
||||
{
|
||||
flipped = true;
|
||||
break :dst rhs;
|
||||
break :dst rhs_mcv;
|
||||
}
|
||||
}
|
||||
const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
|
||||
try self.genCopy(lhs_ty, dst_mcv, lhs);
|
||||
if (vec_op and lhs_mcv.isRegister() and self.hasFeature(.avx))
|
||||
copied_to_dst = false
|
||||
else
|
||||
try self.genCopy(lhs_ty, dst_mcv, lhs_mcv);
|
||||
break :dst dst_mcv;
|
||||
};
|
||||
const dst_lock: ?RegisterLock = switch (dst_mcv) {
|
||||
@ -5891,160 +5932,47 @@ fn genBinOp(
|
||||
};
|
||||
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const src_mcv = if (flipped) lhs else rhs;
|
||||
switch (tag) {
|
||||
.add,
|
||||
.addwrap,
|
||||
=> try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
|
||||
else => .add,
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
|
||||
.addss
|
||||
else
|
||||
return self.fail("TODO implement genBinOp for {s} {} without sse", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
|
||||
.addsd
|
||||
else
|
||||
return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
else => return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
|
||||
if (!vec_op) {
|
||||
switch (air_tag) {
|
||||
.add,
|
||||
.addwrap,
|
||||
=> try self.genBinOpMir(.add, lhs_ty, dst_mcv, src_mcv),
|
||||
|
||||
.sub,
|
||||
.subwrap,
|
||||
=> try self.genBinOpMir(.sub, lhs_ty, dst_mcv, src_mcv),
|
||||
|
||||
.ptr_add,
|
||||
.ptr_sub,
|
||||
=> {
|
||||
const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv);
|
||||
const tmp_mcv = MCValue{ .register = tmp_reg };
|
||||
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
|
||||
defer self.register_manager.unlockReg(tmp_lock);
|
||||
|
||||
const elem_size = lhs_ty.elemType2().abiSize(self.target.*);
|
||||
try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size });
|
||||
try self.genBinOpMir(switch (air_tag) {
|
||||
.ptr_add => .add,
|
||||
.ptr_sub => .sub,
|
||||
else => unreachable,
|
||||
}, lhs_ty, dst_mcv, tmp_mcv);
|
||||
},
|
||||
}, lhs_ty, dst_mcv, src_mcv),
|
||||
|
||||
.sub,
|
||||
.subwrap,
|
||||
=> try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
|
||||
else => .sub,
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
|
||||
.subss
|
||||
else
|
||||
return self.fail("TODO implement genBinOp for {s} {} without sse", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
|
||||
.subsd
|
||||
else
|
||||
return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
else => return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
},
|
||||
}, lhs_ty, dst_mcv, src_mcv),
|
||||
.bool_or,
|
||||
.bit_or,
|
||||
=> try self.genBinOpMir(.@"or", lhs_ty, dst_mcv, src_mcv),
|
||||
|
||||
.mul => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
|
||||
else => return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
|
||||
.mulss
|
||||
else
|
||||
return self.fail("TODO implement genBinOp for {s} {} without sse", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
|
||||
.mulsd
|
||||
else
|
||||
return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
else => return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
},
|
||||
}, lhs_ty, dst_mcv, src_mcv),
|
||||
.bool_and,
|
||||
.bit_and,
|
||||
=> try self.genBinOpMir(.@"and", lhs_ty, dst_mcv, src_mcv),
|
||||
|
||||
.div_float,
|
||||
.div_exact,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
=> {
|
||||
try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
|
||||
else => return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
|
||||
.divss
|
||||
else
|
||||
return self.fail("TODO implement genBinOp for {s} {} without sse", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
|
||||
.divsd
|
||||
else
|
||||
return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
else => return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
},
|
||||
}, lhs_ty, dst_mcv, src_mcv);
|
||||
switch (tag) {
|
||||
.div_float,
|
||||
.div_exact,
|
||||
=> {},
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
=> if (self.hasFeature(.sse4_1)) {
|
||||
const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
|
||||
const dst_alias = registerAlias(dst_mcv.register, abi_size);
|
||||
try self.asmRegisterRegisterImmediate(switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => .roundss,
|
||||
64 => .roundsd,
|
||||
else => unreachable,
|
||||
}, dst_alias, dst_alias, Immediate.u(switch (tag) {
|
||||
.div_trunc => 0b1_0_11,
|
||||
.div_floor => 0b1_0_01,
|
||||
else => unreachable,
|
||||
}));
|
||||
} else return self.fail("TODO implement genBinOp for {s} {} without sse4_1", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
else => unreachable,
|
||||
}
|
||||
},
|
||||
.xor => try self.genBinOpMir(.xor, lhs_ty, dst_mcv, src_mcv),
|
||||
|
||||
.ptr_add,
|
||||
.ptr_sub,
|
||||
=> {
|
||||
const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv);
|
||||
const tmp_mcv = MCValue{ .register = tmp_reg };
|
||||
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
|
||||
defer self.register_manager.unlockReg(tmp_lock);
|
||||
|
||||
const elem_size = lhs_ty.elemType2().abiSize(self.target.*);
|
||||
try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size });
|
||||
try self.genBinOpMir(switch (tag) {
|
||||
.ptr_add => .add,
|
||||
.ptr_sub => .sub,
|
||||
else => unreachable,
|
||||
}, lhs_ty, dst_mcv, tmp_mcv);
|
||||
},
|
||||
|
||||
.bool_or,
|
||||
.bit_or,
|
||||
=> try self.genBinOpMir(.@"or", lhs_ty, dst_mcv, src_mcv),
|
||||
|
||||
.bool_and,
|
||||
.bit_and,
|
||||
=> try self.genBinOpMir(.@"and", lhs_ty, dst_mcv, src_mcv),
|
||||
|
||||
.xor => try self.genBinOpMir(.xor, lhs_ty, dst_mcv, src_mcv),
|
||||
|
||||
.min,
|
||||
.max,
|
||||
=> switch (lhs_ty.zigTypeTag()) {
|
||||
.Int => {
|
||||
.min,
|
||||
.max,
|
||||
=> {
|
||||
const mat_src_mcv: MCValue = if (switch (src_mcv) {
|
||||
.immediate,
|
||||
.eflags,
|
||||
@ -6070,12 +5998,12 @@ fn genBinOp(
|
||||
|
||||
const int_info = lhs_ty.intInfo(self.target.*);
|
||||
const cc: Condition = switch (int_info.signedness) {
|
||||
.unsigned => switch (tag) {
|
||||
.unsigned => switch (air_tag) {
|
||||
.min => .a,
|
||||
.max => .b,
|
||||
else => unreachable,
|
||||
},
|
||||
.signed => switch (tag) {
|
||||
.signed => switch (air_tag) {
|
||||
.min => .g,
|
||||
.max => .l,
|
||||
else => unreachable,
|
||||
@ -6134,26 +6062,222 @@ fn genBinOp(
|
||||
}
|
||||
try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg });
|
||||
},
|
||||
.Float => try self.genBinOpMir(switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => switch (tag) {
|
||||
.min => .minss,
|
||||
.max => .maxss,
|
||||
else => unreachable,
|
||||
},
|
||||
64 => switch (tag) {
|
||||
.min => .minsd,
|
||||
.max => .maxsd,
|
||||
else => unreachable,
|
||||
},
|
||||
else => return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
}, lhs_ty, dst_mcv, src_mcv),
|
||||
else => return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
},
|
||||
|
||||
else => return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
}
|
||||
return dst_mcv;
|
||||
}
|
||||
|
||||
const mir_tag = if (@as(?Mir.Inst.Tag, switch (lhs_ty.zigTypeTag()) {
|
||||
else => unreachable,
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .vaddss else .addss,
|
||||
.sub => if (self.hasFeature(.avx)) .vsubss else .subss,
|
||||
.mul => if (self.hasFeature(.avx)) .vmulss else .mulss,
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .vdivss else .divss,
|
||||
.max => if (self.hasFeature(.avx)) .vmaxss else .maxss,
|
||||
.min => if (self.hasFeature(.avx)) .vminss else .minss,
|
||||
else => unreachable,
|
||||
},
|
||||
64 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .vaddsd else .addsd,
|
||||
.sub => if (self.hasFeature(.avx)) .vsubsd else .subsd,
|
||||
.mul => if (self.hasFeature(.avx)) .vmulsd else .mulsd,
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .vdivsd else .divsd,
|
||||
.max => if (self.hasFeature(.avx)) .vmaxsd else .maxsd,
|
||||
.min => if (self.hasFeature(.avx)) .vminsd else .minsd,
|
||||
else => unreachable,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
|
||||
else => null,
|
||||
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (lhs_ty.vectorLen()) {
|
||||
1 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .vaddss else .addss,
|
||||
.sub => if (self.hasFeature(.avx)) .vsubss else .subss,
|
||||
.mul => if (self.hasFeature(.avx)) .vmulss else .mulss,
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .vdivss else .divss,
|
||||
.max => if (self.hasFeature(.avx)) .vmaxss else .maxss,
|
||||
.min => if (self.hasFeature(.avx)) .vminss else .minss,
|
||||
else => unreachable,
|
||||
},
|
||||
2...4 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .vaddps else .addps,
|
||||
.sub => if (self.hasFeature(.avx)) .vsubps else .subps,
|
||||
.mul => if (self.hasFeature(.avx)) .vmulps else .mulps,
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .vdivps else .divps,
|
||||
.max => if (self.hasFeature(.avx)) .vmaxps else .maxps,
|
||||
.min => if (self.hasFeature(.avx)) .vminps else .minps,
|
||||
else => unreachable,
|
||||
},
|
||||
5...8 => if (self.hasFeature(.avx)) switch (air_tag) {
|
||||
.add => .vaddps,
|
||||
.sub => .vsubps,
|
||||
.mul => .vmulps,
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .vdivps,
|
||||
.max => .vmaxps,
|
||||
.min => .vminps,
|
||||
else => unreachable,
|
||||
} else null,
|
||||
else => null,
|
||||
},
|
||||
64 => switch (lhs_ty.vectorLen()) {
|
||||
1 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .vaddsd else .addsd,
|
||||
.sub => if (self.hasFeature(.avx)) .vsubsd else .subsd,
|
||||
.mul => if (self.hasFeature(.avx)) .vmulsd else .mulsd,
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .vdivsd else .divsd,
|
||||
.max => if (self.hasFeature(.avx)) .vmaxsd else .maxsd,
|
||||
.min => if (self.hasFeature(.avx)) .vminsd else .minsd,
|
||||
else => unreachable,
|
||||
},
|
||||
2 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .vaddpd else .addpd,
|
||||
.sub => if (self.hasFeature(.avx)) .vsubpd else .subpd,
|
||||
.mul => if (self.hasFeature(.avx)) .vmulpd else .mulpd,
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .vdivpd else .divpd,
|
||||
.max => if (self.hasFeature(.avx)) .vmaxpd else .maxpd,
|
||||
.min => if (self.hasFeature(.avx)) .vminpd else .minpd,
|
||||
else => unreachable,
|
||||
},
|
||||
3...4 => if (self.hasFeature(.avx)) switch (air_tag) {
|
||||
.add => .vaddpd,
|
||||
.sub => .vsubpd,
|
||||
.mul => .vmulpd,
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .vdivpd,
|
||||
.max => .vmaxpd,
|
||||
.min => .vminpd,
|
||||
else => unreachable,
|
||||
} else null,
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
},
|
||||
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
});
|
||||
const dst_alias = registerAlias(dst_mcv.getReg().?, abi_size);
|
||||
if (self.hasFeature(.avx)) {
|
||||
const src1_alias =
|
||||
if (copied_to_dst) dst_alias else registerAlias(lhs_mcv.getReg().?, abi_size);
|
||||
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
src1_alias,
|
||||
src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
src1_alias,
|
||||
registerAlias(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
|
||||
);
|
||||
} else {
|
||||
assert(copied_to_dst);
|
||||
if (src_mcv.isMemory()) try self.asmRegisterMemory(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
|
||||
) else try self.asmRegisterRegister(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
registerAlias(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
|
||||
);
|
||||
}
|
||||
switch (air_tag) {
|
||||
.add, .sub, .mul, .div_float, .div_exact => {},
|
||||
.div_trunc, .div_floor => if (self.hasFeature(.sse4_1)) {
|
||||
const round_tag = if (@as(?Mir.Inst.Tag, switch (lhs_ty.zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => if (self.hasFeature(.avx)) .vroundss else .roundss,
|
||||
64 => if (self.hasFeature(.avx)) .vroundsd else .roundsd,
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (lhs_ty.vectorLen()) {
|
||||
1 => if (self.hasFeature(.avx)) .vroundss else .roundss,
|
||||
2...4 => if (self.hasFeature(.avx)) .vroundps else .roundps,
|
||||
5...8 => if (self.hasFeature(.avx)) .vroundps else null,
|
||||
else => null,
|
||||
},
|
||||
64 => switch (lhs_ty.vectorLen()) {
|
||||
1 => if (self.hasFeature(.avx)) .vroundsd else .roundsd,
|
||||
2 => if (self.hasFeature(.avx)) .vroundpd else .roundpd,
|
||||
3...4 => if (self.hasFeature(.avx)) .vroundpd else null,
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
else => null,
|
||||
},
|
||||
else => unreachable,
|
||||
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
});
|
||||
const round_mode = Immediate.u(switch (air_tag) {
|
||||
.div_trunc => 0b1_0_11,
|
||||
.div_floor => 0b1_0_01,
|
||||
else => unreachable,
|
||||
});
|
||||
switch (round_tag) {
|
||||
.vroundss, .vroundsd => try self.asmRegisterRegisterRegisterImmediate(
|
||||
round_tag,
|
||||
dst_alias,
|
||||
dst_alias,
|
||||
dst_alias,
|
||||
round_mode,
|
||||
),
|
||||
else => try self.asmRegisterRegisterImmediate(
|
||||
round_tag,
|
||||
dst_alias,
|
||||
dst_alias,
|
||||
round_mode,
|
||||
),
|
||||
}
|
||||
} else return self.fail("TODO implement genBinOp for {s} {} without sse4_1", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
.max, .min => {}, // TODO: unordered select
|
||||
else => unreachable,
|
||||
}
|
||||
return dst_mcv;
|
||||
@ -6186,20 +6310,11 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s
|
||||
.register_overflow,
|
||||
.reserved_frame,
|
||||
=> unreachable,
|
||||
.register => |src_reg| switch (ty.zigTypeTag()) {
|
||||
.Float => {
|
||||
if (!Target.x86.featureSetHas(self.target.cpu.features, .sse))
|
||||
return self.fail("TODO genBinOpMir for {s} {} without sse", .{
|
||||
@tagName(mir_tag), ty.fmt(self.bin_file.options.module.?),
|
||||
});
|
||||
return self.asmRegisterRegister(mir_tag, dst_reg.to128(), src_reg.to128());
|
||||
},
|
||||
else => try self.asmRegisterRegister(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
registerAlias(src_reg, abi_size),
|
||||
),
|
||||
},
|
||||
.register => |src_reg| try self.asmRegisterRegister(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
registerAlias(src_reg, abi_size),
|
||||
),
|
||||
.immediate => |imm| switch (self.regBitSize(ty)) {
|
||||
8 => try self.asmRegisterImmediate(
|
||||
mir_tag,
|
||||
@ -9646,7 +9761,7 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
|
||||
lock.* = self.register_manager.lockRegAssumeUnused(reg);
|
||||
}
|
||||
|
||||
const tag = if (@as(
|
||||
const mir_tag = if (@as(
|
||||
?Mir.Inst.Tag,
|
||||
if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 }))
|
||||
switch (ty.zigTypeTag()) {
|
||||
@ -9741,20 +9856,17 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
|
||||
const abi_size = @intCast(u32, ty.abiSize(self.target.*));
|
||||
const mop1_reg = registerAlias(mops[0].getReg().?, abi_size);
|
||||
const mop2_reg = registerAlias(mops[1].getReg().?, abi_size);
|
||||
if (mops[2].isRegister())
|
||||
try self.asmRegisterRegisterRegister(
|
||||
tag,
|
||||
mop1_reg,
|
||||
mop2_reg,
|
||||
registerAlias(mops[2].getReg().?, abi_size),
|
||||
)
|
||||
else
|
||||
try self.asmRegisterRegisterMemory(
|
||||
tag,
|
||||
mop1_reg,
|
||||
mop2_reg,
|
||||
mops[2].mem(Memory.PtrSize.fromSize(abi_size)),
|
||||
);
|
||||
if (mops[2].isRegister()) try self.asmRegisterRegisterRegister(
|
||||
mir_tag,
|
||||
mop1_reg,
|
||||
mop2_reg,
|
||||
registerAlias(mops[2].getReg().?, abi_size),
|
||||
) else try self.asmRegisterRegisterMemory(
|
||||
mir_tag,
|
||||
mop1_reg,
|
||||
mop2_reg,
|
||||
mops[2].mem(Memory.PtrSize.fromSize(abi_size)),
|
||||
);
|
||||
return self.finishAir(inst, mops[0], ops);
|
||||
}
|
||||
|
||||
|
||||
@ -262,61 +262,69 @@ pub const Mnemonic = enum {
|
||||
// MMX
|
||||
movd,
|
||||
// SSE
|
||||
addss,
|
||||
addps, addss,
|
||||
andps,
|
||||
andnps,
|
||||
cmpss,
|
||||
cvtsi2ss,
|
||||
divss,
|
||||
maxss, minss,
|
||||
divps, divss,
|
||||
maxps, maxss,
|
||||
minps, minss,
|
||||
movaps, movss, movups,
|
||||
mulss,
|
||||
mulps, mulss,
|
||||
orps,
|
||||
pextrw, pinsrw,
|
||||
sqrtps,
|
||||
sqrtss,
|
||||
subss,
|
||||
sqrtps, sqrtss,
|
||||
subps, subss,
|
||||
ucomiss,
|
||||
xorps,
|
||||
// SSE2
|
||||
addsd,
|
||||
addpd, addsd,
|
||||
andpd,
|
||||
andnpd,
|
||||
//cmpsd,
|
||||
cvtsd2ss, cvtsi2sd, cvtss2sd,
|
||||
divsd,
|
||||
maxsd, minsd,
|
||||
divpd, divsd,
|
||||
maxpd, maxsd,
|
||||
minpd, minsd,
|
||||
movapd,
|
||||
movq, //movd, movsd,
|
||||
movupd,
|
||||
mulsd,
|
||||
mulpd, mulsd,
|
||||
orpd,
|
||||
pshufhw, pshuflw,
|
||||
psrld, psrlq, psrlw,
|
||||
punpckhbw, punpckhdq, punpckhqdq, punpckhwd,
|
||||
punpcklbw, punpckldq, punpcklqdq, punpcklwd,
|
||||
sqrtpd, sqrtsd,
|
||||
subsd,
|
||||
subpd, subsd,
|
||||
ucomisd,
|
||||
xorpd,
|
||||
// SSE3
|
||||
movddup, movshdup, movsldup,
|
||||
// SSE4.1
|
||||
roundsd, roundss,
|
||||
roundpd, roundps, roundsd, roundss,
|
||||
// AVX
|
||||
vaddpd, vaddps, vaddsd, vaddss,
|
||||
vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd,
|
||||
vdivpd, vdivps, vdivsd, vdivss,
|
||||
vmaxpd, vmaxps, vmaxsd, vmaxss,
|
||||
vminpd, vminps, vminsd, vminss,
|
||||
vmovapd, vmovaps,
|
||||
vmovddup,
|
||||
vmovsd,
|
||||
vmovshdup, vmovsldup,
|
||||
vmovss,
|
||||
vmovupd, vmovups,
|
||||
vmulpd, vmulps, vmulsd, vmulss,
|
||||
vpextrw, vpinsrw,
|
||||
vpshufhw, vpshuflw,
|
||||
vpsrld, vpsrlq, vpsrlw,
|
||||
vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd,
|
||||
vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd,
|
||||
vroundpd, vroundps, vroundsd, vroundss,
|
||||
vsqrtpd, vsqrtps, vsqrtsd, vsqrtss,
|
||||
vsubpd, vsubps, vsubsd, vsubss,
|
||||
// F16C
|
||||
vcvtph2ps, vcvtps2ph,
|
||||
// FMA
|
||||
|
||||
@ -124,27 +124,34 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
||||
.xchg,
|
||||
.xor,
|
||||
|
||||
.addps,
|
||||
.addss,
|
||||
.andnps,
|
||||
.andps,
|
||||
.cmpss,
|
||||
.cvtsi2ss,
|
||||
.divps,
|
||||
.divss,
|
||||
.maxps,
|
||||
.maxss,
|
||||
.minps,
|
||||
.minss,
|
||||
.movaps,
|
||||
.movss,
|
||||
.movups,
|
||||
.mulps,
|
||||
.mulss,
|
||||
.orps,
|
||||
.pextrw,
|
||||
.pinsrw,
|
||||
.sqrtps,
|
||||
.sqrtss,
|
||||
.subps,
|
||||
.subss,
|
||||
.ucomiss,
|
||||
.xorps,
|
||||
|
||||
.addpd,
|
||||
.addsd,
|
||||
.andnpd,
|
||||
.andpd,
|
||||
@ -152,10 +159,14 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
||||
.cvtsd2ss,
|
||||
.cvtsi2sd,
|
||||
.cvtss2sd,
|
||||
.divpd,
|
||||
.divsd,
|
||||
.maxpd,
|
||||
.maxsd,
|
||||
.minpd,
|
||||
.minsd,
|
||||
.movsd,
|
||||
.mulpd,
|
||||
.mulsd,
|
||||
.orpd,
|
||||
.pshufhw,
|
||||
@ -173,6 +184,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
||||
.punpcklwd,
|
||||
.sqrtpd,
|
||||
.sqrtsd,
|
||||
.subpd,
|
||||
.subsd,
|
||||
.ucomisd,
|
||||
.xorpd,
|
||||
@ -181,13 +193,31 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
||||
.movshdup,
|
||||
.movsldup,
|
||||
|
||||
.roundpd,
|
||||
.roundps,
|
||||
.roundsd,
|
||||
.roundss,
|
||||
|
||||
.vaddpd,
|
||||
.vaddps,
|
||||
.vaddsd,
|
||||
.vaddss,
|
||||
.vcvtsd2ss,
|
||||
.vcvtsi2sd,
|
||||
.vcvtsi2ss,
|
||||
.vcvtss2sd,
|
||||
.vdivpd,
|
||||
.vdivps,
|
||||
.vdivsd,
|
||||
.vdivss,
|
||||
.vmaxpd,
|
||||
.vmaxps,
|
||||
.vmaxsd,
|
||||
.vmaxss,
|
||||
.vminpd,
|
||||
.vminps,
|
||||
.vminsd,
|
||||
.vminss,
|
||||
.vmovapd,
|
||||
.vmovaps,
|
||||
.vmovddup,
|
||||
@ -197,6 +227,10 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
||||
.vmovss,
|
||||
.vmovupd,
|
||||
.vmovups,
|
||||
.vmulpd,
|
||||
.vmulps,
|
||||
.vmulsd,
|
||||
.vmulss,
|
||||
.vpextrw,
|
||||
.vpinsrw,
|
||||
.vpshufhw,
|
||||
@ -212,10 +246,18 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
||||
.vpunpckldq,
|
||||
.vpunpcklqdq,
|
||||
.vpunpcklwd,
|
||||
.vroundpd,
|
||||
.vroundps,
|
||||
.vroundsd,
|
||||
.vroundss,
|
||||
.vsqrtpd,
|
||||
.vsqrtps,
|
||||
.vsqrtsd,
|
||||
.vsqrtss,
|
||||
.vsubpd,
|
||||
.vsubps,
|
||||
.vsubsd,
|
||||
.vsubss,
|
||||
|
||||
.vcvtph2ps,
|
||||
.vcvtps2ph,
|
||||
@ -304,6 +346,7 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
|
||||
.lock_mi_rip_s,
|
||||
=> Immediate.s(@bitCast(i32, i)),
|
||||
|
||||
.rrri,
|
||||
.rri_u,
|
||||
.ri_u,
|
||||
.i_u,
|
||||
@ -429,6 +472,12 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void {
|
||||
.{ .reg = inst.data.rrr.r2 },
|
||||
.{ .reg = inst.data.rrr.r3 },
|
||||
},
|
||||
.rrri => &.{
|
||||
.{ .reg = inst.data.rrri.r1 },
|
||||
.{ .reg = inst.data.rrri.r2 },
|
||||
.{ .reg = inst.data.rrri.r3 },
|
||||
.{ .imm = lower.imm(inst.ops, inst.data.rrri.i) },
|
||||
},
|
||||
.ri_s, .ri_u => &.{
|
||||
.{ .reg = inst.data.ri.r },
|
||||
.{ .imm = lower.imm(inst.ops, inst.data.ri.i) },
|
||||
|
||||
@ -166,7 +166,9 @@ pub const Inst = struct {
|
||||
/// Logical exclusive-or
|
||||
xor,
|
||||
|
||||
/// Add single precision floating point values
|
||||
/// Add packed single-precision floating-point values
|
||||
addps,
|
||||
/// Add scalar single-precision floating-point values
|
||||
addss,
|
||||
/// Bitwise logical and of packed single precision floating-point values
|
||||
andps,
|
||||
@ -176,11 +178,17 @@ pub const Inst = struct {
|
||||
cmpss,
|
||||
/// Convert doubleword integer to scalar single-precision floating-point value
|
||||
cvtsi2ss,
|
||||
/// Divide packed single-precision floating-point values
|
||||
divps,
|
||||
/// Divide scalar single-precision floating-point values
|
||||
divss,
|
||||
/// Return maximum single-precision floating-point value
|
||||
/// Maximum of packed single-precision floating-point values
|
||||
maxps,
|
||||
/// Maximum of scalar single-precision floating-point values
|
||||
maxss,
|
||||
/// Return minimum single-precision floating-point value
|
||||
/// Minimum of packed single-precision floating-point values
|
||||
minps,
|
||||
/// Minimum of scalar single-precision floating-point values
|
||||
minss,
|
||||
/// Move aligned packed single-precision floating-point values
|
||||
movaps,
|
||||
@ -188,6 +196,8 @@ pub const Inst = struct {
|
||||
movss,
|
||||
/// Move unaligned packed single-precision floating-point values
|
||||
movups,
|
||||
/// Multiply packed single-precision floating-point values
|
||||
mulps,
|
||||
/// Multiply scalar single-precision floating-point values
|
||||
mulss,
|
||||
/// Bitwise logical or of packed single precision floating-point values
|
||||
@ -196,18 +206,22 @@ pub const Inst = struct {
|
||||
pextrw,
|
||||
/// Insert word
|
||||
pinsrw,
|
||||
/// Square root of scalar single precision floating-point value
|
||||
/// Square root of packed single-precision floating-point values
|
||||
sqrtps,
|
||||
/// Subtract scalar single-precision floating-point values
|
||||
/// Square root of scalar single-precision floating-point value
|
||||
sqrtss,
|
||||
/// Square root of single precision floating-point values
|
||||
/// Subtract packed single-precision floating-point values
|
||||
subps,
|
||||
/// Subtract scalar single-precision floating-point values
|
||||
subss,
|
||||
/// Unordered compare scalar single-precision floating-point values
|
||||
ucomiss,
|
||||
/// Bitwise logical xor of packed single precision floating-point values
|
||||
xorps,
|
||||
|
||||
/// Add double precision floating point values
|
||||
/// Add packed double-precision floating-point values
|
||||
addpd,
|
||||
/// Add scalar double-precision floating-point values
|
||||
addsd,
|
||||
/// Bitwise logical and not of packed double precision floating-point values
|
||||
andnpd,
|
||||
@ -221,14 +235,22 @@ pub const Inst = struct {
|
||||
cvtsi2sd,
|
||||
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
|
||||
cvtss2sd,
|
||||
/// Divide packed double-precision floating-point values
|
||||
divpd,
|
||||
/// Divide scalar double-precision floating-point values
|
||||
divsd,
|
||||
/// Return maximum double-precision floating-point value
|
||||
/// Maximum of packed double-precision floating-point values
|
||||
maxpd,
|
||||
/// Maximum of scalar double-precision floating-point values
|
||||
maxsd,
|
||||
/// Return minimum double-precision floating-point value
|
||||
/// Minimum of packed double-precision floating-point values
|
||||
minpd,
|
||||
/// Minimum of scalar double-precision floating-point values
|
||||
minsd,
|
||||
/// Move scalar double-precision floating-point value
|
||||
movsd,
|
||||
/// Multiply packed double-precision floating-point values
|
||||
mulpd,
|
||||
/// Multiply scalar double-precision floating-point values
|
||||
mulsd,
|
||||
/// Bitwise logical or of packed double precision floating-point values
|
||||
@ -263,6 +285,8 @@ pub const Inst = struct {
|
||||
sqrtpd,
|
||||
/// Square root of scalar double precision floating-point value
|
||||
sqrtsd,
|
||||
/// Subtract packed double-precision floating-point values
|
||||
subpd,
|
||||
/// Subtract scalar double-precision floating-point values
|
||||
subsd,
|
||||
/// Unordered compare scalar double-precision floating-point values
|
||||
@ -277,11 +301,23 @@ pub const Inst = struct {
|
||||
/// Replicate single floating-point values
|
||||
movsldup,
|
||||
|
||||
/// Round scalar double-precision floating-point values
|
||||
/// Round packed double-precision floating-point values
|
||||
roundpd,
|
||||
/// Round packed single-precision floating-point values
|
||||
roundps,
|
||||
/// Round scalar double-precision floating-point value
|
||||
roundsd,
|
||||
/// Round scalar single-precision floating-point values
|
||||
/// Round scalar single-precision floating-point value
|
||||
roundss,
|
||||
|
||||
/// Add packed double-precision floating-point values
|
||||
vaddpd,
|
||||
/// Add packed single-precision floating-point values
|
||||
vaddps,
|
||||
/// Add scalar double-precision floating-point values
|
||||
vaddsd,
|
||||
/// Add scalar single-precision floating-point values
|
||||
vaddss,
|
||||
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
|
||||
vcvtsd2ss,
|
||||
/// Convert doubleword integer to scalar double-precision floating-point value
|
||||
@ -290,6 +326,30 @@ pub const Inst = struct {
|
||||
vcvtsi2ss,
|
||||
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
|
||||
vcvtss2sd,
|
||||
/// Divide packed double-precision floating-point values
|
||||
vdivpd,
|
||||
/// Divide packed single-precision floating-point values
|
||||
vdivps,
|
||||
/// Divide scalar double-precision floating-point values
|
||||
vdivsd,
|
||||
/// Divide scalar single-precision floating-point values
|
||||
vdivss,
|
||||
/// Maximum of packed double-precision floating-point values
|
||||
vmaxpd,
|
||||
/// Maximum of packed single-precision floating-point values
|
||||
vmaxps,
|
||||
/// Maximum of scalar double-precision floating-point values
|
||||
vmaxsd,
|
||||
/// Maximum of scalar single-precision floating-point values
|
||||
vmaxss,
|
||||
/// Minimum of packed double-precision floating-point values
|
||||
vminpd,
|
||||
/// Minimum of packed single-precision floating-point values
|
||||
vminps,
|
||||
/// Minimum of scalar double-precision floating-point values
|
||||
vminsd,
|
||||
/// Minimum of scalar single-precision floating-point values
|
||||
vminss,
|
||||
/// Move aligned packed double-precision floating-point values
|
||||
vmovapd,
|
||||
/// Move aligned packed single-precision floating-point values
|
||||
@ -308,6 +368,14 @@ pub const Inst = struct {
|
||||
vmovupd,
|
||||
/// Move unaligned packed single-precision floating-point values
|
||||
vmovups,
|
||||
/// Multiply packed double-precision floating-point values
|
||||
vmulpd,
|
||||
/// Multiply packed single-precision floating-point values
|
||||
vmulps,
|
||||
/// Multiply scalar double-precision floating-point values
|
||||
vmulsd,
|
||||
/// Multiply scalar single-precision floating-point values
|
||||
vmulss,
|
||||
/// Extract word
|
||||
vpextrw,
|
||||
/// Insert word
|
||||
@ -338,6 +406,14 @@ pub const Inst = struct {
|
||||
vpunpcklqdq,
|
||||
/// Unpack low data
|
||||
vpunpcklwd,
|
||||
/// Round packed double-precision floating-point values
|
||||
vroundpd,
|
||||
/// Round packed single-precision floating-point values
|
||||
vroundps,
|
||||
/// Round scalar double-precision floating-point value
|
||||
vroundsd,
|
||||
/// Round scalar single-precision floating-point value
|
||||
vroundss,
|
||||
/// Square root of packed double-precision floating-point value
|
||||
vsqrtpd,
|
||||
/// Square root of packed single-precision floating-point value
|
||||
@ -346,6 +422,14 @@ pub const Inst = struct {
|
||||
vsqrtsd,
|
||||
/// Square root of scalar single-precision floating-point value
|
||||
vsqrtss,
|
||||
/// Subtract packed double-precision floating-point values
|
||||
vsubpd,
|
||||
/// Subtract packed single-precision floating-point values
|
||||
vsubps,
|
||||
/// Subtract scalar double-precision floating-point values
|
||||
vsubsd,
|
||||
/// Subtract scalar single-precision floating-point values
|
||||
vsubss,
|
||||
|
||||
/// Convert 16-bit floating-point values to single-precision floating-point values
|
||||
vcvtph2ps,
|
||||
@ -442,6 +526,9 @@ pub const Inst = struct {
|
||||
/// Register, register, register operands.
|
||||
/// Uses `rrr` payload.
|
||||
rrr,
|
||||
/// Register, register, register, immediate (byte) operands.
|
||||
/// Uses `rrri` payload.
|
||||
rrri,
|
||||
/// Register, register, immediate (sign-extended) operands.
|
||||
/// Uses `rri` payload.
|
||||
rri_s,
|
||||
@ -625,6 +712,12 @@ pub const Inst = struct {
|
||||
r2: Register,
|
||||
r3: Register,
|
||||
},
|
||||
rrri: struct {
|
||||
r1: Register,
|
||||
r2: Register,
|
||||
r3: Register,
|
||||
i: u8,
|
||||
},
|
||||
rri: struct {
|
||||
r1: Register,
|
||||
r2: Register,
|
||||
|
||||
@ -837,6 +837,8 @@ pub const table = [_]Entry{
|
||||
.{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none },
|
||||
|
||||
// SSE
|
||||
.{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse },
|
||||
|
||||
.{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .none, .sse },
|
||||
|
||||
.{ .andnps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .none, .sse },
|
||||
@ -848,10 +850,16 @@ pub const table = [_]Entry{
|
||||
.{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .none, .sse },
|
||||
.{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .long, .sse },
|
||||
|
||||
.{ .divps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .none, .sse },
|
||||
|
||||
.{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse },
|
||||
|
||||
.{ .maxps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .none, .sse },
|
||||
|
||||
.{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .none, .sse },
|
||||
|
||||
.{ .minps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5d }, 0, .none, .sse },
|
||||
|
||||
.{ .minss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .none, .sse },
|
||||
|
||||
.{ .movaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .none, .sse },
|
||||
@ -863,10 +871,14 @@ pub const table = [_]Entry{
|
||||
.{ .movups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .none, .sse },
|
||||
.{ .movups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .none, .sse },
|
||||
|
||||
.{ .mulps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x59 }, 0, .none, .sse },
|
||||
|
||||
.{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .none, .sse },
|
||||
|
||||
.{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse },
|
||||
|
||||
.{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse },
|
||||
|
||||
.{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse },
|
||||
|
||||
.{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse },
|
||||
@ -878,6 +890,8 @@ pub const table = [_]Entry{
|
||||
.{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .none, .sse },
|
||||
|
||||
// SSE2
|
||||
.{ .addpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .none, .sse2 },
|
||||
@ -893,10 +907,16 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .none, .sse2 },
|
||||
|
||||
.{ .divpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .none, .sse2 },
|
||||
|
||||
.{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 },
|
||||
|
||||
.{ .maxpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .none, .sse2 },
|
||||
|
||||
.{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .none, .sse2 },
|
||||
|
||||
.{ .minpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .none, .sse2 },
|
||||
|
||||
.{ .minsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .none, .sse2 },
|
||||
|
||||
.{ .movapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .none, .sse2 },
|
||||
@ -914,6 +934,8 @@ pub const table = [_]Entry{
|
||||
.{ .movupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .none, .sse2 },
|
||||
.{ .movupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .mulpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x59 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 },
|
||||
@ -947,6 +969,8 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .subpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5c }, 0, .none, .sse2 },
|
||||
|
||||
.{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .none, .sse2 },
|
||||
|
||||
.{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .none, .sse2 },
|
||||
@ -966,10 +990,25 @@ pub const table = [_]Entry{
|
||||
// SSE4.1
|
||||
.{ .pextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 },
|
||||
.{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 },
|
||||
|
||||
// AVX
|
||||
.{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vaddps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vaddps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x58 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vaddsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
|
||||
@ -980,6 +1019,36 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vdivpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_128_wig, .avx },
|
||||
.{ .vdivpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vdivps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .vex_128_wig, .avx },
|
||||
.{ .vdivps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5e }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vdivsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vdivss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmaxps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmaxps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5f }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vmaxss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vminpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_128_wig, .avx },
|
||||
.{ .vminpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vminps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5d }, 0, .vex_128_wig, .avx },
|
||||
.{ .vminps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5d }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vminsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vminss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256_wig, .avx },
|
||||
@ -1019,6 +1088,16 @@ pub const table = [_]Entry{
|
||||
.{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256_wig, .avx },
|
||||
.{ .vmovups, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmulpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x59 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmulpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x59 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmulps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x59 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmulps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x59 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmulsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
@ -1041,6 +1120,16 @@ pub const table = [_]Entry{
|
||||
.{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vroundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vroundpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vroundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vroundps, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vroundsd, .rvmi, &.{ .xmm, .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vroundss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vsqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vsqrtpd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
@ -1051,6 +1140,16 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vsqrtss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vsubpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_128_wig, .avx },
|
||||
.{ .vsubpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vsubps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .vex_128_wig, .avx },
|
||||
.{ .vsubps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5c }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vsubsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
// F16C
|
||||
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
|
||||
.{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user