mirror of
https://github.com/ziglang/zig.git
synced 2026-01-26 01:05:22 +00:00
x86_64: implement integer vector mul
This commit is contained in:
parent
bd771bec49
commit
f39ff6cc68
@ -2800,8 +2800,10 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void {
|
||||
const result = result: {
|
||||
const tag = self.air.instructions.items(.tag)[inst];
|
||||
const dst_ty = self.air.typeOfIndex(inst);
|
||||
if (dst_ty.zigTypeTag() == .Float)
|
||||
break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
|
||||
switch (dst_ty.zigTypeTag()) {
|
||||
.Float, .Vector => break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs),
|
||||
else => {},
|
||||
}
|
||||
|
||||
const dst_info = dst_ty.intInfo(self.target.*);
|
||||
var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) {
|
||||
@ -6531,6 +6533,15 @@ fn genBinOp(
|
||||
=> if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub },
|
||||
else => null,
|
||||
},
|
||||
17...32 => switch (air_tag) {
|
||||
.add,
|
||||
.addwrap,
|
||||
=> if (self.hasFeature(.avx2)) .{ .vp_b, .add } else null,
|
||||
.sub,
|
||||
.subwrap,
|
||||
=> if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null,
|
||||
else => null,
|
||||
},
|
||||
else => null,
|
||||
},
|
||||
16 => switch (lhs_ty.vectorLen()) {
|
||||
@ -6541,6 +6552,21 @@ fn genBinOp(
|
||||
.sub,
|
||||
.subwrap,
|
||||
=> if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub },
|
||||
.mul,
|
||||
.mulwrap,
|
||||
// 16-bit lanes: the non-AVX fallback must stay word-sized (`.p_w`, matching
// the `.sub` prong above); `.p_d` would pick the dword multiply instead.
=> if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_w, .mull },
|
||||
else => null,
|
||||
},
|
||||
9...16 => switch (air_tag) {
|
||||
.add,
|
||||
.addwrap,
|
||||
=> if (self.hasFeature(.avx2)) .{ .vp_w, .add } else null,
|
||||
.sub,
|
||||
.subwrap,
|
||||
=> if (self.hasFeature(.avx2)) .{ .vp_w, .sub } else null,
|
||||
.mul,
|
||||
.mulwrap,
|
||||
=> if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null,
|
||||
else => null,
|
||||
},
|
||||
else => null,
|
||||
@ -6553,6 +6579,26 @@ fn genBinOp(
|
||||
.sub,
|
||||
.subwrap,
|
||||
=> if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub },
|
||||
.mul,
|
||||
.mulwrap,
|
||||
=> if (self.hasFeature(.avx))
|
||||
.{ .vp_d, .mull }
|
||||
else if (self.hasFeature(.sse4_1))
|
||||
.{ .p_d, .mull }
|
||||
else
|
||||
null,
|
||||
else => null,
|
||||
},
|
||||
5...8 => switch (air_tag) {
|
||||
.add,
|
||||
.addwrap,
|
||||
=> if (self.hasFeature(.avx2)) .{ .vp_d, .add } else null,
|
||||
.sub,
|
||||
.subwrap,
|
||||
=> if (self.hasFeature(.avx2)) .{ .vp_d, .sub } else null,
|
||||
.mul,
|
||||
.mulwrap,
|
||||
=> if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null,
|
||||
else => null,
|
||||
},
|
||||
else => null,
|
||||
@ -6567,6 +6613,15 @@ fn genBinOp(
|
||||
=> if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub },
|
||||
else => null,
|
||||
},
|
||||
3...4 => switch (air_tag) {
|
||||
.add,
|
||||
.addwrap,
|
||||
=> if (self.hasFeature(.avx2)) .{ .vp_q, .add } else null,
|
||||
.sub,
|
||||
.subwrap,
|
||||
=> if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null,
|
||||
else => null,
|
||||
},
|
||||
else => null,
|
||||
},
|
||||
else => null,
|
||||
|
||||
@ -264,6 +264,7 @@ pub const Mnemonic = enum {
|
||||
movd, movq,
|
||||
paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw,
|
||||
pand, pandn, por, pxor,
|
||||
pmulhw, pmullw,
|
||||
psubb, psubd, psubq, psubsb, psubsw, psubusb, psubusw, psubw,
|
||||
// SSE
|
||||
addps, addss,
|
||||
@ -317,6 +318,7 @@ pub const Mnemonic = enum {
|
||||
insertps,
|
||||
pextrb, pextrd, pextrq,
|
||||
pinsrb, pinsrd, pinsrq,
|
||||
pmulld,
|
||||
roundpd, roundps, roundsd, roundss,
|
||||
// AVX
|
||||
vaddpd, vaddps, vaddsd, vaddss,
|
||||
@ -347,6 +349,7 @@ pub const Mnemonic = enum {
|
||||
vpand, vpandn,
|
||||
vpextrb, vpextrd, vpextrq, vpextrw,
|
||||
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
|
||||
vpmulhw, vpmulld, vpmullw,
|
||||
vpor,
|
||||
vpshufhw, vpshuflw,
|
||||
vpsrld, vpsrlq, vpsrlw,
|
||||
|
||||
@ -453,6 +453,10 @@ pub const Inst = struct {
|
||||
/// Bitwise logical and not of packed single-precision floating-point values
|
||||
/// Bitwise logical and not of packed double-precision floating-point values
|
||||
andn,
|
||||
/// Multiply packed signed integers and store low result
|
||||
mull,
|
||||
/// Multiply packed signed integers and store high result
|
||||
mulh,
|
||||
/// Subtract packed signed integers with signed saturation
|
||||
subs,
|
||||
/// Subtract packed unsigned integers with unsigned saturation
|
||||
|
||||
@ -1011,6 +1011,10 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pmulhw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pmullw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .por, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 },
|
||||
@ -1087,6 +1091,8 @@ pub const table = [_]Entry{
|
||||
.{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 },
|
||||
.{ .pinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 },
|
||||
|
||||
.{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 },
|
||||
@ -1312,6 +1318,12 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmullw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx },
|
||||
@ -1418,9 +1430,9 @@ pub const table = [_]Entry{
|
||||
.{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma },
|
||||
|
||||
// AVX2
|
||||
.{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 },
|
||||
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
|
||||
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
|
||||
.{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 },
|
||||
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
|
||||
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
|
||||
|
||||
.{ .vpaddb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfc }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpaddw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfd }, 0, .vex_256_wig, .avx2 },
|
||||
@ -1437,6 +1449,12 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
// 256-bit (ymm) packed-integer multiplies require AVX2, like the other ymm
// integer entries in this section.
.{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 },
.{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 },
.{ .vpmullw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
@ -26,7 +26,8 @@ test "implicit cast vector to array - bool" {
|
||||
|
||||
test "vector wrap operators" {
|
||||
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64 and
|
||||
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user