diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index b791ec5ecc..c5af53b2cf 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -2800,8 +2800,10 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void {
     const result = result: {
         const tag = self.air.instructions.items(.tag)[inst];
         const dst_ty = self.air.typeOfIndex(inst);
-        if (dst_ty.zigTypeTag() == .Float)
-            break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
+        switch (dst_ty.zigTypeTag()) {
+            .Float, .Vector => break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs),
+            else => {},
+        }
         const dst_info = dst_ty.intInfo(self.target.*);
         var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) {
@@ -6531,6 +6533,15 @@ fn genBinOp(
                         => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub },
                         else => null,
                     },
+                    17...32 => switch (air_tag) {
+                        .add,
+                        .addwrap,
+                        => if (self.hasFeature(.avx2)) .{ .vp_b, .add } else null,
+                        .sub,
+                        .subwrap,
+                        => if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null,
+                        else => null,
+                    },
                     else => null,
                 },
                 16 => switch (lhs_ty.vectorLen()) {
@@ -6541,6 +6552,21 @@ fn genBinOp(
                         .sub,
                         .subwrap,
                         => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub },
+                        .mul,
+                        .mulwrap,
+                        => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_w, .mull },
                         else => null,
                     },
+                    9...16 => switch (air_tag) {
+                        .add,
+                        .addwrap,
+                        => if (self.hasFeature(.avx2)) .{ .vp_w, .add } else null,
+                        .sub,
+                        .subwrap,
+                        => if (self.hasFeature(.avx2)) .{ .vp_w, .sub } else null,
+                        .mul,
+                        .mulwrap,
+                        => if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null,
+                        else => null,
+                    },
                     else => null,
@@ -6553,6 +6579,26 @@ fn genBinOp(
                         .sub,
                         .subwrap,
                         => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub },
+                        .mul,
+                        .mulwrap,
+                        => if (self.hasFeature(.avx))
+                            .{ .vp_d, .mull }
+                        else if (self.hasFeature(.sse4_1))
+                            .{ .p_d, .mull }
+                        else
+                            null,
                         else => null,
                     },
+                    5...8 => switch (air_tag) {
+                        .add,
+                        .addwrap,
+                        => if (self.hasFeature(.avx2)) .{ .vp_d, .add } else null,
+                        .sub,
+                        .subwrap,
+                        => if (self.hasFeature(.avx2)) .{ .vp_d, .sub } else null,
+                        .mul,
+                        .mulwrap,
+                        => if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null,
+                        else => null,
+                    },
                     else => null,
@@ -6567,6 +6613,15 @@ fn genBinOp(
                         => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub },
                         else => null,
                     },
+                    3...4 => switch (air_tag) {
+                        .add,
+                        .addwrap,
+                        => if (self.hasFeature(.avx2)) .{ .vp_q, .add } else null,
+                        .sub,
+                        .subwrap,
+                        => if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null,
+                        else => null,
+                    },
                     else => null,
                 },
                 else => null,
diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig
index c8919d062d..7b029cdb4f 100644
--- a/src/arch/x86_64/Encoding.zig
+++ b/src/arch/x86_64/Encoding.zig
@@ -264,6 +264,7 @@ pub const Mnemonic = enum {
     movd, movq,
     paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw,
     pand, pandn, por, pxor,
+    pmulhw, pmullw,
     psubb, psubd, psubq, psubsb, psubsw, psubusb, psubusw, psubw,
     // SSE
     addps, addss,
@@ -317,6 +318,7 @@ pub const Mnemonic = enum {
     insertps,
     pextrb, pextrd, pextrq,
     pinsrb, pinsrd, pinsrq,
+    pmulld,
     roundpd, roundps, roundsd, roundss,
     // AVX
     vaddpd, vaddps, vaddsd, vaddss,
@@ -347,6 +349,7 @@ pub const Mnemonic = enum {
     vpand, vpandn,
     vpextrb, vpextrd, vpextrq, vpextrw,
     vpinsrb, vpinsrd, vpinsrq, vpinsrw,
+    vpmulhw, vpmulld, vpmullw,
     vpor,
     vpshufhw, vpshuflw,
     vpsrld, vpsrlq, vpsrlw,
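Note on the CodeGen.zig hunks above: `airMulDivBinOp` now routes `.Vector` results into `genBinOp`, which picks a packed instruction from the element width, the vector length, and the available CPU features (SSE2/SSE4.1 for 128-bit operands, AVX/AVX2 for the VEX and 256-bit forms). A minimal sketch of the kind of code that starts taking this path; this is a hypothetical standalone test, not part of the patch:

```zig
const std = @import("std");

test "vector integer multiply shapes covered by genBinOp" {
    // 8 x u16 fits in one XMM register: pmullw (SSE2) or vpmullw (AVX).
    const a: @Vector(8, u16) = .{ 1, 2, 3, 4, 5, 6, 7, 8 };
    const b: @Vector(8, u16) = .{ 8, 7, 6, 5, 4, 3, 2, 1 };
    const ab = a * b;
    try std.testing.expectEqual(@as(u16, 8), ab[0]);

    // 4 x u32 needs a 32-bit packed multiply: vpmulld (AVX) or pmulld (SSE4.1).
    const c: @Vector(4, u32) = .{ 10, 20, 30, 40 };
    const d: @Vector(4, u32) = .{ 4, 3, 2, 1 };
    const cd = c * d;
    try std.testing.expectEqual(@as(u32, 40), cd[0]);
}
```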
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index 58eab29958..a18792e6aa 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -453,6 +453,10 @@ pub const Inst = struct {
         /// Bitwise logical and not of packed single-precision floating-point values
         /// Bitwise logical and not of packed double-precision floating-point values
         andn,
+        /// Multiply packed signed integers and store low result
+        mull,
+        /// Multiply packed signed integers and store high result
+        mulh,
         /// Subtract packed signed integers with signed saturation
         subs,
         /// Subtract packed unsigned integers with unsigned saturation
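The new Mir tags `mull` and `mulh` are pseudo mnemonics: `genBinOp` pairs them with a "fixes" variant such as `.p_w` or `.vp_d`, and the concrete mnemonic looked up in encodings.zig is obtained by splicing the tag into the fixes name at the underscore. A rough illustration of that naming scheme; this is a simplified sketch, not the backend's actual lowering code, and `fixedMnemonic` is a hypothetical helper:

```zig
const std = @import("std");

/// Hypothetical helper: splice a pseudo tag like "mull" into a fixes name like
/// "vp_w" at the underscore, e.g. ("p_w", "mull") -> "pmullw".
fn fixedMnemonic(buf: []u8, fixes: []const u8, tag: []const u8) ![]const u8 {
    const split = std.mem.indexOfScalar(u8, fixes, '_') orelse return error.MissingUnderscore;
    return std.fmt.bufPrint(buf, "{s}{s}{s}", .{ fixes[0..split], tag, fixes[split + 1 ..] });
}

test "fixes and tag compose into the table mnemonics" {
    var buf: [16]u8 = undefined;
    try std.testing.expectEqualStrings("pmullw", try fixedMnemonic(&buf, "p_w", "mull"));
    try std.testing.expectEqualStrings("vpmulld", try fixedMnemonic(&buf, "vp_d", "mull"));
    try std.testing.expectEqualStrings("vpmulhw", try fixedMnemonic(&buf, "vp_w", "mulh"));
}
```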
diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig
index 820fd715ba..86a79596cd 100644
--- a/src/arch/x86_64/encodings.zig
+++ b/src/arch/x86_64/encodings.zig
@@ -1011,6 +1011,10 @@ pub const table = [_]Entry{
     .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
 
+    .{ .pmulhw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 },
+
+    .{ .pmullw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 },
+
     .{ .por, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 },
 
     .{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 },
 
@@ -1087,6 +1091,8 @@ pub const table = [_]Entry{
     .{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 },
     .{ .pinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 },
 
+    .{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 },
+
     .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 },
 
     .{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 },
@@ -1312,6 +1318,12 @@ pub const table = [_]Entry{
     .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
 
+    .{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
+
+    .{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
+
+    .{ .vpmullw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_128_wig, .avx },
+
     .{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },
 
     .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx },
 
@@ -1418,9 +1430,9 @@ pub const table = [_]Entry{
     .{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma },
 
     // AVX2
-    .{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 },
-    .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
-    .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
+    .{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 },
+    .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
+    .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
 
     .{ .vpaddb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfc }, 0, .vex_256_wig, .avx2 },
     .{ .vpaddw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfd }, 0, .vex_256_wig, .avx2 },
@@ -1437,6 +1449,12 @@ pub const table = [_]Entry{
     .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
 
+    .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 },
+
+    .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 },
+
+    .{ .vpmullw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_256_wig, .avx2 },
+
     .{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
 
     .{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
 
diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig
index 87ccdfb567..5d217a5ce0 100644
--- a/test/behavior/vector.zig
+++ b/test/behavior/vector.zig
@@ -26,7 +26,8 @@ test "implicit cast vector to array - bool" {
 
 test "vector wrap operators" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64 and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
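The behavior-test gate mirrors the codegen requirement: without SSE4.1 there is no pmulld fallback, so 32-bit vector multiplies (which "vector wrap operators" exercises) still lack a lowering on the self-hosted x86_64 backend. A hedged example of the same comptime feature check guarding a wrapping vector multiply; this is a hypothetical test, written against the same std.Target.x86.featureSetHas API the patch uses:

```zig
const std = @import("std");
const builtin = @import("builtin");

test "wrapping vector multiply needs a 32-bit packed multiply" {
    if (builtin.zig_backend == .stage2_x86_64 and
        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;

    const a: @Vector(4, u32) = .{ 0xffff_ffff, 2, 3, 4 };
    const b: @Vector(4, u32) = .{ 2, 2, 2, 2 };
    const c = a *% b; // wraps per lane instead of being an illegal overflow
    try std.testing.expectEqual(@as(u32, 0xffff_fffe), c[0]);
    try std.testing.expectEqual(@as(u32, 4), c[1]);
}
```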