From 403c2d91bed456085eb685a9f89996c4635ce4b9 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 15 May 2023 20:17:06 -0400 Subject: [PATCH] x86_64: fix float min/max behavior --- src/arch/x86_64/CodeGen.zig | 302 +++++++++++++++++++++++++++++- src/arch/x86_64/Encoding.zig | 33 ++-- src/arch/x86_64/Lower.zig | 7 + src/arch/x86_64/Mir.zig | 20 ++ src/arch/x86_64/encoder.zig | 31 +-- src/arch/x86_64/encodings.zig | 34 ++++ test/behavior/maximum_minimum.zig | 6 +- 7 files changed, 393 insertions(+), 40 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 2cd5721258..7ea0db516b 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1271,6 +1271,27 @@ fn asmRegisterRegisterRegister( }); } +fn asmRegisterRegisterRegisterRegister( + self: *Self, + tag: Mir.Inst.FixedTag, + reg1: Register, + reg2: Register, + reg3: Register, + reg4: Register, +) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = .rrrr, + .data = .{ .rrrr = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .r3 = reg3, + .r4 = reg4, + } }, + }); +} + fn asmRegisterRegisterRegisterImmediate( self: *Self, tag: Mir.Inst.FixedTag, @@ -6224,12 +6245,26 @@ fn genBinOp( lhs_air: Air.Inst.Ref, rhs_air: Air.Inst.Ref, ) !MCValue { - const lhs_mcv = try self.resolveInst(lhs_air); - const rhs_mcv = try self.resolveInst(rhs_air); const lhs_ty = self.air.typeOf(lhs_air); const rhs_ty = self.air.typeOf(rhs_air); const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*)); + const maybe_mask_reg = switch (air_tag) { + else => null, + .max, .min => if (lhs_ty.scalarType().isRuntimeFloat()) registerAlias( + if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: { + try self.register_manager.getReg(.xmm0, null); + break :mask .xmm0; + } else try self.register_manager.allocReg(null, sse), + abi_size, + ) else null, + }; + const mask_lock = + if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null; + defer if (mask_lock) |lock| self.register_manager.unlockReg(lock); + + const lhs_mcv = try self.resolveInst(lhs_air); + const rhs_mcv = try self.resolveInst(rhs_air); switch (lhs_mcv) { .immediate => |imm| switch (imm) { 0 => switch (air_tag) { @@ -6300,7 +6335,16 @@ fn genBinOp( }; defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - const src_mcv = if (flipped) lhs_mcv else rhs_mcv; + const unmat_src_mcv = if (flipped) lhs_mcv else rhs_mcv; + const src_mcv: MCValue = if (maybe_mask_reg) |mask_reg| + if (self.hasFeature(.avx) and unmat_src_mcv.isRegister() and maybe_inst != null and + self.liveness.operandDies(maybe_inst.?, if (flipped) 0 else 1)) unmat_src_mcv else src: { + try self.genSetReg(mask_reg, rhs_ty, unmat_src_mcv); + break :src .{ .register = mask_reg }; + } + else + unmat_src_mcv; + if (!vec_op) { switch (air_tag) { .add, @@ -7009,18 +7053,26 @@ fn genBinOp( })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), }); + + const lhs_copy_reg = if (maybe_mask_reg) |_| registerAlias( + if (copied_to_dst) try self.copyToTmpRegister(lhs_ty, dst_mcv) else lhs_mcv.getReg().?, + abi_size, + ) else null; + const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null; + defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock); + if (self.hasFeature(.avx)) { - const src1_alias = + const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); if (src_mcv.isMemory()) 
try self.asmRegisterRegisterMemory( mir_tag, dst_reg, - src1_alias, + lhs_reg, src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), ) else try self.asmRegisterRegisterRegister( mir_tag, dst_reg, - src1_alias, + lhs_reg, registerAlias(if (src_mcv.isRegister()) src_mcv.getReg().? else @@ -7041,9 +7093,10 @@ fn genBinOp( try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), ); } + switch (air_tag) { .add, .addwrap, .sub, .subwrap, .mul, .mulwrap, .div_float, .div_exact => {}, - .div_trunc, .div_floor => try self.genRound( + .div_trunc, .div_floor => if (self.hasFeature(.sse4_1)) try self.genRound( lhs_ty, dst_reg, .{ .register = dst_reg }, @@ -7052,11 +7105,240 @@ fn genBinOp( .div_floor => 0b1_0_01, else => unreachable, }, - ), + ) else return self.fail("TODO implement genBinOp for {s} {} without sse4_1 feature", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), .bit_and, .bit_or, .xor => {}, - .max, .min => {}, // TODO: unordered select + .max, .min => if (maybe_mask_reg) |mask_reg| if (self.hasFeature(.avx)) { + const rhs_copy_reg = registerAlias(src_mcv.getReg().?, abi_size); + + try self.asmRegisterRegisterRegisterImmediate( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ .v_ss, .cmp }, + 64 => .{ .v_sd, .cmp }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType().zigTypeTag()) { + .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen()) { + 1 => .{ .v_ss, .cmp }, + 2...8 => .{ .v_ps, .cmp }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen()) { + 1 => .{ .v_sd, .cmp }, + 2...4 => .{ .v_pd, .cmp }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + mask_reg, + rhs_copy_reg, + rhs_copy_reg, + Immediate.u(3), // unord + ); + try self.asmRegisterRegisterRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ .v_ps, .blendv }, + 64 => .{ .v_pd, .blendv }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType().zigTypeTag()) { + .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen()) { + 1...8 => .{ .v_ps, .blendv }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen()) { + 1...4 => .{ .v_pd, .blendv }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + dst_reg, + dst_reg, + lhs_copy_reg.?, + mask_reg, + ); + } else { + const has_blend = self.hasFeature(.sse4_1); + try self.asmRegisterRegisterImmediate( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ ._ss, .cmp }, + 64 => .{ ._sd, .cmp }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType().zigTypeTag()) { + .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen()) { + 1 => .{ ._ss, .cmp }, + 2...4 => .{ ._ps, .cmp }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen()) { + 1 => .{ ._sd, 
.cmp }, + 2 => .{ ._pd, .cmp }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + mask_reg, + mask_reg, + Immediate.u(if (has_blend) 3 else 7), // unord, ord + ); + if (has_blend) try self.asmRegisterRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ ._ps, .blendv }, + 64 => .{ ._pd, .blendv }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType().zigTypeTag()) { + .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen()) { + 1...4 => .{ ._ps, .blendv }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen()) { + 1...2 => .{ ._pd, .blendv }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + dst_reg, + lhs_copy_reg.?, + mask_reg, + ) else { + try self.asmRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ ._ps, .@"and" }, + 64 => .{ ._pd, .@"and" }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType().zigTypeTag()) { + .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen()) { + 1...4 => .{ ._ps, .@"and" }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen()) { + 1...2 => .{ ._pd, .@"and" }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + dst_reg, + mask_reg, + ); + try self.asmRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ ._ps, .andn }, + 64 => .{ ._pd, .andn }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType().zigTypeTag()) { + .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen()) { + 1...4 => .{ ._ps, .andn }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen()) { + 1...2 => .{ ._pd, .andn }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + mask_reg, + lhs_copy_reg.?, + ); + try self.asmRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ ._ps, .@"or" }, + 64 => .{ ._pd, .@"or" }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType().zigTypeTag()) { + .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen()) { + 1...4 => .{ ._ps, .@"or" }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen()) { + 1...2 => .{ ._pd, .@"or" }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + 
else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + dst_reg, + mask_reg, + ); + } + }, else => unreachable, } + return dst_mcv; } @@ -9282,7 +9564,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null, else => null, }, - .Float => switch (ty.floatBits(self.target.*)) { + .Float => switch (ty.scalarType().floatBits(self.target.*)) { 16, 128 => switch (abi_size) { 2...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 52d010880e..0aaf12013d 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -178,7 +178,7 @@ pub fn format( try writer.print("+{s} ", .{tag}); }, .m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}), - .mr, .rm, .rmi, .mri, .mrc, .rvm, .rvmi, .mvr => try writer.writeAll("/r "), + .mr, .rm, .rmi, .mri, .mrc, .rm0, .rvm, .rvmr, .rvmi, .mvr => try writer.writeAll("/r "), } switch (encoding.data.op_en) { @@ -202,7 +202,8 @@ pub fn format( }; try writer.print("{s} ", .{tag}); }, - .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rvm, .mvr => {}, + .rvmr => try writer.writeAll("/is4 "), + .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr => {}, } try writer.print("{s} ", .{@tagName(encoding.mnemonic)}); @@ -270,7 +271,7 @@ pub const Mnemonic = enum { addps, addss, andps, andnps, - cmpss, + cmpps, cmpss, cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si, divps, divss, maxps, maxss, @@ -290,7 +291,7 @@ pub const Mnemonic = enum { addpd, addsd, andpd, andnpd, - //cmpsd, + cmppd, //cmpsd, cvtdq2pd, cvtdq2ps, cvtpd2dq, cvtpd2pi, cvtpd2ps, cvtpi2pd, cvtps2dq, cvtps2pd, cvtsd2si, cvtsd2ss, cvtsi2sd, cvtss2sd, cvttpd2dq, cvttpd2pi, cvttps2dq, cvttsd2si, @@ -315,6 +316,7 @@ pub const Mnemonic = enum { // SSE3 movddup, movshdup, movsldup, // SSE4.1 + blendpd, blendps, blendvpd, blendvps, extractps, insertps, pextrb, pextrd, pextrq, @@ -325,7 +327,9 @@ pub const Mnemonic = enum { // AVX vaddpd, vaddps, vaddsd, vaddss, vandnpd, vandnps, vandpd, vandps, + vblendpd, vblendps, vblendvpd, vblendvps, vbroadcastf128, vbroadcastsd, vbroadcastss, + vcmppd, vcmpps, vcmpsd, vcmpss, vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps, vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vcvtss2si, @@ -385,7 +389,7 @@ pub const OpEn = enum { fd, td, m1, mc, mi, mr, rm, rmi, mri, mrc, - vmi, rvm, rvmi, mvr, + rm0, vmi, rvm, rvmr, rvmi, mvr, // zig fmt: on }; @@ -407,7 +411,7 @@ pub const Op = enum { moffs, sreg, st, mm, mm_m64, - xmm, xmm_m32, xmm_m64, xmm_m128, + xmm0, xmm, xmm_m32, xmm_m64, xmm_m128, ymm, ymm_m256, // zig fmt: on @@ -436,7 +440,9 @@ pub const Op = enum { .segment => .sreg, .x87 => .st, .mmx => .mm, - .sse => switch (reg.bitSize()) { + .sse => if (reg == .xmm0) + .xmm0 + else switch (reg.bitSize()) { 128 => .xmm, 256 => .ymm, else => unreachable, @@ -494,7 +500,7 @@ pub const Op = enum { .eax, .r32, .rm32, .r32_m16 => unreachable, .rax, .r64, .rm64, .r64_m16 => unreachable, .st, .mm, .mm_m64 => unreachable, - .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable, + .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable, .ymm, .ymm_m256 => unreachable, .m8, .m16, .m32, .m64, .m80, .m128, 
.m256 => unreachable, .unity => 1, @@ -516,7 +522,7 @@ pub const Op = enum { .eax, .r32, .rm32, .r32_m8, .r32_m16 => 32, .rax, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64, .st => 80, - .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128, + .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128, .ymm, .ymm_m256 => 256, }; } @@ -526,7 +532,8 @@ pub const Op = enum { .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, .rel8, .rel16, .rel32 => unreachable, - .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .st, .mm, .xmm, .ymm => unreachable, + .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64 => unreachable, + .st, .mm, .xmm0, .xmm, .ymm => unreachable, .m8, .rm8, .r32_m8 => 8, .m16, .rm16, .r32_m16, .r64_m16 => 16, .m32, .rm32, .xmm_m32 => 32, @@ -558,7 +565,7 @@ pub const Op = enum { .rm8, .rm16, .rm32, .rm64, .r32_m8, .r32_m16, .r64_m16, .st, .mm, .mm_m64, - .xmm, .xmm_m32, .xmm_m64, .xmm_m128, + .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128, .ymm, .ymm_m256, => true, else => false, @@ -612,7 +619,7 @@ pub const Op = enum { .sreg => .segment, .st => .x87, .mm, .mm_m64 => .mmx, - .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse, + .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse, .ymm, .ymm_m256 => .sse, }; } @@ -629,7 +636,7 @@ pub const Op = enum { else => { if (op.isRegister() and target.isRegister()) { return switch (target) { - .cl, .al, .ax, .eax, .rax => op == target, + .cl, .al, .ax, .eax, .rax, .xmm0 => op == target, else => op.class() == target.class() and op.regBitSize() == target.regBitSize(), }; } diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 65d2b64398..d77ddf3050 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -377,6 +377,7 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .r => inst.data.r.fixes, .rr => inst.data.rr.fixes, .rrr => inst.data.rrr.fixes, + .rrrr => inst.data.rrrr.fixes, .rrri => inst.data.rrri.fixes, .rri_s, .rri_u => inst.data.rri.fixes, .ri_s, .ri_u => inst.data.ri.fixes, @@ -430,6 +431,12 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rrr.r2 }, .{ .reg = inst.data.rrr.r3 }, }, + .rrrr => &.{ + .{ .reg = inst.data.rrrr.r1 }, + .{ .reg = inst.data.rrrr.r2 }, + .{ .reg = inst.data.rrrr.r3 }, + .{ .reg = inst.data.rrrr.r4 }, + }, .rrri => &.{ .{ .reg = inst.data.rrri.r1 }, .{ .reg = inst.data.rrri.r2 }, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 4483de858e..9f59a2afba 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -596,6 +596,16 @@ pub const Inst = struct { /// Replicate single floating-point values movsldup, + /// Blend packed single-precision floating-point values + /// Blend scalar single-precision floating-point values + /// Blend packed double-precision floating-point values + /// Blend scalar double-precision floating-point values + blend, + /// Variable blend packed single-precision floating-point values + /// Variable blend scalar single-precision floating-point values + /// Variable blend packed double-precision floating-point values + /// Variable blend scalar double-precision floating-point values + blendv, /// Extract packed floating-point values extract, /// Insert scalar single-precision floating-point value @@ -651,6 +661,9 @@ pub const Inst = struct { /// Register, register, register operands. /// Uses `rrr` payload. rrr, + /// Register, register, register, register operands. + /// Uses `rrrr` payload. 
+ rrrr, /// Register, register, register, immediate (byte) operands. /// Uses `rrri` payload. rrri, @@ -870,6 +883,13 @@ pub const Inst = struct { r2: Register, r3: Register, }, + rrrr: struct { + fixes: Fixes = ._, + r1: Register, + r2: Register, + r3: Register, + r4: Register, + }, rrri: struct { fixes: Fixes = ._, r1: Register, diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 0ce875240d..5f9a2f49b3 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -226,8 +226,8 @@ pub const Instruction = struct { else => { const mem_op = switch (data.op_en) { .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], - .rm, .rmi, .vmi => inst.ops[1], - .rvm, .rvmi => inst.ops[2], + .rm, .rmi, .rm0, .vmi => inst.ops[1], + .rvm, .rvmr, .rvmi => inst.ops[2], else => unreachable, }; switch (mem_op) { @@ -235,7 +235,7 @@ pub const Instruction = struct { const rm = switch (data.op_en) { .m, .mi, .m1, .mc, .vmi => enc.modRmExt(), .mr, .mri, .mrc => inst.ops[1].reg.lowEnc(), - .rm, .rmi, .rvm, .rvmi => inst.ops[0].reg.lowEnc(), + .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0].reg.lowEnc(), .mvr => inst.ops[2].reg.lowEnc(), else => unreachable, }; @@ -245,7 +245,7 @@ pub const Instruction = struct { const op = switch (data.op_en) { .m, .mi, .m1, .mc, .vmi => .none, .mr, .mri, .mrc => inst.ops[1], - .rm, .rmi, .rvm, .rvmi => inst.ops[0], + .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0], .mvr => inst.ops[2], else => unreachable, }; @@ -257,6 +257,7 @@ pub const Instruction = struct { switch (data.op_en) { .mi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder), .rmi, .mri, .vmi => try encodeImm(inst.ops[2].imm, data.ops[2], encoder), + .rvmr => try encoder.imm8(@as(u8, inst.ops[3].reg.enc()) << 4), .rvmi => try encodeImm(inst.ops[3].imm, data.ops[3], encoder), else => {}, } @@ -298,7 +299,7 @@ pub const Instruction = struct { .i, .zi, .o, .oi, .d, .np => null, .fd => inst.ops[1].mem.base().reg, .td => inst.ops[0].mem.base().reg, - .rm, .rmi => if (inst.ops[1].isSegmentRegister()) + .rm, .rmi, .rm0 => if (inst.ops[1].isSegmentRegister()) switch (inst.ops[1]) { .reg => |reg| reg, .mem => |mem| mem.base().reg, @@ -314,7 +315,7 @@ pub const Instruction = struct { } else null, - .vmi, .rvm, .rvmi, .mvr => unreachable, + .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable, }; if (segment_override) |seg| { legacy.setSegmentOverride(seg); @@ -333,23 +334,23 @@ pub const Instruction = struct { switch (op_en) { .np, .i, .zi, .fd, .td, .d => {}, .o, .oi => rex.b = inst.ops[0].reg.isExtended(), - .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc => { + .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0 => { const r_op = switch (op_en) { - .rm, .rmi => inst.ops[0], + .rm, .rmi, .rm0 => inst.ops[0], .mr, .mri, .mrc => inst.ops[1], else => .none, }; rex.r = r_op.isBaseExtended(); const b_x_op = switch (op_en) { - .rm, .rmi => inst.ops[1], + .rm, .rmi, .rm0 => inst.ops[1], .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0], else => unreachable, }; rex.b = b_x_op.isBaseExtended(); rex.x = b_x_op.isIndexExtended(); }, - .vmi, .rvm, .rvmi, .mvr => unreachable, + .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable, } try encoder.rex(rex); @@ -367,9 +368,9 @@ pub const Instruction = struct { switch (op_en) { .np, .i, .zi, .fd, .td, .d => {}, .o, .oi => vex.b = inst.ops[0].reg.isExtended(), - .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .vmi, .rvm, .rvmi, .mvr => { + .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr => { const r_op = switch (op_en) { 
- .rm, .rmi, .rvm, .rvmi => inst.ops[0], + .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0], .mr, .mri, .mrc => inst.ops[1], .mvr => inst.ops[2], .m, .mi, .m1, .mc, .vmi => .none, @@ -378,9 +379,9 @@ pub const Instruction = struct { vex.r = r_op.isBaseExtended(); const b_x_op = switch (op_en) { - .rm, .rmi, .vmi => inst.ops[1], + .rm, .rmi, .rm0, .vmi => inst.ops[1], .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], - .rvm, .rvmi => inst.ops[2], + .rvm, .rvmr, .rvmi => inst.ops[2], else => unreachable, }; vex.b = b_x_op.isBaseExtended(); @@ -408,7 +409,7 @@ pub const Instruction = struct { switch (op_en) { else => {}, .vmi => vex.v = inst.ops[0].reg, - .rvm, .rvmi => vex.v = inst.ops[1].reg, + .rvm, .rvmr, .rvmi => vex.v = inst.ops[1].reg, } try encoder.vex(vex); diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index c326f4230a..e087f6dfc7 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -846,6 +846,8 @@ pub const table = [_]Entry{ .{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .none, .sse }, + .{ .cmpps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .none, .sse }, + .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse }, .{ .cvtpi2ps, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x0f, 0x2a }, 0, .none, .sse }, @@ -917,6 +919,8 @@ pub const table = [_]Entry{ .{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .none, .sse2 }, + .{ .cmppd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .none, .sse2 }, + .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 }, .{ .cvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .none, .sse2 }, @@ -1085,6 +1089,14 @@ pub const table = [_]Entry{ .{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 }, // SSE4.1 + .{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 }, + + .{ .blendps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .none, .sse4_1 }, + + .{ .blendvpd, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x15 }, 0, .none, .sse4_1 }, + + .{ .blendvps, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x14 }, 0, .none, .sse4_1 }, + .{ .extractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .none, .sse4_1 }, .{ .insertps, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .none, .sse4_1 }, @@ -1146,11 +1158,33 @@ pub const table = [_]Entry{ .{ .vandps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .vex_128_wig, .avx }, .{ .vandps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x54 }, 0, .vex_256_wig, .avx }, + .{ .vblendpd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .vex_128_wig, .avx }, + .{ .vblendpd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .vex_256_wig, .avx }, + + .{ .vblendps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .vex_128_wig, .avx }, + .{ .vblendps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .vex_256_wig, .avx }, + + .{ .vblendvpd, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4b }, 0, .vex_128_w0, .avx }, + .{ .vblendvpd, .rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x3a, 0x4b }, 0, .vex_256_w0, .avx }, + + .{ .vblendvps, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4a }, 0, .vex_128_w0, .avx }, + .{ .vblendvps, 
.rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x3a, 0x4a }, 0, .vex_256_w0, .avx }, + .{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx }, .{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx }, .{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx }, .{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx }, + .{ .vcmppd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .vex_128_wig, .avx }, + .{ .vcmppd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .vex_256_wig, .avx }, + + .{ .vcmpps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .vex_128_wig, .avx }, + .{ .vcmpps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .vex_256_wig, .avx }, + + .{ .vcmpsd, .rvmi, &.{ .xmm, .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx }, + + .{ .vcmpss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx }, + .{ .vcvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx }, .{ .vcvtdq2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx }, diff --git a/test/behavior/maximum_minimum.zig b/test/behavior/maximum_minimum.zig index ecfe596760..db6cad221f 100644 --- a/test/behavior/maximum_minimum.zig +++ b/test/behavior/maximum_minimum.zig @@ -24,7 +24,8 @@ test "@max" { test "@max on vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -72,7 +73,8 @@ test "@min" { test "@min for vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
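
Note (illustrative sketch, not part of the patch): when neither AVX nor SSE4.1 blendv is available, the fallback path in genBinOp builds the NaN-aware select out of a cmp/and/andn/or sequence. A scalar Zig model of that selection is shown below, assuming minsd/maxsd's behavior of returning the second source operand when either input is NaN; the helper name and test values are hypothetical and only mirror the emitted instruction sequence.

    const std = @import("std");

    // Scalar model of the four-instruction fallback:
    //   cmpsd  mask, mask, 7   ; mask = ordered(rhs, rhs), all-ones unless rhs is NaN
    //   andpd  dst,  mask      ; keep the minsd/maxsd result only when rhs is ordered
    //   andnpd mask, lhs_copy  ; recover the original lhs where rhs was NaN
    //   orpd   dst,  mask      ; merge the two halves
    fn selectIgnoringNanRhs(native_result: f64, lhs: f64, rhs: f64) f64 {
        const ones = ~@as(u64, 0);
        const mask: u64 = if (std.math.isNan(rhs)) 0 else ones;
        const bits = (@bitCast(u64, native_result) & mask) | (@bitCast(u64, lhs) & ~mask);
        return @bitCast(f64, bits);
    }

    test "NaN-aware max sketch" {
        const nan = std.math.nan(f64);
        // maxsd returns its second operand when either input is NaN,
        // so the blend must put lhs back when rhs is NaN.
        try std.testing.expectEqual(@as(f64, 1.0), selectIgnoringNanRhs(nan, 1.0, nan));
        try std.testing.expectEqual(@as(f64, 2.0), selectIgnoringNanRhs(2.0, 1.0, 2.0));
    }

The AVX and SSE4.1 paths express the same select with vblendvpd/blendvpd (hence the new rvmr and rm0 operand encodings), while the and/andn/or chain above is the pre-SSE4.1 fallback.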