// Patch summary (preamble text placed before the first `diff --git` header).
//
// NOTE(review): in this copy the original newlines of the patch are collapsed
// onto a handful of physical lines, so the `@@ -a,b +c,d @@` hunk counts do not
// correspond to physical lines here; presumably line breaks must be restored
// before the patch can be applied -- confirm against the upstream commit.
//
// src/arch/x86_64/CodeGen.zig -- fn airAbs:
//   * The old body switched on `ty.scalarType(mod)` and failed with
//     "TODO implement airAbs for {}" for every integer vector. The new body
//     switches on `ty.zigTypeTag(mod)` inside a labeled `result:` block and
//     ends with a single `finishAir(inst, result, .{ ty_op.operand, .none, .none })`.
//   * `.Int` (scalar): still rejects `abiSize > 8` with a TODO failure, copies
//     the operand into a tracked register, emits `neg` on that copy, then emits
//     a cmov with condition `.l` from the original value (register, memory, or
//     a temporary register copy). The cmov operand size is
//     `@max(abi_size, 2)`. This arm leaves through `break :result dst_mcv`.
//   * `.Float`, and `.Vector` whose child type is `.Float`: return
//     `self.floatSign(inst, ty_op.operand, ty)` directly.
//   * `.Vector` whose child type is `.Int`: selects a `Mir.Inst.FixedTag`
//     by element bit width and vector length:
//       8-bit:  len 1...16 -> `.{ .vp_b, .abs }` with avx, else `.{ .p_b, .abs }`
//               with ssse3; len 17...32 -> `.{ .vp_b, .abs }` only with avx2.
//       16-bit: len 1...8 / 9...16 with the same feature rules (`vp_w` / `p_w`).
//       32-bit: len 1...4 / 5...8 with the same feature rules (`vp_d` / `p_d`).
//     Any other width, length, or feature combination yields `null`.
//   * A `null` tag reaches `orelse return self.fail("TODO implement airAbs for {}", ...)`.
//   * With a tag selected: the destination is the operand's register when
//     `src_mcv.isRegister()` and `reuseOperand` succeeds, otherwise a register
//     from `register_manager.allocReg(inst, self.regClassForType(ty))`. The
//     instruction is emitted register<-memory when `src_mcv.isMemory()`,
//     otherwise register<-register (copying the source into a temporary
//     register first when it is not already in one). Result is
//     `.{ .register = dst_reg }`.
//
// src/arch/x86_64/Encoding.zig:
//   * Adds mnemonics `pabsb, pabsd, pabsw` under a new `// SSSE3` group and
//     `vpabsb, vpabsd, vpabsw` next to the other `vp*` mnemonics.
//   * Adds `ssse3` to the `Feature` enum.
//
// src/arch/x86_64/encodings.zig:
//   * Adds `.rm` table entries for pabsb/pabsw/pabsd with opcode bytes
//     0f 38 1c / 0f 38 1d / 0f 38 1e: `mm, mm_m64` forms without a prefix and
//     `xmm, xmm_m128` forms with a 0x66 prefix, all gated on `.ssse3`.
//   * Adds vpabsb/vpabsw/vpabsd entries with the same 66 0f 38 1c/1d/1e bytes:
//     `xmm, xmm_m128` as `.vex_128_wig` gated on `.avx`, and `ymm, ymm_m256`
//     as `.vex_256_wig` gated on `.avx2`.
//
// test/behavior/abs.zig -- test "@abs float vectors":
//   * Replaces `try testAbsFloatVectors(f32, 17);` with
//     `try testAbsFloatVectors(f32, 1);`.
//   * NOTE(review): the new line repeats the `try testAbsFloatVectors(f32, 1);`
//     call that already follows two statements later, and the surrounding
//     pattern (`try comptime testAbsFloatVectors(f16, 17);` immediately before)
//     suggests the runtime call may have been meant for `(f16, 17)` -- confirm
//     whether the duplicate is an intentional workaround.
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 956fe76494..a637254838 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -5385,46 +5385,104 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const ty = self.typeOf(ty_op.operand); - const scalar_ty = ty.scalarType(mod); - switch (scalar_ty.zigTypeTag(mod)) { - .Int => if (ty.zigTypeTag(mod) == .Vector) { - return self.fail("TODO implement airAbs for {}", .{ty.fmt(mod)}); - } else { - if (ty.abiSize(mod) > 8) { - return self.fail("TODO implement abs for integer abi sizes larger than 8", .{}); - } - const src_mcv = try self.resolveInst(ty_op.operand); - const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); + const result: MCValue = result: { + const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(mod)) { + else => null, + .Int => { + if (ty.abiSize(mod) > 8) { + return self.fail("TODO implement abs for integer abi sizes larger than 8", .{}); + } + const src_mcv = try self.resolveInst(ty_op.operand); + const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); - try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv); + try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv); - const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2); - switch (src_mcv) { - .register => |val_reg| try self.asmCmovccRegisterRegister( - registerAlias(dst_mcv.register, cmov_abi_size), - registerAlias(val_reg, cmov_abi_size), - .l, - ), - .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory( - registerAlias(dst_mcv.register, cmov_abi_size), - src_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)), - .l, - ), - else => { - const val_reg = try self.copyToTmpRegister(ty, src_mcv); - try self.asmCmovccRegisterRegister( + const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2); + switch (src_mcv) { + .register => 
|val_reg| try self.asmCmovccRegisterRegister( registerAlias(dst_mcv.register, cmov_abi_size), registerAlias(val_reg, cmov_abi_size), .l, - ); + ), + .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory( + registerAlias(dst_mcv.register, cmov_abi_size), + src_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)), + .l, + ), + else => { + const val_reg = try self.copyToTmpRegister(ty, src_mcv); + try self.asmCmovccRegisterRegister( + registerAlias(dst_mcv.register, cmov_abi_size), + registerAlias(val_reg, cmov_abi_size), + .l, + ); + }, + } + break :result dst_mcv; + }, + .Float => return self.floatSign(inst, ty_op.operand, ty), + .Vector => switch (ty.childType(mod).zigTypeTag(mod)) { + else => null, + .Int => switch (ty.childType(mod).intInfo(mod).bits) { + else => null, + 8 => switch (ty.vectorLen(mod)) { + else => null, + 1...16 => if (self.hasFeature(.avx)) + .{ .vp_b, .abs } + else if (self.hasFeature(.ssse3)) + .{ .p_b, .abs } + else + null, + 17...32 => if (self.hasFeature(.avx2)) .{ .vp_b, .abs } else null, + }, + 16 => switch (ty.vectorLen(mod)) { + else => null, + 1...8 => if (self.hasFeature(.avx)) + .{ .vp_w, .abs } + else if (self.hasFeature(.ssse3)) + .{ .p_w, .abs } + else + null, + 9...16 => if (self.hasFeature(.avx2)) .{ .vp_w, .abs } else null, + }, + 32 => switch (ty.vectorLen(mod)) { + else => null, + 1...4 => if (self.hasFeature(.avx)) + .{ .vp_d, .abs } + else if (self.hasFeature(.ssse3)) + .{ .p_d, .abs } + else + null, + 5...8 => if (self.hasFeature(.avx2)) .{ .vp_d, .abs } else null, + }, }, - } - return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); - }, - .Float => return self.floatSign(inst, ty_op.operand, ty), - else => unreachable, - } + .Float => return self.floatSign(inst, ty_op.operand, ty), + }, + }) orelse return self.fail("TODO implement airAbs for {}", .{ty.fmt(mod)}); + + const abi_size: u32 = @intCast(ty.abiSize(mod)); + const src_mcv = try self.resolveInst(ty_op.operand); + const dst_reg = if 
(src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv.getReg().? + else + try self.register_manager.allocReg(inst, self.regClassForType(ty)); + const dst_alias = registerAlias(dst_reg, abi_size); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + mir_tag, + dst_alias, + src_mcv.mem(self.memPtrSize(ty)), + ) else try self.asmRegisterRegister( + mir_tag, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + ); + break :result .{ .register = dst_reg }; + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 5b4265f176..0a0e3f3b7d 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -316,6 +316,8 @@ pub const Mnemonic = enum { xorpd, // SSE3 movddup, movshdup, movsldup, + // SSSE3 + pabsb, pabsd, pabsw, // SSE4.1 blendpd, blendps, blendvpd, blendvps, extractps, @@ -353,6 +355,7 @@ pub const Mnemonic = enum { vmovupd, vmovups, vmulpd, vmulps, vmulsd, vmulss, vorpd, vorps, + vpabsb, vpabsd, vpabsw, vpackssdw, vpacksswb, vpackusdw, vpackuswb, vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw, vpand, vpandn, @@ -750,6 +753,7 @@ pub const Feature = enum { sse2, sse3, sse4_1, + ssse3, x87, }; diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 281e686690..a4a21061eb 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -1108,6 +1108,14 @@ pub const table = [_]Entry{ .{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 }, + // SSSE3 + .{ .pabsb, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1c }, 0, .none, .ssse3 }, + .{ .pabsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .none, .ssse3 }, + .{ .pabsd, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1e }, 0, 
.none, .ssse3 }, + .{ .pabsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .none, .ssse3 }, + .{ .pabsw, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1d }, 0, .none, .ssse3 }, + .{ .pabsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .none, .ssse3 }, + // SSE4.1 .{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 }, @@ -1368,6 +1376,10 @@ pub const table = [_]Entry{ .{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx }, .{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx }, + .{ .vpabsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_128_wig, .avx }, + .{ .vpabsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_128_wig, .avx }, + .{ .vpabsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_128_wig, .avx }, + .{ .vpacksswb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_128_wig, .avx }, .{ .vpackssdw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_128_wig, .avx }, @@ -1537,6 +1549,10 @@ pub const table = [_]Entry{ .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 }, .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 }, + .{ .vpabsb, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_256_wig, .avx2 }, + .{ .vpabsd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_256_wig, .avx2 }, + .{ .vpabsw, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_256_wig, .avx2 }, + .{ .vpacksswb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_256_wig, .avx2 }, .{ .vpackssdw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_256_wig, .avx2 }, diff --git a/test/behavior/abs.zig b/test/behavior/abs.zig index f2be872855..98ca04a5a5 100644 --- a/test/behavior/abs.zig +++ 
b/test/behavior/abs.zig @@ -280,7 +280,7 @@ test "@abs float vectors" { try testAbsFloatVectors(f16, 16); try comptime testAbsFloatVectors(f16, 17); - try testAbsFloatVectors(f32, 17); + try testAbsFloatVectors(f32, 1); try comptime testAbsFloatVectors(f32, 1); try testAbsFloatVectors(f32, 1); try comptime testAbsFloatVectors(f32, 2);