mirror of
https://github.com/ziglang/zig.git
synced 2026-02-20 00:08:56 +00:00
x86_64: implement @abs for some integer vector types
This commit is contained in:
parent
24d76500d2
commit
b8f00ae337
@ -5385,46 +5385,104 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void {
|
||||
const mod = self.bin_file.options.module.?;
|
||||
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
|
||||
const ty = self.typeOf(ty_op.operand);
|
||||
const scalar_ty = ty.scalarType(mod);
|
||||
|
||||
switch (scalar_ty.zigTypeTag(mod)) {
|
||||
.Int => if (ty.zigTypeTag(mod) == .Vector) {
|
||||
return self.fail("TODO implement airAbs for {}", .{ty.fmt(mod)});
|
||||
} else {
|
||||
if (ty.abiSize(mod) > 8) {
|
||||
return self.fail("TODO implement abs for integer abi sizes larger than 8", .{});
|
||||
}
|
||||
const src_mcv = try self.resolveInst(ty_op.operand);
|
||||
const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
|
||||
const result: MCValue = result: {
|
||||
const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(mod)) {
|
||||
else => null,
|
||||
.Int => {
|
||||
if (ty.abiSize(mod) > 8) {
|
||||
return self.fail("TODO implement abs for integer abi sizes larger than 8", .{});
|
||||
}
|
||||
const src_mcv = try self.resolveInst(ty_op.operand);
|
||||
const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
|
||||
|
||||
try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv);
|
||||
try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv);
|
||||
|
||||
const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
|
||||
switch (src_mcv) {
|
||||
.register => |val_reg| try self.asmCmovccRegisterRegister(
|
||||
registerAlias(dst_mcv.register, cmov_abi_size),
|
||||
registerAlias(val_reg, cmov_abi_size),
|
||||
.l,
|
||||
),
|
||||
.memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
|
||||
registerAlias(dst_mcv.register, cmov_abi_size),
|
||||
src_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)),
|
||||
.l,
|
||||
),
|
||||
else => {
|
||||
const val_reg = try self.copyToTmpRegister(ty, src_mcv);
|
||||
try self.asmCmovccRegisterRegister(
|
||||
const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
|
||||
switch (src_mcv) {
|
||||
.register => |val_reg| try self.asmCmovccRegisterRegister(
|
||||
registerAlias(dst_mcv.register, cmov_abi_size),
|
||||
registerAlias(val_reg, cmov_abi_size),
|
||||
.l,
|
||||
);
|
||||
),
|
||||
.memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
|
||||
registerAlias(dst_mcv.register, cmov_abi_size),
|
||||
src_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)),
|
||||
.l,
|
||||
),
|
||||
else => {
|
||||
const val_reg = try self.copyToTmpRegister(ty, src_mcv);
|
||||
try self.asmCmovccRegisterRegister(
|
||||
registerAlias(dst_mcv.register, cmov_abi_size),
|
||||
registerAlias(val_reg, cmov_abi_size),
|
||||
.l,
|
||||
);
|
||||
},
|
||||
}
|
||||
break :result dst_mcv;
|
||||
},
|
||||
.Float => return self.floatSign(inst, ty_op.operand, ty),
|
||||
.Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
|
||||
else => null,
|
||||
.Int => switch (ty.childType(mod).intInfo(mod).bits) {
|
||||
else => null,
|
||||
8 => switch (ty.vectorLen(mod)) {
|
||||
else => null,
|
||||
1...16 => if (self.hasFeature(.avx))
|
||||
.{ .vp_b, .abs }
|
||||
else if (self.hasFeature(.ssse3))
|
||||
.{ .p_b, .abs }
|
||||
else
|
||||
null,
|
||||
17...32 => if (self.hasFeature(.avx2)) .{ .vp_b, .abs } else null,
|
||||
},
|
||||
16 => switch (ty.vectorLen(mod)) {
|
||||
else => null,
|
||||
1...8 => if (self.hasFeature(.avx))
|
||||
.{ .vp_w, .abs }
|
||||
else if (self.hasFeature(.ssse3))
|
||||
.{ .p_w, .abs }
|
||||
else
|
||||
null,
|
||||
9...16 => if (self.hasFeature(.avx2)) .{ .vp_w, .abs } else null,
|
||||
},
|
||||
32 => switch (ty.vectorLen(mod)) {
|
||||
else => null,
|
||||
1...4 => if (self.hasFeature(.avx))
|
||||
.{ .vp_d, .abs }
|
||||
else if (self.hasFeature(.ssse3))
|
||||
.{ .p_d, .abs }
|
||||
else
|
||||
null,
|
||||
5...8 => if (self.hasFeature(.avx2)) .{ .vp_d, .abs } else null,
|
||||
},
|
||||
},
|
||||
}
|
||||
return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
|
||||
},
|
||||
.Float => return self.floatSign(inst, ty_op.operand, ty),
|
||||
else => unreachable,
|
||||
}
|
||||
.Float => return self.floatSign(inst, ty_op.operand, ty),
|
||||
},
|
||||
}) orelse return self.fail("TODO implement airAbs for {}", .{ty.fmt(mod)});
|
||||
|
||||
const abi_size: u32 = @intCast(ty.abiSize(mod));
|
||||
const src_mcv = try self.resolveInst(ty_op.operand);
|
||||
const dst_reg = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.register_manager.allocReg(inst, self.regClassForType(ty));
|
||||
const dst_alias = registerAlias(dst_reg, abi_size);
|
||||
if (src_mcv.isMemory()) try self.asmRegisterMemory(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
src_mcv.mem(self.memPtrSize(ty)),
|
||||
) else try self.asmRegisterRegister(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
registerAlias(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(ty, src_mcv), abi_size),
|
||||
);
|
||||
break :result .{ .register = dst_reg };
|
||||
};
|
||||
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
|
||||
}
|
||||
|
||||
fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
|
||||
|
||||
@ -316,6 +316,8 @@ pub const Mnemonic = enum {
|
||||
xorpd,
|
||||
// SSE3
|
||||
movddup, movshdup, movsldup,
|
||||
// SSSE3
|
||||
pabsb, pabsd, pabsw,
|
||||
// SSE4.1
|
||||
blendpd, blendps, blendvpd, blendvps,
|
||||
extractps,
|
||||
@ -353,6 +355,7 @@ pub const Mnemonic = enum {
|
||||
vmovupd, vmovups,
|
||||
vmulpd, vmulps, vmulsd, vmulss,
|
||||
vorpd, vorps,
|
||||
vpabsb, vpabsd, vpabsw,
|
||||
vpackssdw, vpacksswb, vpackusdw, vpackuswb,
|
||||
vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw,
|
||||
vpand, vpandn,
|
||||
@ -750,6 +753,7 @@ pub const Feature = enum {
|
||||
sse2,
|
||||
sse3,
|
||||
sse4_1,
|
||||
ssse3,
|
||||
x87,
|
||||
};
|
||||
|
||||
|
||||
@ -1108,6 +1108,14 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 },
|
||||
|
||||
// SSSE3
|
||||
.{ .pabsb, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1c }, 0, .none, .ssse3 },
|
||||
.{ .pabsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .none, .ssse3 },
|
||||
.{ .pabsd, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1e }, 0, .none, .ssse3 },
|
||||
.{ .pabsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .none, .ssse3 },
|
||||
.{ .pabsw, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1d }, 0, .none, .ssse3 },
|
||||
.{ .pabsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .none, .ssse3 },
|
||||
|
||||
// SSE4.1
|
||||
.{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 },
|
||||
|
||||
@ -1368,6 +1376,10 @@ pub const table = [_]Entry{
|
||||
.{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vpabsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpabsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpabsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpacksswb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpackssdw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_128_wig, .avx },
|
||||
|
||||
@ -1537,6 +1549,10 @@ pub const table = [_]Entry{
|
||||
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
|
||||
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
|
||||
|
||||
.{ .vpabsb, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpabsd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpabsw, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpacksswb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpackssdw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
|
||||
@ -280,7 +280,7 @@ test "@abs float vectors" {
|
||||
try testAbsFloatVectors(f16, 16);
|
||||
try comptime testAbsFloatVectors(f16, 17);
|
||||
|
||||
try testAbsFloatVectors(f32, 17);
|
||||
try testAbsFloatVectors(f32, 1);
|
||||
try comptime testAbsFloatVectors(f32, 1);
|
||||
try testAbsFloatVectors(f32, 1);
|
||||
try comptime testAbsFloatVectors(f32, 2);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user