x86_64: implement fabs

This commit is contained in:
Jacob Young 2023-05-02 03:24:04 -04:00
parent 31429a4e86
commit 9ccdbca635
6 changed files with 52 additions and 7 deletions

View File

@ -1458,14 +1458,13 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.log,
.log2,
.log10,
.fabs,
.floor,
.ceil,
.round,
.trunc_float,
=> try self.airUnaryMath(inst),
.neg => try self.airNeg(inst),
.neg, .fabs => try self.airFloatSign(inst),
.add_with_overflow => try self.airAddSubWithOverflow(inst),
.sub_with_overflow => try self.airAddSubWithOverflow(inst),
@ -4185,7 +4184,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}
fn airNeg(self: *Self, inst: Air.Inst.Index) !void {
fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
const un_op = self.air.instructions.items(.data)[inst].un_op;
const ty = self.air.typeOf(un_op);
const ty_bits = ty.floatBits(self.target.*);
@ -4228,10 +4227,19 @@ fn airNeg(self: *Self, inst: Air.Inst.Index) !void {
const dst_lock = self.register_manager.lockReg(dst_mcv.register);
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
const tag = self.air.instructions.items(.tag)[inst];
try self.genBinOpMir(switch (ty_bits) {
32 => .xorps,
64 => .xorpd,
else => return self.fail("TODO implement airNeg for {}", .{
32 => switch (tag) {
.neg => .xorps,
.fabs => .andnps,
else => unreachable,
},
64 => switch (tag) {
.neg => .xorpd,
.fabs => .andnpd,
else => unreachable,
},
else => return self.fail("TODO implement airFloatSign for {}", .{
ty.fmt(self.bin_file.options.module.?),
}),
}, vec_ty, dst_mcv, sign_mcv);

View File

@ -268,23 +268,29 @@ pub const Mnemonic = enum {
movd,
// SSE
addss,
andps,
andnps,
cmpss,
cvtsi2ss,
divss,
maxss, minss,
movss,
mulss,
orps,
subss,
ucomiss,
xorps,
// SSE2
addsd,
andpd,
andnpd,
//cmpsd,
cvtsd2ss, cvtsi2sd, cvtss2sd,
divsd,
maxsd, minsd,
movq, //movd, movsd,
mulsd,
orpd,
subsd,
ucomisd,
xorpd,

View File

@ -94,6 +94,8 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
.xor,
.addss,
.andnps,
.andps,
.cmpss,
.cvtsi2ss,
.divss,
@ -101,11 +103,14 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
.minss,
.movss,
.mulss,
.orps,
.roundss,
.subss,
.ucomiss,
.xorps,
.addsd,
.andnpd,
.andpd,
.cmpsd,
.cvtsd2ss,
.cvtsi2sd,
@ -115,6 +120,7 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
.minsd,
.movsd,
.mulsd,
.orpd,
.roundsd,
.subsd,
.ucomisd,

View File

@ -168,6 +168,10 @@ pub const Inst = struct {
/// Add scalar single-precision floating-point values
addss,
/// Bitwise logical AND of packed single-precision floating-point values
andps,
/// Bitwise logical AND NOT of packed single-precision floating-point values
andnps,
/// Compare scalar single-precision floating-point values
cmpss,
/// Convert doubleword integer to scalar single-precision floating-point value
@ -182,6 +186,8 @@ pub const Inst = struct {
movss,
/// Multiply scalar single-precision floating-point values
mulss,
/// Bitwise logical OR of packed single-precision floating-point values
orps,
/// Round scalar single-precision floating-point values
roundss,
/// Subtract scalar single-precision floating-point values
@ -192,6 +198,10 @@ pub const Inst = struct {
xorps,
/// Add scalar double-precision floating-point values
addsd,
/// Bitwise logical AND NOT of packed double-precision floating-point values
andnpd,
/// Bitwise logical AND of packed double-precision floating-point values
andpd,
/// Compare scalar double-precision floating-point values
cmpsd,
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
@ -210,6 +220,8 @@ pub const Inst = struct {
movsd,
/// Multiply scalar double-precision floating-point values
mulsd,
/// Bitwise logical OR of packed double-precision floating-point values
orpd,
/// Round scalar double-precision floating-point values
roundsd,
/// Subtract scalar double-precision floating-point values

View File

@ -832,6 +832,10 @@ pub const table = [_]Entry{
// SSE
.{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .sse },
.{ .andnps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .sse },
.{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .sse },
.{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .sse },
.{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse },
@ -848,6 +852,8 @@ pub const table = [_]Entry{
.{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .sse },
.{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .sse },
.{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .sse },
.{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .sse },
@ -857,6 +863,10 @@ pub const table = [_]Entry{
// SSE2
.{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .sse2 },
.{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .sse2 },
.{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .sse2 },
.{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .sse2 },
.{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .sse2 },
@ -883,6 +893,8 @@ pub const table = [_]Entry{
.{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .sse2 },
.{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .sse2 },
.{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .sse2 },
.{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .sse2 },

View File

@ -96,7 +96,8 @@ test "negative f128 floatToInt at compile-time" {
}
test "@sqrt" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64 and
comptime !std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sse, .sse2, .sse4_1 })) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO