x86_64: implement fabs

2025-12-06 06:13:07 +00:00 · 2023-05-02 03:24:04 -04:00 · 2023-05-02 03:24:04 -04:00 · 9ccdbca635
commit 9ccdbca635
parent 31429a4e86
6 changed files with 52 additions and 7 deletions
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -1458,14 +1458,13 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
            .log,
            .log2,
            .log10,
-            .fabs,
            .floor,
            .ceil,
            .round,
            .trunc_float,
            => try self.airUnaryMath(inst),

-            .neg => try self.airNeg(inst),
+            .neg, .fabs => try self.airFloatSign(inst),

            .add_with_overflow => try self.airAddSubWithOverflow(inst),
            .sub_with_overflow => try self.airAddSubWithOverflow(inst),
@@ -4185,7 +4184,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
    return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
 }

-fn airNeg(self: *Self, inst: Air.Inst.Index) !void {
+fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
    const un_op = self.air.instructions.items(.data)[inst].un_op;
    const ty = self.air.typeOf(un_op);
    const ty_bits = ty.floatBits(self.target.*);
@@ -4228,10 +4227,19 @@ fn airNeg(self: *Self, inst: Air.Inst.Index) !void {
    const dst_lock = self.register_manager.lockReg(dst_mcv.register);
    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

+    const tag = self.air.instructions.items(.tag)[inst];
    try self.genBinOpMir(switch (ty_bits) {
-        32 => .xorps,
-        64 => .xorpd,
-        else => return self.fail("TODO implement airNeg for {}", .{
+        32 => switch (tag) {
+            .neg => .xorps,
+            .fabs => .andnps,
+            else => unreachable,
+        },
+        64 => switch (tag) {
+            .neg => .xorpd,
+            .fabs => .andnpd,
+            else => unreachable,
+        },
+        else => return self.fail("TODO implement airFloatSign for {}", .{
            ty.fmt(self.bin_file.options.module.?),
        }),
    }, vec_ty, dst_mcv, sign_mcv);
--- a/src/arch/x86_64/Encoding.zig
+++ b/src/arch/x86_64/Encoding.zig
@@ -268,23 +268,29 @@ pub const Mnemonic = enum {
    movd,
    // SSE
    addss,
+    andps,
+    andnps,
    cmpss,
    cvtsi2ss,
    divss,
    maxss, minss,
    movss,
    mulss,
+    orps,
    subss,
    ucomiss,
    xorps,
    // SSE2
    addsd,
+    andpd,
+    andnpd,
    //cmpsd,
    cvtsd2ss, cvtsi2sd, cvtss2sd,
    divsd,
    maxsd, minsd,
    movq, //movd, movsd,
    mulsd,
+    orpd,
    subsd,
    ucomisd,
    xorpd,
--- a/src/arch/x86_64/Lower.zig
+++ b/src/arch/x86_64/Lower.zig
@@ -94,6 +94,8 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
        .xor,

        .addss,
+        .andnps,
+        .andps,
        .cmpss,
        .cvtsi2ss,
        .divss,
@@ -101,11 +103,14 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
        .minss,
        .movss,
        .mulss,
+        .orps,
        .roundss,
        .subss,
        .ucomiss,
        .xorps,
        .addsd,
+        .andnpd,
+        .andpd,
        .cmpsd,
        .cvtsd2ss,
        .cvtsi2sd,
@@ -115,6 +120,7 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
        .minsd,
        .movsd,
        .mulsd,
+        .orpd,
        .roundsd,
        .subsd,
        .ucomisd,
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -168,6 +168,10 @@ pub const Inst = struct {

        /// Add single precision floating point values
        addss,
+        /// Bitwise logical and of packed single precision floating-point values
+        andps,
+        /// Bitwise logical and not of packed single precision floating-point values
+        andnps,
        /// Compare scalar single-precision floating-point values
        cmpss,
        /// Convert doubleword integer to scalar single-precision floating-point value
@@ -182,6 +186,8 @@ pub const Inst = struct {
        movss,
        /// Multiply scalar single-precision floating-point values
        mulss,
+        /// Bitwise logical or of packed single precision floating-point values
+        orps,
        /// Round scalar single-precision floating-point values
        roundss,
        /// Subtract scalar single-precision floating-point values
@@ -192,6 +198,10 @@ pub const Inst = struct {
        xorps,
        /// Add double precision floating point values
        addsd,
+        /// Bitwise logical and not of packed double precision floating-point values
+        andnpd,
+        /// Bitwise logical and of packed double precision floating-point values
+        andpd,
        /// Compare scalar double-precision floating-point values
        cmpsd,
        /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
@@ -210,6 +220,8 @@ pub const Inst = struct {
        movsd,
        /// Multiply scalar double-precision floating-point values
        mulsd,
+        /// Bitwise logical or of packed double precision floating-point values
+        orpd,
        /// Round scalar double-precision floating-point values
        roundsd,
        /// Subtract scalar double-precision floating-point values
--- a/src/arch/x86_64/encodings.zig
+++ b/src/arch/x86_64/encodings.zig
@@ -832,6 +832,10 @@ pub const table = [_]Entry{
    // SSE
    .{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .sse },

+    .{ .andnps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .sse },
+
+    .{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .sse },
+
    .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .sse },

    .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse },
@@ -848,6 +852,8 @@ pub const table = [_]Entry{

    .{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .sse },

+    .{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .sse },
+
    .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .sse },

    .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .sse },
@@ -857,6 +863,10 @@ pub const table = [_]Entry{
    // SSE2
    .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .sse2 },

+    .{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .sse2 },
+
+    .{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .sse2 },
+
    .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .sse2 },

    .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .sse2 },
@@ -883,6 +893,8 @@ pub const table = [_]Entry{

    .{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .sse2 },

+    .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .sse2 },
+
    .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .sse2 },

    .{ .movsd, .rm, &.{ .xmm,     .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .sse2 },
--- a/test/behavior/floatop.zig
+++ b/test/behavior/floatop.zig
@@ -96,7 +96,8 @@ test "negative f128 floatToInt at compile-time" {
 }

 test "@sqrt" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64 and
+        comptime !std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sse, .sse2, .sse4_1 })) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO