compiler-rt: provide actual sincos implementations

2026-02-01 20:23:38 +00:00 · 2022-04-27 22:37:07 -07:00 · 2022-04-27 22:37:07 -07:00 · f7f03c699d
commit f7f03c699d
parent 0b2ed45f5f
3 changed files with 264 additions and 49 deletions
--- a/lib/std/special/compiler_rt/cos.zig
+++ b/lib/std/special/compiler_rt/cos.zig
@ -2,7 +2,7 @@ const std = @import("std");
 const math = std.math;
 const expect = std.testing.expect;

-const kernel = @import("trig.zig");
+const trig = @import("trig.zig");
 const rem_pio2 = @import("rem_pio2.zig").rem_pio2;
 const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f;

@ -28,27 +28,27 @@ pub fn cosf(x: f32) callconv(.C) f32 {
            math.doNotOptimizeAway(x + 0x1p120);
            return 1.0;
        }
-        return kernel.__cosdf(x);
+        return trig.__cosdf(x);
    }
    if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4
        if (ix > 0x4016cbe3) { // |x|  ~> 3*pi/4
-            return -kernel.__cosdf(if (sign) x + c2pio2 else x - c2pio2);
+            return -trig.__cosdf(if (sign) x + c2pio2 else x - c2pio2);
        } else {
            if (sign) {
-                return kernel.__sindf(x + c1pio2);
+                return trig.__sindf(x + c1pio2);
            } else {
-                return kernel.__sindf(c1pio2 - x);
+                return trig.__sindf(c1pio2 - x);
            }
        }
    }
    if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4
        if (ix > 0x40afeddf) { // |x| ~> 7*pi/4
-            return kernel.__cosdf(if (sign) x + c4pio2 else x - c4pio2);
+            return trig.__cosdf(if (sign) x + c4pio2 else x - c4pio2);
        } else {
            if (sign) {
-                return kernel.__sindf(-x - c3pio2);
+                return trig.__sindf(-x - c3pio2);
            } else {
-                return kernel.__sindf(x - c3pio2);
+                return trig.__sindf(x - c3pio2);
            }
        }
    }
@ -61,10 +61,10 @@ pub fn cosf(x: f32) callconv(.C) f32 {
    var y: f64 = undefined;
    const n = rem_pio2f(x, &y);
    return switch (n & 3) {
-        0 => kernel.__cosdf(y),
-        1 => kernel.__sindf(-y),
-        2 => -kernel.__cosdf(y),
-        else => kernel.__sindf(y),
+        0 => trig.__cosdf(y),
+        1 => trig.__sindf(-y),
+        2 => -trig.__cosdf(y),
+        else => trig.__sindf(y),
    };
 }

@ -79,7 +79,7 @@ pub fn cos(x: f64) callconv(.C) f64 {
            math.doNotOptimizeAway(x + 0x1p120);
            return 1.0;
        }
-        return kernel.__cos(x, 0);
+        return trig.__cos(x, 0);
    }

    // cos(Inf or NaN) is NaN
@ -90,10 +90,10 @@ pub fn cos(x: f64) callconv(.C) f64 {
    var y: [2]f64 = undefined;
    const n = rem_pio2(x, &y);
    return switch (n & 3) {
-        0 => kernel.__cos(y[0], y[1]),
-        1 => -kernel.__sin(y[0], y[1], 1),
-        2 => -kernel.__cos(y[0], y[1]),
-        else => kernel.__sin(y[0], y[1], 1),
+        0 => trig.__cos(y[0], y[1]),
+        1 => -trig.__sin(y[0], y[1], 1),
+        2 => -trig.__cos(y[0], y[1]),
+        else => trig.__sin(y[0], y[1], 1),
    };
 }

--- a/lib/std/special/compiler_rt/sin.zig
+++ b/lib/std/special/compiler_rt/sin.zig
@ -8,7 +8,7 @@ const std = @import("std");
 const math = std.math;
 const expect = std.testing.expect;

-const kernel = @import("trig.zig");
+const trig = @import("trig.zig");
 const rem_pio2 = @import("rem_pio2.zig").rem_pio2;
 const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f;

@ -34,27 +34,27 @@ pub fn sinf(x: f32) callconv(.C) f32 {
            math.doNotOptimizeAway(if (ix < 0x00800000) x / 0x1p120 else x + 0x1p120);
            return x;
        }
-        return kernel.__sindf(x);
+        return trig.__sindf(x);
    }
    if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4
        if (ix <= 0x4016cbe3) { // |x| ~<= 3pi/4
            if (sign) {
-                return -kernel.__cosdf(x + s1pio2);
+                return -trig.__cosdf(x + s1pio2);
            } else {
-                return kernel.__cosdf(x - s1pio2);
+                return trig.__cosdf(x - s1pio2);
            }
        }
-        return kernel.__sindf(if (sign) -(x + s2pio2) else -(x - s2pio2));
+        return trig.__sindf(if (sign) -(x + s2pio2) else -(x - s2pio2));
    }
    if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4
        if (ix <= 0x40afeddf) { // |x| ~<= 7*pi/4
            if (sign) {
-                return kernel.__cosdf(x + s3pio2);
+                return trig.__cosdf(x + s3pio2);
            } else {
-                return -kernel.__cosdf(x - s3pio2);
+                return -trig.__cosdf(x - s3pio2);
            }
        }
-        return kernel.__sindf(if (sign) x + s4pio2 else x - s4pio2);
+        return trig.__sindf(if (sign) x + s4pio2 else x - s4pio2);
    }

    // sin(Inf or NaN) is NaN
@ -65,10 +65,10 @@ pub fn sinf(x: f32) callconv(.C) f32 {
    var y: f64 = undefined;
    const n = rem_pio2f(x, &y);
    return switch (n & 3) {
-        0 => kernel.__sindf(y),
-        1 => kernel.__cosdf(y),
-        2 => kernel.__sindf(-y),
-        else => -kernel.__cosdf(y),
+        0 => trig.__sindf(y),
+        1 => trig.__cosdf(y),
+        2 => trig.__sindf(-y),
+        else => -trig.__cosdf(y),
    };
 }

@ -83,7 +83,7 @@ pub fn sin(x: f64) callconv(.C) f64 {
            math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120);
            return x;
        }
-        return kernel.__sin(x, 0.0, 0);
+        return trig.__sin(x, 0.0, 0);
    }

    // sin(Inf or NaN) is NaN
@ -94,10 +94,10 @@ pub fn sin(x: f64) callconv(.C) f64 {
    var y: [2]f64 = undefined;
    const n = rem_pio2(x, &y);
    return switch (n & 3) {
-        0 => kernel.__sin(y[0], y[1], 1),
-        1 => kernel.__cos(y[0], y[1]),
-        2 => -kernel.__sin(y[0], y[1], 1),
-        else => -kernel.__cos(y[0], y[1]),
+        0 => trig.__sin(y[0], y[1], 1),
+        1 => trig.__cos(y[0], y[1]),
+        2 => -trig.__sin(y[0], y[1], 1),
+        else => -trig.__cos(y[0], y[1]),
    };
 }

--- a/lib/std/special/compiler_rt/sincos.zig
+++ b/lib/std/special/compiler_rt/sincos.zig
@ -1,27 +1,242 @@
+const std = @import("std");
+const math = std.math;
 const sin = @import("sin.zig");
 const cos = @import("cos.zig");
+const trig = @import("trig.zig");
+const rem_pio2 = @import("rem_pio2.zig").rem_pio2;
+const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f;

-pub fn __sincosh(a: f16, r_sin: *f16, r_cos: *f16) callconv(.C) void {
-    r_sin.* = sin.__sinh(a);
-    r_cos.* = cos.__cosh(a);
+pub fn __sincosh(x: f16, r_sin: *f16, r_cos: *f16) callconv(.C) void {
+    // TODO: more efficient implementation. For now: evaluate in f32 via sincosf, then narrow each result to f16.
+    var big_sin: f32 = undefined;
+    var big_cos: f32 = undefined;
+    sincosf(x, &big_sin, &big_cos);
+    r_sin.* = @floatCast(f16, big_sin);
+    r_cos.* = @floatCast(f16, big_cos);
 }

-pub fn sincosf(a: f32, r_sin: *f32, r_cos: *f32) callconv(.C) void {
-    r_sin.* = sin.sinf(a);
-    r_cos.* = cos.cosf(a);
+pub fn sincosf(x: f32, r_sin: *f32, r_cos: *f32) callconv(.C) void {
+    const sc1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18
+    const sc2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18
+    const sc3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2
+    const sc4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18
+    // Stores sin(x) to r_sin.* and cos(x) to r_cos.*; branches on the sign bit and on the bit pattern of |x|.
+    const pre_ix = @bitCast(u32, x);
+    const sign = pre_ix >> 31 != 0;
+    const ix = pre_ix & 0x7fffffff;
+
+    // |x| ~<= pi/4: both kernels are applied to x directly
+    if (ix <= 0x3f490fda) {
+        // |x| < 2**-12: x is stored as the sine and 1.0 as the cosine
+        if (ix < 0x39800000) {
+            // raise inexact if x!=0 and underflow if subnormal (f32 subnormals have ix < 0x00800000, as in sinf)
+            math.doNotOptimizeAway(if (ix < 0x00800000) x / 0x1p120 else x + 0x1p120);
+            r_sin.* = x;
+            r_cos.* = 1.0;
+            return;
+        }
+        r_sin.* = trig.__sindf(x);
+        r_cos.* = trig.__cosdf(x);
+        return;
+    }
+
+    // |x| ~<= 5*pi/4
+    if (ix <= 0x407b53d1) {
+        // |x| ~<= 3pi/4: shift by pi/2, which swaps the roles of the two kernels
+        if (ix <= 0x4016cbe3) {
+            if (sign) {
+                r_sin.* = -trig.__cosdf(x + sc1pio2);
+                r_cos.* = trig.__sindf(x + sc1pio2);
+            } else {
+                r_sin.* = trig.__cosdf(sc1pio2 - x);
+                r_cos.* = trig.__sindf(sc1pio2 - x);
+            }
+            return;
+        }
+        // -sin(x+c) is not correct if x+c could be 0: -0 vs +0
+        r_sin.* = -trig.__sindf(if (sign) x + sc2pio2 else x - sc2pio2);
+        r_cos.* = -trig.__cosdf(if (sign) x + sc2pio2 else x - sc2pio2);
+        return;
+    }
+
+    // |x| ~<= 9*pi/4
+    if (ix <= 0x40e231d5) {
+        // |x| ~<= 7*pi/4: shift by 3*pi/2, which again swaps the roles of the two kernels
+        if (ix <= 0x40afeddf) {
+            if (sign) {
+                r_sin.* = trig.__cosdf(x + sc3pio2);
+                r_cos.* = -trig.__sindf(x + sc3pio2);
+            } else {
+                r_sin.* = -trig.__cosdf(x - sc3pio2);
+                r_cos.* = trig.__sindf(x - sc3pio2);
+            }
+            return;
+        }
+        r_sin.* = trig.__sindf(if (sign) x + sc4pio2 else x - sc4pio2);
+        r_cos.* = trig.__cosdf(if (sign) x + sc4pio2 else x - sc4pio2);
+        return;
+    }
+
+    // sincosf(Inf or NaN) is NaN for both outputs (x - x produces the NaN)
+    if (ix >= 0x7f800000) {
+        const result = x - x;
+        r_sin.* = result;
+        r_cos.* = result;
+        return;
+    }
+
+    // general argument reduction needed: rem_pio2f writes y and returns n; n & 3 picks the mapping below
+    var y: f64 = undefined;
+    const n = rem_pio2f(x, &y);
+    const s = trig.__sindf(y);
+    const c = trig.__cosdf(y);
+    switch (n & 3) {
+        0 => {
+            r_sin.* = s;
+            r_cos.* = c;
+        },
+        1 => {
+            r_sin.* = c;
+            r_cos.* = -s;
+        },
+        2 => {
+            r_sin.* = -s;
+            r_cos.* = -c;
+        },
+        else => {
+            r_sin.* = -c;
+            r_cos.* = s;
+        },
+    }
 }

-pub fn sincos(a: f64, r_sin: *f64, r_cos: *f64) callconv(.C) void {
-    r_sin.* = sin.sin(a);
-    r_cos.* = cos.cos(a);
+pub fn sincos(x: f64, r_sin: *f64, r_cos: *f64) callconv(.C) void {
+    const ix = @truncate(u32, @bitCast(u64, x) >> 32) & 0x7fffffff; // high 32 bits of x with the sign bit cleared
+    // Stores sin(x) to r_sin.* and cos(x) to r_cos.*, choosing a path from the magnitude bits in ix.
+    // |x| ~<= pi/4: the kernels are called on x directly, with a zero tail argument
+    if (ix <= 0x3fe921fb) {
+        // if |x| < 2**-27 * sqrt(2): x is stored as the sine and 1.0 as the cosine
+        if (ix < 0x3e46a09e) {
+            // raise inexact if x != 0 and underflow if subnormal
+            math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120);
+            r_sin.* = x;
+            r_cos.* = 1.0;
+            return;
+        }
+        r_sin.* = trig.__sin(x, 0.0, 0);
+        r_cos.* = trig.__cos(x, 0.0);
+        return;
+    }
+
+    // sincos(Inf or NaN) is NaN: x - x is stored to both outputs
+    if (ix >= 0x7ff00000) {
+        const result = x - x;
+        r_sin.* = result;
+        r_cos.* = result;
+        return;
+    }
+
+    // argument reduction needed: rem_pio2 fills y and returns n; only n & 3 is used to pick the mapping below
+    var y: [2]f64 = undefined;
+    const n = rem_pio2(x, &y);
+    const s = trig.__sin(y[0], y[1], 1);
+    const c = trig.__cos(y[0], y[1]);
+    switch (n & 3) {
+        0 => {
+            r_sin.* = s;
+            r_cos.* = c;
+        },
+        1 => {
+            r_sin.* = c;
+            r_cos.* = -s;
+        },
+        2 => {
+            r_sin.* = -s;
+            r_cos.* = -c;
+        },
+        else => {
+            r_sin.* = -c;
+            r_cos.* = s;
+        },
+    }
 }

-pub fn __sincosx(a: f80, r_sin: *f80, r_cos: *f80) callconv(.C) void {
-    r_sin.* = sin.__sinx(a);
-    r_cos.* = cos.__cosx(a);
+pub fn __sincosx(x: f80, r_sin: *f80, r_cos: *f80) callconv(.C) void {
+    // TODO: more efficient implementation. For now: evaluate through sincosq, then narrow the f128 results to f80.
+    // Intended replacement (not yet usable): return sincos_generic(f80, x, r_sin, r_cos);
+    var big_sin: f128 = undefined;
+    var big_cos: f128 = undefined;
+    sincosq(x, &big_sin, &big_cos);
+    r_sin.* = @floatCast(f80, big_sin);
+    r_cos.* = @floatCast(f80, big_cos);
 }

-pub fn sincosq(a: f128, r_sin: *f128, r_cos: *f128) callconv(.C) void {
-    r_sin.* = sin.sinq(a);
-    r_cos.* = cos.cosq(a);
+pub fn sincosq(x: f128, r_sin: *f128, r_cos: *f128) callconv(.C) void {
+    // TODO: more correct implementation. This narrows x to f64 and calls sincos, so results carry only f64 precision.
+    // Intended replacement (not yet usable): return sincos_generic(f128, x, r_sin, r_cos);
+    var small_sin: f64 = undefined;
+    var small_cos: f64 = undefined;
+    sincos(@floatCast(f64, x), &small_sin, &small_cos);
+    r_sin.* = small_sin;
+    r_cos.* = small_cos;
+}
+
+const rem_pio2_generic = @compileError("TODO"); // placeholder until __rem_pio2l.c is ported
+
+/// Generic sine-and-cosine routine: stores sin(x) to `r_sin.*` and cos(x) to `r_cos.*`.
+/// Ported from musl sincosl.c. Needs the following dependencies to be complete:
+/// rem_pio2_generic (from __rem_pio2l.c), trig.sin_generic (from __sinl.c),
+/// and trig.cos_generic (from __cosl.c). The only call sites visible here are commented out.
+inline fn sincos_generic(comptime F: type, x: F, r_sin: *F, r_cos: *F) void {
+    const sc1pio4: F = 1.0 * math.pi / 4.0;
+    const bits = @typeInfo(F).Float.bits;
+    const I = std.meta.Int(.unsigned, bits);
+    const ix = @bitCast(I, x) & (math.maxInt(I) >> 1); // bits of x with the top (sign) bit cleared
+    const se = @truncate(u16, ix >> (bits - 16)); // top 16 bits of ix; presumably the exponent field for f80/f128
+
+    if (se == 0x7fff) { // x - x is stored to both outputs (NaN when x is Inf or NaN)
+        const result = x - x;
+        r_sin.* = result;
+        r_cos.* = result;
+        return;
+    }
+
+    if (@bitCast(F, ix) < sc1pio4) { // |x| < pi/4: no argument reduction
+        if (se < 0x3fff - math.floatFractionalBits(F) - 1) {
+            // raise underflow if subnormal
+            if (se == 0) {
+                math.doNotOptimizeAway(x * 0x1p-120);
+            }
+            r_sin.* = x;
+            // raise inexact if x!=0
+            r_cos.* = 1.0 + x;
+            return;
+        }
+        r_sin.* = trig.sin_generic(F, x, 0, 0);
+        r_cos.* = trig.cos_generic(F, x, 0);
+        return;
+    }
+
+    var y: [2]F = undefined;
+    const n = rem_pio2_generic(F, x, &y);
+    const s = trig.sin_generic(F, y[0], y[1], 1);
+    const c = trig.cos_generic(F, y[0], y[1]);
+    switch (n & 3) { // only the low two bits of n select the output mapping
+        0 => {
+            r_sin.* = s;
+            r_cos.* = c;
+        },
+        1 => {
+            r_sin.* = c;
+            r_cos.* = -s;
+        },
+        2 => {
+            r_sin.* = -s;
+            r_cos.* = -c;
+        },
+        else => {
+            r_sin.* = -c;
+            r_cos.* = s;
+        },
+    }
 }