Merge pull request #10276 from tiehuis/musl-trig-reimpl

sin/cos/tan musl reimplementation
This commit is contained in:
Andrew Kelley 2021-12-05 14:49:04 -08:00 committed by GitHub
commit a7828c261a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 1369 additions and 225 deletions

198
lib/std/math/__rem_pio2.zig Normal file
View File

@ -0,0 +1,198 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/__rem_pio2.c
const std = @import("../std.zig");
const __rem_pio2_large = @import("__rem_pio2_large.zig").__rem_pio2_large;
const math = std.math;
// toint: 1.5 / epsilon(f64). `medium` adds and then subtracts this constant
// to obtain rint(x/(pi/2)) without calling a rounding routine.
const toint = 1.5 / math.epsilon(f64);
// pi/4
const pio4 = 0x1.921fb54442d18p-1;
// invpio2: 53 bits of 2/pi
const invpio2 = 6.36619772367581382433e-01; // 0x3FE45F30, 0x6DC9C883
// The constants below come in head/tail pairs: pio2_N is the N-th 33-bit
// piece of pi/2 and pio2_Nt is what remains after removing pieces 1..N.
// `medium` consumes one pair per reduction round.
// pio2_1: first 33 bit of pi/2
const pio2_1 = 1.57079632673412561417e+00; // 0x3FF921FB, 0x54400000
// pio2_1t: pi/2 - pio2_1
const pio2_1t = 6.07710050650619224932e-11; // 0x3DD0B461, 0x1A626331
// pio2_2: second 33 bit of pi/2
const pio2_2 = 6.07710050630396597660e-11; // 0x3DD0B461, 0x1A600000
// pio2_2t: pi/2 - (pio2_1+pio2_2)
const pio2_2t = 2.02226624879595063154e-21; // 0x3BA3198A, 0x2E037073
// pio2_3: third 33 bit of pi/2
const pio2_3 = 2.02226624871116645580e-21; // 0x3BA3198A, 0x2E000000
// pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
const pio2_3t = 8.47842766036889956997e-32; // 0x397B839A, 0x252049C1
// Converts an integer of any type into a `usize` suitable for array
// subscripting. The value must be representable as `usize`.
fn U(x: anytype) usize {
    const index: usize = @intCast(usize, x);
    return index;
}
// Medium-size argument reduction.
//
// Computes n = rint(x/(pi/2)) and stores x - n*pi/2 as the head/tail pair
// y[0] + y[1], using up to three rounds of progressively more precise pieces
// of pi/2. `ix` is the high word of |x|; only its exponent field is used here.
// Returns n.
fn medium(ix: u32, x: f64, y: *[2]f64) i32 {
    // rint(x/(pi/2))
    var quadrants: f64 = x * invpio2 + toint - toint;
    var n: i32 = @floatToInt(i32, quadrants);
    var head: f64 = x - quadrants * pio2_1;
    var tail: f64 = quadrants * pio2_1t; // 1st round, good to 85 bits

    // Matters with directed rounding: if the first estimate left the
    // remainder outside [-pi/4, pi/4], step n by one and redo round one.
    const estimate = head - tail;
    const nudge: i32 = if (estimate < -pio4) @as(i32, -1) else if (estimate > pio4) @as(i32, 1) else @as(i32, 0);
    if (nudge != 0) {
        n += nudge;
        quadrants += @intToFloat(f64, nudge);
        head = x - quadrants * pio2_1;
        tail = quadrants * pio2_1t;
    }
    y[0] = head - tail;

    // Compare biased exponents of x and of the remainder to see how many
    // bits were cancelled by the subtraction.
    const ex = @intCast(i32, ix >> 20);
    var ey = @intCast(i32, (@bitCast(u64, y[0]) >> 52) & 0x7ff);
    if (ex - ey > 16) { // 2nd round, good to 118 bits
        const prev = head;
        tail = quadrants * pio2_2;
        head = prev - tail;
        tail = quadrants * pio2_2t - ((prev - head) - tail);
        y[0] = head - tail;
        ey = @intCast(i32, (@bitCast(u64, y[0]) >> 52) & 0x7ff);
        if (ex - ey > 49) { // 3rd round, good to 151 bits, covers all cases
            const prev2 = head;
            tail = quadrants * pio2_3;
            head = prev2 - tail;
            tail = quadrants * pio2_3t - ((prev2 - head) - tail);
            y[0] = head - tail;
        }
    }
    y[1] = (head - y[0]) - tail;
    return n;
}
// Removes k*(pi/2) from x in one round (good to 85 bits) using the two-piece
// constant pio2_1 + pio2_1t. `k` is comptime so the scaled constants are
// folded at compile time; a negative k adds the multiple instead.
// Writes the head/tail of the remainder to y and returns k.
fn reduceSmall(x: f64, y: *[2]f64, comptime k: comptime_int) i32 {
    const z = x - k * pio2_1;
    y[0] = z - k * pio2_1t;
    y[1] = (z - y[0]) - k * pio2_1t;
    return k;
}

// Returns the remainder of x rem pi/2 in y[0]+y[1]; the return value is the
// multiple of pi/2 that was removed (reduced mod 8 for large arguments).
//
// Small arguments (|x| ~<= 9pi/4) are handled with one subtraction round,
// medium ones by `medium`, and everything else by __rem_pio2_large().
//
// caller must handle the case when reduction is not needed: |x| ~<= pi/4
pub fn __rem_pio2(x: f64, y: *[2]f64) i32 {
    const bits = @bitCast(u64, x);
    const sign = bits >> 63 != 0;
    const ix = @truncate(u32, (bits >> 32) & 0x7fffffff);

    if (ix <= 0x400f6a7a) { // |x| ~<= 5pi/4
        // |x| ~= pi/2 or 2pi/2: cancellation, use the medium path
        if ((ix & 0xfffff) == 0x921fb) return medium(ix, x, y);
        if (ix <= 0x4002d97c) { // |x| ~<= 3pi/4
            return if (sign) reduceSmall(x, y, -1) else reduceSmall(x, y, 1);
        }
        return if (sign) reduceSmall(x, y, -2) else reduceSmall(x, y, 2);
    }

    if (ix <= 0x401c463b) { // |x| ~<= 9pi/4
        if (ix <= 0x4015fdbc) { // |x| ~<= 7pi/4
            // |x| ~= 3pi/2
            if (ix == 0x4012d97c) return medium(ix, x, y);
            return if (sign) reduceSmall(x, y, -3) else reduceSmall(x, y, 3);
        }
        // |x| ~= 4pi/2
        if (ix == 0x401921fb) return medium(ix, x, y);
        return if (sign) reduceSmall(x, y, -4) else reduceSmall(x, y, 4);
    }

    // |x| ~< 2^20*(pi/2), medium size
    if (ix < 0x413921fb) return medium(ix, x, y);

    // all other (large) arguments
    if (ix >= 0x7ff00000) { // x is inf or NaN
        y[0] = x - x;
        y[1] = y[0];
        return 0;
    }

    // set z = scalbn(|x|,-ilogb(x)+23): keep the mantissa bits and force the
    // exponent field to 0x3ff + 23 (this also clears the sign bit).
    const mantissa = bits & (std.math.maxInt(u64) >> 12);
    var z = @bitCast(f64, mantissa | (@as(u64, 0x3ff + 23) << 52));

    // Peel off 24-bit integer chunks; the third chunk takes whatever is left.
    var tx: [3]f64 = undefined;
    var last: usize = 0;
    while (last < 2) : (last += 1) {
        tx[last] = @intToFloat(f64, @floatToInt(i32, z));
        z = (z - tx[last]) * 0x1p24;
    }
    tx[last] = z;
    // skip zero terms, first term is non-zero
    while (tx[last] == 0.0) last -= 1;

    var ty: [2]f64 = undefined;
    const e0 = @intCast(i32, (ix >> 20)) - (0x3ff + 23);
    const n = __rem_pio2_large(tx[0..], ty[0..], e0, @intCast(i32, last + 1), 1);

    // The large reduction works on |x|; restore the sign afterwards.
    if (sign) {
        y[0] = -ty[0];
        y[1] = -ty[1];
        return -n;
    }
    y[0] = ty[0];
    y[1] = ty[1];
    return n;
}

View File

@ -0,0 +1,510 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/__rem_pio2_large.c
const std = @import("../std.zig");
const math = std.math;
const init_jk = [_]i32{ 3, 4, 4, 6 }; // initial value for jk
//
// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
//
// integer array, contains the (24*i)-th to (24*i+23)-th
// bit of 2/pi after binary point. The corresponding
// floating value is
//
// ipio2[i] * 2^(-24(i+1)).
//
// NB: This table must have at least (e0-3)/24 + jk terms.
// For quad precision (e0 <= 16360, jk = 6), this is 686.
//
const ipio2 = [_]i32{
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
//#if LDBL_MAX_EXP > 1024
0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6,
0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2,
0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35,
0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30,
0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C,
0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4,
0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770,
0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7,
0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19,
0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522,
0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16,
0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6,
0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E,
0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48,
0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3,
0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF,
0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55,
0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612,
0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929,
0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC,
0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B,
0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C,
0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4,
0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB,
0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC,
0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C,
0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F,
0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5,
0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437,
0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B,
0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA,
0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD,
0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3,
0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3,
0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717,
0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F,
0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61,
0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB,
0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51,
0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0,
0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C,
0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6,
0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC,
0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED,
0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328,
0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D,
0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0,
0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B,
0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4,
0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3,
0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F,
0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD,
0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B,
0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4,
0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761,
0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31,
0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30,
0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262,
0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E,
0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1,
0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C,
0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4,
0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08,
0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196,
0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9,
0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4,
0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC,
0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C,
0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0,
0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C,
0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0,
0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC,
0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22,
0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893,
0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7,
0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5,
0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F,
0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4,
0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF,
0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B,
0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2,
0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138,
0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E,
0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569,
0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34,
0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9,
0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D,
0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F,
0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855,
0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569,
0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B,
0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE,
0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41,
0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49,
0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F,
0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110,
0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8,
0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365,
0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A,
0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270,
0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5,
0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616,
0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B,
0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901,
0x8071E0,
//#endif
};
// pi/2 cut into consecutive 24-bit chunks, most significant chunk first.
// __rem_pio2_large multiplies entries 0..jp of this table against q[] to
// turn the reduced fraction back into a multiple of pi/2.
const PIo2 = [_]f64{
1.57079625129699707031e+00, // 0x3FF921FB, 0x40000000
7.54978941586159635335e-08, // 0x3E74442D, 0x00000000
5.39030252995776476554e-15, // 0x3CF84698, 0x80000000
3.28200341580791294123e-22, // 0x3B78CC51, 0x60000000
1.27065575308067607349e-29, // 0x39F01B83, 0x80000000
1.22933308981111328932e-36, // 0x387A2520, 0x40000000
2.73370053816464559624e-44, // 0x36E38222, 0x80000000
2.16741683877804819444e-51, // 0x3569F31D, 0x00000000
};
// Converts an integer of any type into a `usize` suitable for array
// subscripting. The value must be representable as `usize`.
fn U(x: anytype) usize {
    const index: usize = @intCast(usize, x);
    return index;
}
// Returns the last three digits of N with y = x - N*pi/2 so that |y| < pi/2.
//
// The method is to compute the integer (mod 8) and fraction parts of
// (2/pi)*x without doing the full multiplication. In general we
// skip the part of the product that is known to be a huge integer
// (more accurately, = 0 mod 8). Thus the number of operations is
// independent of the exponent of the input.
//
// (2/pi) is represented by an array of 24-bit integers in ipio2[].
//
// Input parameters:
// x[] The input value (must be positive) is broken into nx
// pieces of 24-bit integers in double precision format.
// x[i] will be the i-th 24-bit chunk of x. The scaled exponent
// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
// matches x up to 24 bits).
//
// Example of breaking a double positive z into x[0]+x[1]+x[2]:
// e0 = ilogb(z)-23
// z = scalbn(z,-e0)
// for i = 0,1,2
// x[i] = floor(z)
// z = (z-x[i])*2**24
//
//
// y[] output result in an array of double precision numbers.
// The dimension of y[] is:
// 24-bit precision 1
// 53-bit precision 2
// 64-bit precision 2
// 113-bit precision 3
// The actual value is the sum of them. Thus for 113-bit
// precision, one may have to do something like:
//
// long double t,w,r_head, r_tail;
// t = (long double)y[2] + (long double)y[1];
// w = (long double)y[0];
// r_head = t+w;
// r_tail = w - (r_head - t);
//
// e0 The exponent of x[0]. Must be <= 16360 or you need to
// expand the ipio2 table.
//
// nx dimension of x[]
//
// prec an integer indicating the precision:
// 0 24 bits (single)
// 1 53 bits (double)
// 2 64 bits (extended)
// 3 113 bits (quad)
//
// Here is the description of some local variables:
//
// jk jk+1 is the initial number of terms of ipio2[] needed
// in the computation. The minimum and recommended value
// for jk is 3,4,4,6 for single, double, extended, and quad.
// jk+1 must be 2 larger than you might expect so that our
// recomputation test works. (Up to 24 bits in the integer
// part (the 24 bits of it that we compute) and 23 bits in
// the fraction part may be lost to cancelation before we
// recompute.)
//
// jz local integer variable indicating the number of
// terms of ipio2[] used.
//
// jx nx - 1
//
// jv index for pointing to the suitable ipio2[] for the
// computation. In general, we want
// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
// is an integer. Thus
// e0-3-24*jv >= 0 or (e0-3)/24 >= jv
// Hence jv = max(0,(e0-3)/24).
//
// jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
//
// q[] double array with integral value, representing the
// 24-bits chunk of the product of x and 2/pi.
//
// q0 the corresponding exponent of q[0]. Note that the
// exponent for q[i] would be q0-24*i.
//
// PIo2[] double precision array, obtained by cutting pi/2
// into 24 bits chunks.
//
// f[] ipio2[] in floating point
//
// iq[] integer array by breaking up q[] in 24-bits chunk.
//
// fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
//
// ih integer. If >0 it indicates q[] is >= 0.5, hence
// it also indicates the *sign* of the result.
//
//
//
// Constants:
// The hexadecimal values are the intended ones for the following
// constants. The decimal values may be used, provided that the
// compiler will convert from decimal to binary accurately enough
// to produce the hexadecimal values shown.
//
// Reduces a positive argument, supplied as 24-bit chunks, modulo pi/2.
//
// Parameters:
//   x    - input chunks; elements x[0..nx] are read.
//   y    - output pieces; y[0] is written for prec 0, y[0..2] for prec 1
//          and 2, y[0..3] for prec 3. Their sum is the reduced argument.
//   e0   - scaled exponent of x[0].
//   nx   - number of chunks in x.
//   prec - precision selector, used as an index into init_jk (0..3).
//
// Returns n & 7, i.e. the low three bits of the multiple of pi/2 removed.
// When ih != 0 the stored pieces are negated before being written to y.
pub fn __rem_pio2_large(x: []f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 {
var jz: i32 = undefined;
var jx: i32 = undefined;
var jv: i32 = undefined;
var jp: i32 = undefined;
var jk: i32 = undefined;
var carry: i32 = undefined;
var n: i32 = undefined;
// iq, f, fq and q are fixed 20-entry scratch arrays indexed up to jz
// (or jx + jz for f); jz grows by k whenever a recomputation is triggered.
var iq: [20]i32 = undefined;
var i: i32 = undefined;
var j: i32 = undefined;
var k: i32 = undefined;
var m: i32 = undefined;
var q0: i32 = undefined;
var ih: i32 = undefined;
var z: f64 = undefined;
var fw: f64 = undefined;
var f: [20]f64 = undefined;
var fq: [20]f64 = undefined;
var q: [20]f64 = undefined;
// initialize jk
jk = init_jk[prec];
jp = jk;
// determine jx,jv,q0, note that 3>q0
jx = nx - 1;
// jv is clamped to zero on the next line, so a negative quotient is never
// used as a table index.
jv = @divFloor(e0 - 3, 24);
if (jv < 0) jv = 0;
q0 = e0 - 24 * (jv + 1);
// set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk]
j = jv - jx;
m = jx + jk;
i = 0;
while (i <= m) : ({
i += 1;
j += 1;
}) {
// positions that would lie before the start of ipio2 (j < 0) are zero
f[U(i)] = if (j < 0) 0.0 else @intToFloat(f64, ipio2[U(j)]);
}
// compute q[0],q[1],...q[jk]
i = 0;
while (i <= jk) : (i += 1) {
j = 0;
fw = 0;
while (j <= jx) : (j += 1) {
fw += x[U(j)] * f[U(jx + i - j)];
}
q[U(i)] = fw;
}
jz = jk;
// This is to handle a non-trivial goto translation from C.
// An unconditional return statement is found at the end of this loop.
recompute: while (true) {
// distill q[] into iq[] reversingly
i = 0;
j = jz;
z = q[U(jz)];
while (j > 0) : ({
i += 1;
j -= 1;
}) {
// fw = z / 2^24 truncated to an integer; the remainder goes to iq[i]
// and fw is carried into the next more significant q term.
fw = @intToFloat(f64, @floatToInt(i32, 0x1p-24 * z));
iq[U(i)] = @floatToInt(i32, z - 0x1p24 * fw);
z = q[U(j - 1)] + fw;
}
// compute n
z = math.scalbn(z, q0); // actual value of z
z -= 8.0 * math.floor(z * 0.125); // trim off integer >= 8
n = @floatToInt(i32, z);
z -= @intToFloat(f64, n);
ih = 0;
if (q0 > 0) { // need iq[jz-1] to determine n
i = iq[U(jz - 1)] >> @intCast(u5, 24 - q0);
n += i;
iq[U(jz - 1)] -= i << @intCast(u5, 24 - q0);
ih = iq[U(jz - 1)] >> @intCast(u5, 23 - q0);
} else if (q0 == 0) {
ih = iq[U(jz - 1)] >> 23;
} else if (z >= 0.5) {
ih = 2;
}
if (ih > 0) { // q > 0.5
n += 1;
carry = 0;
i = 0;
while (i < jz) : (i += 1) { // compute 1-q
j = iq[U(i)];
if (carry == 0) {
if (j != 0) {
carry = 1;
iq[U(i)] = 0x1000000 - j;
}
} else {
iq[U(i)] = 0xffffff - j;
}
}
if (q0 > 0) { // rare case: chance is 1 in 12
switch (q0) {
1 => iq[U(jz - 1)] &= 0x7fffff,
2 => iq[U(jz - 1)] &= 0x3fffff,
// q0 > 0 in this branch and q0 < 3 per the "3>q0" note above
else => unreachable,
}
}
if (ih == 2) {
z = 1.0 - z;
if (carry != 0) {
z -= math.scalbn(@as(f64, 1.0), q0);
}
}
}
// check if recomputation is needed
if (z == 0.0) {
j = 0;
i = jz - 1;
while (i >= jk) : (i -= 1) {
j |= iq[U(i)];
}
if (j == 0) { // need recomputation
k = 1;
while (iq[U(jk - k)] == 0) : (k += 1) {
// k = no. of terms needed
}
i = jz + 1;
while (i <= jz + k) : (i += 1) { // add q[jz+1] to q[jz+k]
f[U(jx + i)] = @intToFloat(f64, ipio2[U(jv + i)]);
j = 0;
fw = 0;
while (j <= jx) : (j += 1) {
fw += x[U(j)] * f[U(jx + i - j)];
}
q[U(i)] = fw;
}
jz += k;
continue :recompute; // mimic goto recompute
}
}
// chop off zero terms
if (z == 0.0) {
jz -= 1;
q0 -= 24;
while (iq[U(jz)] == 0) {
jz -= 1;
q0 -= 24;
}
} else { // break z into 24-bit if necessary
z = math.scalbn(z, -q0);
if (z >= 0x1p24) {
fw = @intToFloat(f64, @floatToInt(i32, 0x1p-24 * z));
iq[U(jz)] = @floatToInt(i32, z - 0x1p24 * fw);
jz += 1;
q0 += 24;
iq[U(jz)] = @floatToInt(i32, fw);
} else {
iq[U(jz)] = @floatToInt(i32, z);
}
}
// convert integer "bit" chunk to floating-point value
fw = math.scalbn(@as(f64, 1.0), q0);
i = jz;
while (i >= 0) : (i -= 1) {
q[U(i)] = fw * @intToFloat(f64, iq[U(i)]);
fw *= 0x1p-24;
}
// compute PIo2[0,...,jp]*q[jz,...,0]
i = jz;
while (i >= 0) : (i -= 1) {
fw = 0;
k = 0;
while (k <= jp and k <= jz - i) : (k += 1) {
fw += PIo2[U(k)] * q[U(i + k)];
}
fq[U(jz - i)] = fw;
}
// compress fq[] into y[]
switch (prec) {
0 => {
fw = 0.0;
i = jz;
while (i >= 0) : (i -= 1) {
fw += fq[U(i)];
}
y[0] = if (ih == 0) fw else -fw;
},
1, 2 => {
fw = 0.0;
i = jz;
while (i >= 0) : (i -= 1) {
fw += fq[U(i)];
}
// TODO: drop excess precision here once double_t is used
// NOTE(review): the self-assignment below has no effect; it looks like a
// placeholder for the precision-dropping step the TODO refers to.
fw = fw;
y[0] = if (ih == 0) fw else -fw;
// y[1] is the part of the sum that did not fit in y[0]
fw = fq[0] - fw;
i = 1;
while (i <= jz) : (i += 1) {
fw += fq[U(i)];
}
y[1] = if (ih == 0) fw else -fw;
},
3 => { // painful
i = jz;
while (i > 0) : (i -= 1) {
fw = fq[U(i - 1)] + fq[U(i)];
fq[U(i)] += fq[U(i - 1)] - fw;
fq[U(i - 1)] = fw;
}
i = jz;
while (i > 1) : (i -= 1) {
fw = fq[U(i - 1)] + fq[U(i)];
fq[U(i)] += fq[U(i - 1)] - fw;
fq[U(i - 1)] = fw;
}
fw = 0;
i = jz;
while (i >= 2) : (i -= 1) {
fw += fq[U(i)];
}
if (ih == 0) {
y[0] = fq[0];
y[1] = fq[1];
y[2] = fw;
} else {
y[0] = -fq[0];
y[1] = -fq[1];
y[2] = -fw;
}
},
else => unreachable,
}
// only the low three bits of n are reported to the caller
return n & 7;
}
}

View File

@ -0,0 +1,70 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/__rem_pio2f.c
const std = @import("../std.zig");
const __rem_pio2_large = @import("__rem_pio2_large.zig").__rem_pio2_large;
const math = std.math;
// toint: 1.5 / epsilon(f64). __rem_pio2f adds and then subtracts this
// constant as a specialized rint() when computing the multiple of pi/2.
const toint = 1.5 / math.epsilon(f64);
// pi/4
const pio4 = 0x1.921fb6p-1;
// invpio2: 53 bits of 2/pi
const invpio2 = 6.36619772367581382433e-01; // 0x3FE45F30, 0x6DC9C883
// pio2_1: first 25 bits of pi/2
const pio2_1 = 1.57079631090164184570e+00; // 0x3FF921FB, 0x50000000
// pio2_1t: pi/2 - pio2_1
const pio2_1t = 1.58932547735281966916e-08; // 0x3E5110b4, 0x611A6263
// Returns the remainder of x rem pi/2 in *y; the return value is the
// multiple of pi/2 that was removed (reduced mod 8 for large arguments).
// Everything except the incoming x is carried in double precision.
// Large arguments are delegated to __rem_pio2_large().
pub fn __rem_pio2f(x: f32, y: *f64) i32 {
    const bits = @bitCast(u32, x);
    const ix = bits & 0x7fffffff;

    // 25+53 bit pi is good enough for medium size
    if (ix < 0x4dc90fdb) { // |x| ~< 2^28*(pi/2), medium size
        // Use a specialized rint() to get the multiple of pi/2.
        var quadrants: f64 = @floatCast(f64, x) * invpio2 + toint - toint;
        var n: i32 = @floatToInt(i32, quadrants);
        y.* = x - quadrants * pio2_1 - quadrants * pio2_1t;

        // Matters with directed rounding: step by one quadrant if the first
        // estimate left the remainder outside [-pi/4, pi/4].
        const nudge: i32 = if (y.* < -pio4) @as(i32, -1) else if (y.* > pio4) @as(i32, 1) else @as(i32, 0);
        if (nudge != 0) {
            n += nudge;
            quadrants += @intToFloat(f64, nudge);
            y.* = x - quadrants * pio2_1 - quadrants * pio2_1t;
        }
        return n;
    }

    if (ix >= 0x7f800000) { // x is inf or NaN
        y.* = x - x;
        return 0;
    }

    // scale x into [2^23, 2^24-1]
    const sign = bits >> 31 != 0;
    const e0 = (ix >> 23) - (0x7f + 23); // e0 = ilogb(|x|)-23, positive
    var tx: [1]f64 = undefined;
    var ty: [1]f64 = undefined;
    tx[0] = @bitCast(f32, ix - (e0 << 23));

    const n = __rem_pio2_large(&tx, &ty, @intCast(i32, e0), 1, 0);

    // The large reduction works on |x|; restore the sign afterwards.
    if (sign) {
        y.* = -ty[0];
        return -n;
    }
    y.* = ty[0];
    return n;
}

273
lib/std/math/__trig.zig Normal file
View File

@ -0,0 +1,273 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/__cos.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__cosdf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__sin.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__sindf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__tan.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__tandf.c
// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
// Input x is assumed to be bounded by ~pi/4 in magnitude.
// Input y is the tail of x.
//
// Algorithm
// 1. Since cos(-x) = cos(x), we need only to consider positive x.
// 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
// 3. cos(x) is approximated by a polynomial of degree 14 on
// [0,pi/4]
// 4 14
// cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
// where the remez error is
//
// | 2 4 6 8 10 12 14 | -58
// |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
// | |
//
// 4 6 8 10 12 14
// 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
// cos(x) ~ 1 - x*x/2 + r
// since cos(x+y) ~ cos(x) - sin(x)*y
// ~ cos(x) - x*y,
// a correction term is necessary in cos(x) and hence
// cos(x+y) = 1 - (x*x/2 - (r - x*y))
// For better accuracy, rearrange to
// cos(x+y) ~ w + (tmp + (r-x*y))
// where w = 1 - x*x/2 and tmp is a tiny correction term
// (1 - x*x/2 == w + tmp exactly in infinite precision).
// The exactness of w + tmp in infinite precision depends on w
// and tmp having the same precision as x. If they have extra
// precision due to compiler bugs, then the extra precision is
// only good provided it is retained in all terms of the final
// expression for cos(). Retention happens in all cases tested
// under FreeBSD, so don't pessimize things by forcibly clipping
// any extra precision in w.
// Kernel cosine: evaluates cos(x + y) for |x| bounded by ~pi/4, where y is
// the tail of x. Uses the degree-14 polynomial described above, arranged as
// w + (tmp + (r - x*y)) so the leading 1 - x*x/2 term keeps its accuracy.
pub fn __cos(x: f64, y: f64) f64 {
    const C1 = 4.16666666666666019037e-02; // 0x3FA55555, 0x5555554C
    const C2 = -1.38888888888741095749e-03; // 0xBF56C16C, 0x16C15177
    const C3 = 2.48015872894767294178e-05; // 0x3EFA01A0, 0x19CB1590
    const C4 = -2.75573143513906633035e-07; // 0xBE927E4F, 0x809C52AD
    const C5 = 2.08757232129817482790e-09; // 0x3E21EE9E, 0xBDB4B1C4
    const C6 = -1.13596475577881948265e-11; // 0xBDA8FAE9, 0xBE8838D4

    const x2 = x * x;
    const x4 = x2 * x2;

    // r = C1*x^2 + ... + C6*x^12, split into a low- and a high-order half.
    const low_half = x2 * (C1 + x2 * (C2 + x2 * C3));
    const high_half = x4 * x4 * (C4 + x2 * (C5 + x2 * C6));
    const r = low_half + high_half;

    const half_x2 = 0.5 * x2;
    const w = 1.0 - half_x2;
    // (1.0 - w) - half_x2 recovers the rounding error committed in w.
    const correction = ((1.0 - w) - half_x2) + (x2 * r - x * y);
    return w + correction;
}
// Single-precision kernel cosine: the argument is taken in double precision,
// a short even polynomial is evaluated in double, and the result is rounded
// to f32.
pub fn __cosdf(x: f64) f32 {
    // |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]).
    const C0 = -0x1ffffffd0c5e81.0p-54; // -0.499999997251031003120
    const C1 = 0x155553e1053a42.0p-57; // 0.0416666233237390631894
    const C2 = -0x16c087e80f1e27.0p-62; // -0.00138867637746099294692
    const C3 = 0x199342e0ee5069.0p-68; // 0.0000243904487962774090654

    // Try to optimize for parallel evaluation as in __tandf.c.
    const x2 = x * x;
    const x4 = x2 * x2;
    const high = C2 + x2 * C3;
    const low = (1.0 + x2 * C0) + x4 * C1;
    return @floatCast(f32, low + (x4 * x2) * high);
}
// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
// Input x is assumed to be bounded by ~pi/4 in magnitude.
// Input y is the tail of x.
// Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
//
// Algorithm
// 1. Since sin(-x) = -sin(x), we need only to consider positive x.
// 2. Callers must return sin(-0) = -0 without calling here since our
// odd polynomial is not evaluated in a way that preserves -0.
// Callers may do the optimization sin(x) ~ x for tiny x.
// 3. sin(x) is approximated by a polynomial of degree 13 on
// [0,pi/4]
// 3 13
// sin(x) ~ x + S1*x + ... + S6*x
// where
//
// |sin(x) 2 4 6 8 10 12 | -58
// |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
// | x |
//
// 4. sin(x+y) = sin(x) + sin'(x')*y
// ~ sin(x) + (1-x*x/2)*y
// For better accuracy, let
// 3 2 2 2 2
// r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
// then 3 2
// sin(x) = x + (S1*x + (x *(r-y/2)+y))
// Kernel sine: evaluates sin(x + y) for |x| bounded by ~pi/4, where y is the
// tail of x. When iy == 0 the tail is treated as zero and the cheaper
// tail-free form is used.
pub fn __sin(x: f64, y: f64, iy: i32) f64 {
    const S1 = -1.66666666666666324348e-01; // 0xBFC55555, 0x55555549
    const S2 = 8.33333333332248946124e-03; // 0x3F811111, 0x1110F8A6
    const S3 = -1.98412698298579493134e-04; // 0xBF2A01A0, 0x19C161D5
    const S4 = 2.75573137070700676789e-06; // 0x3EC71DE3, 0x57B1FE7D
    const S5 = -2.50507602534068634195e-08; // 0xBE5AE5E6, 0x8A2B9CEB
    const S6 = 1.58969099521155010221e-10; // 0x3DE5D93A, 0x5ACFD57C

    const x2 = x * x;
    const x4 = x2 * x2;
    const x3 = x2 * x;
    const r = S2 + x2 * (S3 + x2 * S4) + x2 * x4 * (S5 + x2 * S6);

    if (iy != 0) {
        // Fold the tail in: sin(x+y) ~ sin(x) + (1 - x*x/2)*y.
        return x - ((x2 * (0.5 * y - x3 * r) - y) - x3 * S1);
    }
    return x + x3 * (S1 + x2 * r);
}
// Single-precision kernel sine: the argument is taken in double precision,
// a short odd polynomial is evaluated in double, and the result is rounded
// to f32.
pub fn __sindf(x: f64) f32 {
    // |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]).
    const S1 = -0x15555554cbac77.0p-55; // -0.166666666416265235595
    const S2 = 0x111110896efbb2.0p-59; // 0.0083333293858894631756
    const S3 = -0x1a00f9e2cae774.0p-65; // -0.000198393348360966317347
    const S4 = 0x16cd878c3b46a7.0p-71; // 0.0000027183114939898219064

    // Try to optimize for parallel evaluation as in __tandf.c.
    const x2 = x * x;
    const x4 = x2 * x2;
    const x3 = x2 * x;
    const high = S3 + x2 * S4;
    const low = x + x3 * (S1 + x2 * S2);
    return @floatCast(f32, low + x3 * x4 * high);
}
// kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
// Input x is assumed to be bounded by ~pi/4 in magnitude.
// Input y is the tail of x.
// Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned.
//
// Algorithm
// 1. Since tan(-x) = -tan(x), we need only to consider positive x.
// 2. Callers must return tan(-0) = -0 without calling here since our
// odd polynomial is not evaluated in a way that preserves -0.
// Callers may do the optimization tan(x) ~ x for tiny x.
// 3. tan(x) is approximated by a odd polynomial of degree 27 on
// [0,0.67434]
// 3 27
// tan(x) ~ x + T1*x + ... + T13*x
// where
//
// |tan(x) 2 4 26 | -59.2
// |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
// | x |
//
// Note: tan(x+y) = tan(x) + tan'(x)*y
// ~ tan(x) + (1+x*x)*y
// Therefore, for better accuracy in computing tan(x+y), let
// 3 2 2 2 2
// r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
// then
// 3 2
// tan(x+y) = x + (T1*x + (x *(r+y)+y))
//
// 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
// tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
// = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
// Kernel tangent: evaluates tan(x_ + y_) when `odd` is false and
// -1/tan(x_ + y_) when `odd` is true, for |x_| bounded by ~pi/4 with y_ the
// tail of x_. Arguments with |x| >= 0.6744 are first reflected about pi/4.
pub fn __tan(x_: f64, y_: f64, odd: bool) f64 {
    const T = [_]f64{
        3.33333333333334091986e-01, // 3FD55555, 55555563
        1.33333333333201242699e-01, // 3FC11111, 1110FE7A
        5.39682539762260521377e-02, // 3FABA1BA, 1BB341FE
        2.18694882948595424599e-02, // 3F9664F4, 8406D637
        8.86323982359930005737e-03, // 3F8226E3, E96E8493
        3.59207910759131235356e-03, // 3F6D6D22, C9560328
        1.45620945432529025516e-03, // 3F57DBC8, FEE08315
        5.88041240820264096874e-04, // 3F4344D8, F2F26501
        2.46463134818469906812e-04, // 3F3026F7, 1A8D1068
        7.81794442939557092300e-05, // 3F147E88, A03792A6
        7.14072491382608190305e-05, // 3F12B80F, 32F0A7E9
        -1.85586374855275456654e-05, // BEF375CB, DB605373
        2.59073051863633712884e-05, // 3EFB2A70, 74BF7AD4
    };
    const pio4 = 7.85398163397448278999e-01; // 3FE921FB, 54442D18
    const pio4lo = 3.06161699786838301793e-17; // 3C81A626, 33145C07

    const hx = @intCast(u32, @bitCast(u64, x_) >> 32);
    const big = (hx & 0x7fffffff) >= 0x3FE59428; // |x| >= 0.6744
    const negative = hx >> 31 != 0; // only consulted on the `big` path

    var x = x_;
    var y = y_;
    if (big) {
        // Work on |x| and replace it by pi/4 - |x|, absorbing the tail.
        if (negative) {
            x = -x;
            y = -y;
        }
        x = (pio4 - x) + (pio4lo - y);
        y = 0.0;
    }

    const z = x * x;
    const w = z * z;
    // Break x^5*(T[1]+x^2*T[2]+...) into
    // x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
    // x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
    const even_part = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
    const odd_part = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
    const cube = z * x;
    const r = y + z * (cube * (even_part + odd_part) + y) + cube * T[0];
    const tan_small = x + r;

    if (big) {
        // unit is +1 when tan is wanted and -1 when -1/tan is wanted.
        const unit = 1 - 2 * @intToFloat(f64, @boolToInt(odd));
        const reflected = unit - 2.0 * (x + (r - tan_small * tan_small / (tan_small + unit)));
        return if (negative) -reflected else reflected;
    }
    if (!odd) {
        return tan_small;
    }

    // -1.0/(x+r) has up to 2ulp error, so compute it accurately
    const w0 = @bitCast(f64, @bitCast(u64, tan_small) & 0xffffffff00000000);
    const v = r - (w0 - x); // w0+v = r+x
    const a = -1.0 / tan_small;
    const a0 = @bitCast(f64, @bitCast(u64, a) & 0xffffffff00000000);
    return a0 + a * (1.0 + a0 * w0 + a0 * v);
}
// Single-precision kernel tangent: returns tan(x) when `odd` is false and
// -1/tan(x) when `odd` is true. The argument is taken in double precision
// and the result is rounded to f32.
pub fn __tandf(x: f64, odd: bool) f32 {
    // |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]).
    const T = [_]f64{
        0x15554d3418c99f.0p-54, // 0.333331395030791399758
        0x1112fd38999f72.0p-55, // 0.133392002712976742718
        0x1b54c91d865afe.0p-57, // 0.0533812378445670393523
        0x191df3908c33ce.0p-58, // 0.0245283181166547278873
        0x185dadfcecf44e.0p-61, // 0.00297435743359967304927
        0x1362b9bf971bcd.0p-59, // 0.00946564784943673166728
    };

    // Split up the polynomial into small independent terms to give
    // opportunities for parallel evaluation. The chosen splitting is
    // micro-optimized for Athlons (XP, X64). It costs 2 multiplications
    // relative to Horner's method on sequential machines.
    //
    // We add the small terms from lowest degree up for efficiency on
    // non-sequential machines (the lowest degree terms tend to be ready
    // earlier). Apart from this, we don't care about order of
    // operations, and don't need to care since we have precision to
    // spare. However, the chosen splitting is good for accuracy too,
    // and would give results as accurate as Horner's method if the
    // small terms were added from highest degree down.
    const x2 = x * x;
    const high_pair = T[4] + x2 * T[5];
    const mid_pair = T[2] + x2 * T[3];
    const x4 = x2 * x2;
    const x3 = x2 * x;
    const low_pair = T[0] + x2 * T[1];
    const tangent = (x + x3 * low_pair) + (x3 * x4) * (mid_pair + x4 * high_pair);
    return @floatCast(f32, if (odd) -1.0 / tangent else tangent);
}

View File

@ -1,12 +1,17 @@
// Ported from go, which is licensed under a BSD-3 license.
// https://golang.org/LICENSE
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://golang.org/src/math/sin.go
// https://git.musl-libc.org/cgit/musl/tree/src/math/cosf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/cos.c
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
const kernel = @import("__trig.zig");
const __rem_pio2 = @import("__rem_pio2.zig").__rem_pio2;
const __rem_pio2f = @import("__rem_pio2f.zig").__rem_pio2f;
/// Returns the cosine of the radian value x.
///
/// Special Cases:
@ -15,109 +20,135 @@ const expect = std.testing.expect;
// Generic entry point: selects an implementation from the type of `x` and
// returns a value of that same type.
// NOTE(review): the switch lists f32/f64 twice (cos_ arms and cos32/cos64 arms).
// This looks like the previous and the current arms shown together — confirm
// which pair is live before relying on this text.
pub fn cos(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f32 => cos_(f32, x),
f64 => cos_(f64, x),
f32 => cos32(x),
f64 => cos64(x),
// Every other type is rejected at compile time.
else => @compileError("cos not implemented for " ++ @typeName(T)),
};
}
// sin polynomial coefficients
// Consumed in nested (Horner) form as
// z + z * w * (S5 + w * (S4 + ... + w * S0)) with w = z * z.
// NOTE(review): that expression only appears next to cos_-style j/y arithmetic;
// presumably these belong to the previous implementation — confirm.
const S0 = 1.58962301576546568060E-10;
const S1 = -2.50507477628578072866E-8;
const S2 = 2.75573136213857245213E-6;
const S3 = -1.98412698295895385996E-4;
const S4 = 8.33333333332211858878E-3;
const S5 = -1.66666666666666307295E-1;
// f32 cosine. Works on the raw bit pattern of x: `ix` is the bits with the sign
// bit cleared, so the hex thresholds below order |x| against multiples of pi/4.
// Small ranges are shifted by a fixed multiple of pi/2 and handed to the
// __cosdf/__sindf kernels; everything larger goes through __rem_pio2f.
// NOTE(review): this span also contains lines that do not fit cos32 as written
// (C0..C5, pi4a..m4pi, `fn cos_`, and the floor/j/y arithmetic). They look like
// the previous implementation shown alongside the new one — confirm.
fn cos32(x: f32) f32 {
// Small multiples of pi/2 rounded to double precision.
const c1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18
const c2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18
const c3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2
const c4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18
// cos polynomial coefficients
const C0 = -1.13585365213876817300E-11;
const C1 = 2.08757008419747316778E-9;
const C2 = -2.75573141792967388112E-7;
const C3 = 2.48015872888517045348E-5;
const C4 = -1.38888888888730564116E-3;
const C5 = 4.16666666666665929218E-2;
// Split the bit pattern: top bit is the sign, the remaining 31 bits are |x|.
var ix = @bitCast(u32, x);
const sign = ix >> 31 != 0;
ix &= 0x7fffffff;
const pi4a = 7.85398125648498535156e-1;
const pi4b = 3.77489470793079817668E-8;
const pi4c = 2.69515142907905952645E-15;
const m4pi = 1.273239544735162542821171882678754627704620361328125;
fn cos_(comptime T: type, x_: T) T {
const I = std.meta.Int(.signed, @typeInfo(T).Float.bits);
var x = x_;
if (math.isNan(x) or math.isInf(x)) {
return math.nan(T);
if (ix <= 0x3f490fda) { // |x| ~<= pi/4
if (ix < 0x39800000) { // |x| < 2**-12
// raise inexact if x != 0
math.doNotOptimizeAway(x + 0x1p120);
return 1.0;
}
return kernel.__cosdf(x);
}
if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4
if (ix > 0x4016cbe3) { // |x| ~> 3*pi/4
// Shift by pi toward zero; the cosine changes sign.
return -kernel.__cosdf(if (sign) x + c2pio2 else x - c2pio2);
} else {
// Shift by pi/2 and switch to the sine kernel.
if (sign) {
return kernel.__sindf(x + c1pio2);
} else {
return kernel.__sindf(c1pio2 - x);
}
}
}
if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4
if (ix > 0x40afeddf) { // |x| ~> 7*pi/4
// Shift by 2*pi toward zero; no sign change.
return kernel.__cosdf(if (sign) x + c4pio2 else x - c4pio2);
} else {
// Shift by 3*pi/2 and switch to the sine kernel.
if (sign) {
return kernel.__sindf(-x - c3pio2);
} else {
return kernel.__sindf(x - c3pio2);
}
}
}
var sign = false;
x = math.fabs(x);
var y = math.floor(x * m4pi);
var j = @floatToInt(I, y);
if (j & 1 == 1) {
j += 1;
y += 1;
// cos(Inf or NaN) is NaN
if (ix >= 0x7f800000) {
return x - x;
}
j &= 7;
if (j > 3) {
j -= 4;
sign = !sign;
}
if (j > 1) {
sign = !sign;
// General case: y is undefined until __rem_pio2f fills it in; the low two
// bits of the returned n choose the kernel and sign.
var y: f64 = undefined;
const n = __rem_pio2f(x, &y);
return switch (n & 3) {
0 => kernel.__cosdf(y),
1 => kernel.__sindf(-y),
2 => -kernel.__cosdf(y),
else => kernel.__sindf(y),
};
}
// f64 cosine. `ix` is the high 32 bits of x's bit pattern with the sign bit
// masked off, so the thresholds compare |x| by its leading bits only.
// NOTE(review): the lines computing z/w/r from y, j, pi4a..pi4c, S*/C* and `sign`
// use names this function never defines; they look like the previous
// implementation shown alongside the new one — confirm.
fn cos64(x: f64) f64 {
var ix = @bitCast(u64, x) >> 32;
ix &= 0x7fffffff;
// |x| ~< pi/4
if (ix <= 0x3fe921fb) {
if (ix < 0x3e46a09e) { // |x| < 2**-27 * sqrt(2)
// raise inexact if x!=0
math.doNotOptimizeAway(x + 0x1p120);
return 1.0;
}
return kernel.__cos(x, 0);
}
const z = ((x - y * pi4a) - y * pi4b) - y * pi4c;
const w = z * z;
// cos(Inf or NaN) is NaN
if (ix >= 0x7ff00000) {
return x - x;
}
const r = if (j == 1 or j == 2)
z + z * w * (S5 + w * (S4 + w * (S3 + w * (S2 + w * (S1 + w * S0)))))
else
1.0 - 0.5 * w + w * w * (C5 + w * (C4 + w * (C3 + w * (C2 + w * (C1 + w * C0)))));
return if (sign) -r else r;
// General case: y is undefined until __rem_pio2 fills both elements; the low
// two bits of the returned n choose the kernel and sign.
var y: [2]f64 = undefined;
const n = __rem_pio2(x, &y);
return switch (n & 3) {
0 => kernel.__cos(y[0], y[1]),
1 => -kernel.__sin(y[0], y[1], 1),
2 => -kernel.__cos(y[0], y[1]),
else => kernel.__sin(y[0], y[1], 1),
};
}
// Checks that the generic `cos` agrees with the per-width implementation at 0.0.
// NOTE(review): the same two checks appear in both cos_ and cos32/cos64 form.
test "math.cos" {
try expect(cos(@as(f32, 0.0)) == cos_(f32, 0.0));
try expect(cos(@as(f64, 0.0)) == cos_(f64, 0.0));
try expect(cos(@as(f32, 0.0)) == cos32(0.0));
try expect(cos(@as(f64, 0.0)) == cos64(0.0));
}
// Spot-checks f32 cosine against six-decimal reference values, within an
// absolute tolerance of `epsilon`.
// NOTE(review): `epsilon` is declared twice (1e-6 and 1e-5) and every check
// appears in both cos_ and cos32 form — presumably old and new lines together.
test "math.cos32" {
const epsilon = 0.000001;
const epsilon = 0.00001;
try expect(math.approxEqAbs(f32, cos_(f32, 0.0), 1.0, epsilon));
try expect(math.approxEqAbs(f32, cos_(f32, 0.2), 0.980067, epsilon));
try expect(math.approxEqAbs(f32, cos_(f32, 0.8923), 0.627623, epsilon));
try expect(math.approxEqAbs(f32, cos_(f32, 1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f32, cos_(f32, -1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f32, cos_(f32, 37.45), 0.969132, epsilon));
try expect(math.approxEqAbs(f32, cos_(f32, 89.123), 0.400798, epsilon));
try expect(math.approxEqAbs(f32, cos32(0.0), 1.0, epsilon));
try expect(math.approxEqAbs(f32, cos32(0.2), 0.980067, epsilon));
try expect(math.approxEqAbs(f32, cos32(0.8923), 0.627623, epsilon));
try expect(math.approxEqAbs(f32, cos32(1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f32, cos32(-1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f32, cos32(37.45), 0.969132, epsilon));
try expect(math.approxEqAbs(f32, cos32(89.123), 0.400798, epsilon));
}
// Spot-checks f64 cosine against reference values within an absolute
// tolerance of 1e-6.
// NOTE(review): every check appears in both cos_ and cos64 form — presumably
// old and new lines together.
test "math.cos64" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f64, cos_(f64, 0.0), 1.0, epsilon));
try expect(math.approxEqAbs(f64, cos_(f64, 0.2), 0.980067, epsilon));
try expect(math.approxEqAbs(f64, cos_(f64, 0.8923), 0.627623, epsilon));
try expect(math.approxEqAbs(f64, cos_(f64, 1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f64, cos_(f64, -1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f64, cos_(f64, 37.45), 0.969132, epsilon));
try expect(math.approxEqAbs(f64, cos_(f64, 89.123), 0.40080, epsilon));
try expect(math.approxEqAbs(f64, cos64(0.0), 1.0, epsilon));
try expect(math.approxEqAbs(f64, cos64(0.2), 0.980067, epsilon));
try expect(math.approxEqAbs(f64, cos64(0.8923), 0.627623, epsilon));
try expect(math.approxEqAbs(f64, cos64(1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f64, cos64(-1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f64, cos64(37.45), 0.969132, epsilon));
try expect(math.approxEqAbs(f64, cos64(89.123), 0.40080, epsilon));
}
// Special inputs for f32 cosine: +inf, -inf and NaN must all produce NaN.
// NOTE(review): each check appears in both cos_ and cos32 form.
test "math.cos32.special" {
try expect(math.isNan(cos_(f32, math.inf(f32))));
try expect(math.isNan(cos_(f32, -math.inf(f32))));
try expect(math.isNan(cos_(f32, math.nan(f32))));
try expect(math.isNan(cos32(math.inf(f32))));
try expect(math.isNan(cos32(-math.inf(f32))));
try expect(math.isNan(cos32(math.nan(f32))));
}
// Special inputs for f64 cosine: +inf, -inf and NaN must all produce NaN.
// NOTE(review): each check appears in both cos_ and cos64 form.
test "math.cos64.special" {
try expect(math.isNan(cos_(f64, math.inf(f64))));
try expect(math.isNan(cos_(f64, -math.inf(f64))));
try expect(math.isNan(cos_(f64, math.nan(f64))));
try expect(math.isNan(cos64(math.inf(f64))));
try expect(math.isNan(cos64(-math.inf(f64))));
try expect(math.isNan(cos64(math.nan(f64))));
}

View File

@ -1,12 +1,17 @@
// Ported from go, which is licensed under a BSD-3 license.
// https://golang.org/LICENSE
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/sinf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/sin.c
//
// https://golang.org/src/math/sin.go
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
const kernel = @import("__trig.zig");
const __rem_pio2 = @import("__rem_pio2.zig").__rem_pio2;
const __rem_pio2f = @import("__rem_pio2f.zig").__rem_pio2f;
/// Returns the sine of the radian value x.
///
/// Special Cases:
@ -16,114 +21,148 @@ const expect = std.testing.expect;
// Generic entry point: selects an implementation from the type of `x` and
// returns a value of that same type.
// NOTE(review): the switch lists f32/f64 twice (sin_ arms and sin32/sin64 arms).
// This looks like the previous and the current arms shown together — confirm
// which pair is live before relying on this text.
pub fn sin(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f32 => sin_(T, x),
f64 => sin_(T, x),
f32 => sin32(x),
f64 => sin64(x),
// Every other type is rejected at compile time.
else => @compileError("sin not implemented for " ++ @typeName(T)),
};
}
// sin polynomial coefficients
// Consumed in nested (Horner) form as
// z + z * w * (S5 + w * (S4 + ... + w * S0)) with w = z * z.
// NOTE(review): that expression only appears next to sin_-style j/y arithmetic;
// presumably these belong to the previous implementation — confirm.
const S0 = 1.58962301576546568060E-10;
const S1 = -2.50507477628578072866E-8;
const S2 = 2.75573136213857245213E-6;
const S3 = -1.98412698295895385996E-4;
const S4 = 8.33333333332211858878E-3;
const S5 = -1.66666666666666307295E-1;
// f32 sine. Works on the raw bit pattern of x: `ix` is the bits with the sign
// bit cleared, so the hex thresholds below order |x| against multiples of pi/4.
// Small ranges are shifted by a fixed multiple of pi/2 and handed to the
// __sindf/__cosdf kernels; everything larger goes through __rem_pio2f.
// NOTE(review): this span also contains lines that do not fit sin32 as written
// (C0..C5, pi4a..m4pi, `fn sin_`, and the floor/j/y arithmetic). They look like
// the previous implementation shown alongside the new one — confirm.
fn sin32(x: f32) f32 {
// Small multiples of pi/2 rounded to double precision.
const s1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18
const s2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18
const s3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2
const s4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18
// cos polynomial coefficients
const C0 = -1.13585365213876817300E-11;
const C1 = 2.08757008419747316778E-9;
const C2 = -2.75573141792967388112E-7;
const C3 = 2.48015872888517045348E-5;
const C4 = -1.38888888888730564116E-3;
const C5 = 4.16666666666665929218E-2;
// Split the bit pattern: top bit is the sign, the remaining 31 bits are |x|.
var ix = @bitCast(u32, x);
const sign = ix >> 31 != 0;
ix &= 0x7fffffff;
const pi4a = 7.85398125648498535156e-1;
const pi4b = 3.77489470793079817668E-8;
const pi4c = 2.69515142907905952645E-15;
const m4pi = 1.273239544735162542821171882678754627704620361328125;
fn sin_(comptime T: type, x_: T) T {
const I = std.meta.Int(.signed, @typeInfo(T).Float.bits);
var x = x_;
if (x == 0 or math.isNan(x)) {
return x;
if (ix <= 0x3f490fda) { // |x| ~<= pi/4
if (ix < 0x39800000) { // |x| < 2**-12
// raise inexact if x!=0 and underflow if subnormal
math.doNotOptimizeAway(if (ix < 0x00800000) x / 0x1p120 else x + 0x1p120);
// For such tiny inputs x itself is returned as the result.
return x;
}
return kernel.__sindf(x);
}
if (math.isInf(x)) {
return math.nan(T);
if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4
if (ix <= 0x4016cbe3) { // |x| ~<= 3pi/4
// Shift by pi/2 toward zero and switch to the cosine kernel.
if (sign) {
return -kernel.__cosdf(x + s1pio2);
} else {
return kernel.__cosdf(x - s1pio2);
}
}
// Shift by pi toward zero; the argument is negated before the sine kernel.
return kernel.__sindf(if (sign) -(x + s2pio2) else -(x - s2pio2));
}
if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4
if (ix <= 0x40afeddf) { // |x| ~<= 7*pi/4
// Shift by 3*pi/2 toward zero and switch to the cosine kernel.
if (sign) {
return kernel.__cosdf(x + s3pio2);
} else {
return -kernel.__cosdf(x - s3pio2);
}
}
// Shift by 2*pi toward zero; no sign change.
return kernel.__sindf(if (sign) x + s4pio2 else x - s4pio2);
}
var sign = x < 0;
x = math.fabs(x);
var y = math.floor(x * m4pi);
var j = @floatToInt(I, y);
if (j & 1 == 1) {
j += 1;
y += 1;
// sin(Inf or NaN) is NaN
if (ix >= 0x7f800000) {
return x - x;
}
j &= 7;
if (j > 3) {
j -= 4;
sign = !sign;
// General case: y is undefined until __rem_pio2f fills it in; the low two
// bits of the returned n choose the kernel and sign.
var y: f64 = undefined;
const n = __rem_pio2f(x, &y);
return switch (n & 3) {
0 => kernel.__sindf(y),
1 => kernel.__cosdf(y),
2 => kernel.__sindf(-y),
else => -kernel.__cosdf(y),
};
}
// f64 sine. `ix` is the high 32 bits of x's bit pattern with the sign bit
// masked off, so the thresholds compare |x| by its leading bits only.
// NOTE(review): the lines computing z/w/r from y, j, pi4a..pi4c, S*/C* and `sign`
// use names this function never defines; they look like the previous
// implementation shown alongside the new one — confirm.
fn sin64(x: f64) f64 {
var ix = @bitCast(u64, x) >> 32;
ix &= 0x7fffffff;
// |x| ~< pi/4
if (ix <= 0x3fe921fb) {
if (ix < 0x3e500000) { // |x| < 2**-26
// raise inexact if x != 0 and underflow if subnormal
math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120);
// For such tiny inputs x itself is returned as the result.
return x;
}
return kernel.__sin(x, 0.0, 0);
}
const z = ((x - y * pi4a) - y * pi4b) - y * pi4c;
const w = z * z;
// sin(Inf or NaN) is NaN
if (ix >= 0x7ff00000) {
return x - x;
}
const r = if (j == 1 or j == 2)
1.0 - 0.5 * w + w * w * (C5 + w * (C4 + w * (C3 + w * (C2 + w * (C1 + w * C0)))))
else
z + z * w * (S5 + w * (S4 + w * (S3 + w * (S2 + w * (S1 + w * S0)))));
return if (sign) -r else r;
// General case: y is undefined until __rem_pio2 fills both elements; the low
// two bits of the returned n choose the kernel and sign.
var y: [2]f64 = undefined;
const n = __rem_pio2(x, &y);
return switch (n & 3) {
0 => kernel.__sin(y[0], y[1], 1),
1 => kernel.__cos(y[0], y[1]),
2 => -kernel.__sin(y[0], y[1], 1),
else => -kernel.__cos(y[0], y[1]),
};
}
// Checks that the generic `sin` agrees with the per-width implementation at 0.0,
// and that a comptime evaluation of math.sin(2) equals the runtime call's result.
// NOTE(review): the first two checks appear in both sin_ and sin32/sin64 form.
test "math.sin" {
try expect(sin(@as(f32, 0.0)) == sin_(f32, 0.0));
try expect(sin(@as(f64, 0.0)) == sin_(f64, 0.0));
try expect(sin(@as(f32, 0.0)) == sin32(0.0));
try expect(sin(@as(f64, 0.0)) == sin64(0.0));
try expect(comptime (math.sin(@as(f64, 2))) == math.sin(@as(f64, 2)));
}
// Spot-checks f32 sine against six-decimal reference values, within an
// absolute tolerance of `epsilon`.
// NOTE(review): `epsilon` is declared twice (1e-6 and 1e-5) and every check
// appears in both sin_ and sin32 form — presumably old and new lines together.
test "math.sin32" {
const epsilon = 0.000001;
const epsilon = 0.00001;
try expect(math.approxEqAbs(f32, sin_(f32, 0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f32, sin_(f32, 0.2), 0.198669, epsilon));
try expect(math.approxEqAbs(f32, sin_(f32, 0.8923), 0.778517, epsilon));
try expect(math.approxEqAbs(f32, sin_(f32, 1.5), 0.997495, epsilon));
try expect(math.approxEqAbs(f32, sin_(f32, -1.5), -0.997495, epsilon));
try expect(math.approxEqAbs(f32, sin_(f32, 37.45), -0.246544, epsilon));
try expect(math.approxEqAbs(f32, sin_(f32, 89.123), 0.916166, epsilon));
try expect(math.approxEqAbs(f32, sin32(0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f32, sin32(0.2), 0.198669, epsilon));
try expect(math.approxEqAbs(f32, sin32(0.8923), 0.778517, epsilon));
try expect(math.approxEqAbs(f32, sin32(1.5), 0.997495, epsilon));
try expect(math.approxEqAbs(f32, sin32(-1.5), -0.997495, epsilon));
try expect(math.approxEqAbs(f32, sin32(37.45), -0.246544, epsilon));
try expect(math.approxEqAbs(f32, sin32(89.123), 0.916166, epsilon));
}
// Spot-checks f64 sine against reference values within an absolute
// tolerance of 1e-6.
// NOTE(review): every check appears in both sin_ and sin64 form — presumably
// old and new lines together.
test "math.sin64" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f64, sin_(f64, 0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f64, sin_(f64, 0.2), 0.198669, epsilon));
try expect(math.approxEqAbs(f64, sin_(f64, 0.8923), 0.778517, epsilon));
try expect(math.approxEqAbs(f64, sin_(f64, 1.5), 0.997495, epsilon));
try expect(math.approxEqAbs(f64, sin_(f64, -1.5), -0.997495, epsilon));
try expect(math.approxEqAbs(f64, sin_(f64, 37.45), -0.246543, epsilon));
try expect(math.approxEqAbs(f64, sin_(f64, 89.123), 0.916166, epsilon));
try expect(math.approxEqAbs(f64, sin64(0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f64, sin64(0.2), 0.198669, epsilon));
try expect(math.approxEqAbs(f64, sin64(0.8923), 0.778517, epsilon));
try expect(math.approxEqAbs(f64, sin64(1.5), 0.997495, epsilon));
try expect(math.approxEqAbs(f64, sin64(-1.5), -0.997495, epsilon));
try expect(math.approxEqAbs(f64, sin64(37.45), -0.246543, epsilon));
try expect(math.approxEqAbs(f64, sin64(89.123), 0.916166, epsilon));
}
// Special inputs for f32 sine: zeros compare equal to zero, and +inf, -inf and
// NaN must all produce NaN.
// NOTE(review): each check appears in both sin_ and sin32 form.
test "math.sin32.special" {
try expect(sin_(f32, 0.0) == 0.0);
try expect(sin_(f32, -0.0) == -0.0);
try expect(math.isNan(sin_(f32, math.inf(f32))));
try expect(math.isNan(sin_(f32, -math.inf(f32))));
try expect(math.isNan(sin_(f32, math.nan(f32))));
try expect(sin32(0.0) == 0.0);
try expect(sin32(-0.0) == -0.0);
try expect(math.isNan(sin32(math.inf(f32))));
try expect(math.isNan(sin32(-math.inf(f32))));
try expect(math.isNan(sin32(math.nan(f32))));
}
// Special inputs for f64 sine: zeros compare equal to zero, and +inf, -inf and
// NaN must all produce NaN.
// NOTE(review): each check appears in both sin_ and sin64 form.
test "math.sin64.special" {
try expect(sin_(f64, 0.0) == 0.0);
try expect(sin_(f64, -0.0) == -0.0);
try expect(math.isNan(sin_(f64, math.inf(f64))));
try expect(math.isNan(sin_(f64, -math.inf(f64))));
try expect(math.isNan(sin_(f64, math.nan(f64))));
try expect(sin64(0.0) == 0.0);
try expect(sin64(-0.0) == -0.0);
try expect(math.isNan(sin64(math.inf(f64))));
try expect(math.isNan(sin64(-math.inf(f64))));
try expect(math.isNan(sin64(math.nan(f64))));
}
// Smoke test named after #9901 (presumably an issue number): sin only has to
// return for this input; the value itself is discarded.
test "math.sin32 #9901" {
    // Sign bit set, biased exponent 0xC7: a large-magnitude negative finite f32.
    const bits: u32 = 0xE3FF0000;
    const input = @bitCast(f32, bits);
    _ = std.math.sin(input);
}
// Smoke test named after #9901 (presumably an issue number): sin only has to
// return for this input; the value itself is discarded.
test "math.sin64 #9901" {
    // Sign bit set, biased exponent 0x7F4: a large-magnitude negative finite f64.
    const bits: u64 = 0xFF4100FDFF800001;
    const input = @bitCast(f64, bits);
    _ = std.math.sin(input);
}

View File

@ -1,12 +1,18 @@
// Ported from go, which is licensed under a BSD-3 license.
// https://golang.org/LICENSE
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/tanf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/tan.c
// https://golang.org/src/math/tan.go
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
const kernel = @import("__trig.zig");
const __rem_pio2 = @import("__rem_pio2.zig").__rem_pio2;
const __rem_pio2f = @import("__rem_pio2f.zig").__rem_pio2f;
/// Returns the tangent of the radian value x.
///
/// Special Cases:
@ -16,102 +22,119 @@ const expect = std.testing.expect;
// Generic entry point: selects an implementation from the type of `x` and
// returns a value of that same type.
// NOTE(review): the switch lists f32/f64 twice (tan_ arms and tan32/tan64 arms).
// This looks like the previous and the current arms shown together — confirm
// which pair is live before relying on this text.
pub fn tan(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f32 => tan_(f32, x),
f64 => tan_(f64, x),
f32 => tan32(x),
f64 => tan64(x),
// Every other type is rejected at compile time.
else => @compileError("tan not implemented for " ++ @typeName(T)),
};
}
// Numerator coefficients of the rational expression
// w * ((Tp0 * w + Tp1) * w + Tp2) / (...Tq1..Tq4...) with w = z * z.
// NOTE(review): that expression only appears next to tan_-style j/y arithmetic;
// presumably these belong to the previous implementation — confirm.
const Tp0 = -1.30936939181383777646E4;
const Tp1 = 1.15351664838587416140E6;
const Tp2 = -1.79565251976484877988E7;
// f32 tangent. Works on the raw bit pattern of x: `ix` is the bits with the sign
// bit cleared, so the hex thresholds below order |x| against multiples of pi/4.
// Small ranges are shifted by a fixed multiple of pi/2 and handed to __tandf,
// whose second argument is true for the odd multiples; everything larger goes
// through __rem_pio2f.
// NOTE(review): this span also contains lines that do not fit tan32 as written
// (Tq1..Tq4, pi4a..m4pi, `fn tan_`, and the floor/j/y arithmetic). They look like
// the previous implementation shown alongside the new one — confirm.
fn tan32(x: f32) f32 {
// Small multiples of pi/2 rounded to double precision.
const t1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18
const t2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18
const t3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2
const t4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18
const Tq1 = 1.36812963470692954678E4;
const Tq2 = -1.32089234440210967447E6;
const Tq3 = 2.50083801823357915839E7;
const Tq4 = -5.38695755929454629881E7;
// Split the bit pattern: top bit is the sign, the remaining 31 bits are |x|.
var ix = @bitCast(u32, x);
const sign = ix >> 31 != 0;
ix &= 0x7fffffff;
const pi4a = 7.85398125648498535156e-1;
const pi4b = 3.77489470793079817668E-8;
const pi4c = 2.69515142907905952645E-15;
const m4pi = 1.273239544735162542821171882678754627704620361328125;
fn tan_(comptime T: type, x_: T) T {
const I = std.meta.Int(.signed, @typeInfo(T).Float.bits);
var x = x_;
if (x == 0 or math.isNan(x)) {
return x;
if (ix <= 0x3f490fda) { // |x| ~<= pi/4
if (ix < 0x39800000) { // |x| < 2**-12
// raise inexact if x!=0 and underflow if subnormal
math.doNotOptimizeAway(if (ix < 0x00800000) x / 0x1p120 else x + 0x1p120);
// For such tiny inputs x itself is returned as the result.
return x;
}
return kernel.__tandf(x, false);
}
if (math.isInf(x)) {
return math.nan(T);
if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4
if (ix <= 0x4016cbe3) { // |x| ~<= 3pi/4
// Shifted by pi/2 (odd multiple): kernel is called with odd = true.
return kernel.__tandf((if (sign) x + t1pio2 else x - t1pio2), true);
} else {
// Shifted by pi (even multiple): kernel is called with odd = false.
return kernel.__tandf((if (sign) x + t2pio2 else x - t2pio2), false);
}
}
if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4
if (ix <= 0x40afeddf) { // |x| ~<= 7*pi/4
// Shifted by 3*pi/2 (odd multiple): kernel is called with odd = true.
return kernel.__tandf((if (sign) x + t3pio2 else x - t3pio2), true);
} else {
// Shifted by 2*pi (even multiple): kernel is called with odd = false.
return kernel.__tandf((if (sign) x + t4pio2 else x - t4pio2), false);
}
}
var sign = x < 0;
x = math.fabs(x);
var y = math.floor(x * m4pi);
var j = @floatToInt(I, y);
if (j & 1 == 1) {
j += 1;
y += 1;
// tan(Inf or NaN) is NaN
if (ix >= 0x7f800000) {
return x - x;
}
const z = ((x - y * pi4a) - y * pi4b) - y * pi4c;
const w = z * z;
// General case: y is undefined until __rem_pio2f fills it in; the parity of
// the returned n is passed to the kernel as its `odd` flag.
var y: f64 = undefined;
const n = __rem_pio2f(x, &y);
return kernel.__tandf(y, n & 1 != 0);
}
var r = if (w > 1e-14)
z + z * (w * ((Tp0 * w + Tp1) * w + Tp2) / ((((w + Tq1) * w + Tq2) * w + Tq3) * w + Tq4))
else
z;
// f64 tangent. `ix` is the high 32 bits of x's bit pattern with the sign bit
// masked off, so the thresholds compare |x| by its leading bits only.
// NOTE(review): the `j & 2` / `r = -1 / r` lines and `return if (sign) -r else r;`
// use names this function never defines; they look like the previous
// implementation shown alongside the new one — confirm.
fn tan64(x: f64) f64 {
var ix = @bitCast(u64, x) >> 32;
ix &= 0x7fffffff;
if (j & 2 == 2) {
r = -1 / r;
// |x| ~< pi/4
if (ix <= 0x3fe921fb) {
if (ix < 0x3e400000) { // |x| < 2**-27
// raise inexact if x!=0 and underflow if subnormal
math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120);
// For such tiny inputs x itself is returned as the result.
return x;
}
return kernel.__tan(x, 0.0, false);
}
return if (sign) -r else r;
// tan(Inf or NaN) is NaN
if (ix >= 0x7ff00000) {
return x - x;
}
// General case: y is undefined until __rem_pio2 fills both elements; the
// parity of the returned n is passed to the kernel as its last argument.
var y: [2]f64 = undefined;
const n = __rem_pio2(x, &y);
return kernel.__tan(y[0], y[1], n & 1 != 0);
}
// Checks that the generic `tan` agrees with the per-width implementation at 0.0.
// NOTE(review): the same two checks appear in both tan_ and tan32/tan64 form.
test "math.tan" {
try expect(tan(@as(f32, 0.0)) == tan_(f32, 0.0));
try expect(tan(@as(f64, 0.0)) == tan_(f64, 0.0));
try expect(tan(@as(f32, 0.0)) == tan32(0.0));
try expect(tan(@as(f64, 0.0)) == tan64(0.0));
}
// Spot-checks f32 tangent against six-decimal reference values, within an
// absolute tolerance of `epsilon`.
// NOTE(review): `epsilon` is declared twice (1e-6 and 1e-5) and every check
// appears in both tan_ and tan32 form — presumably old and new lines together.
test "math.tan32" {
const epsilon = 0.000001;
const epsilon = 0.00001;
try expect(math.approxEqAbs(f32, tan_(f32, 0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f32, tan_(f32, 0.2), 0.202710, epsilon));
try expect(math.approxEqAbs(f32, tan_(f32, 0.8923), 1.240422, epsilon));
try expect(math.approxEqAbs(f32, tan_(f32, 1.5), 14.101420, epsilon));
try expect(math.approxEqAbs(f32, tan_(f32, 37.45), -0.254397, epsilon));
try expect(math.approxEqAbs(f32, tan_(f32, 89.123), 2.285852, epsilon));
try expect(math.approxEqAbs(f32, tan32(0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f32, tan32(0.2), 0.202710, epsilon));
try expect(math.approxEqAbs(f32, tan32(0.8923), 1.240422, epsilon));
try expect(math.approxEqAbs(f32, tan32(1.5), 14.101420, epsilon));
try expect(math.approxEqAbs(f32, tan32(37.45), -0.254397, epsilon));
try expect(math.approxEqAbs(f32, tan32(89.123), 2.285852, epsilon));
}
// Spot-checks f64 tangent against reference values within an absolute
// tolerance of 1e-6.
// NOTE(review): every check appears in both tan_ and tan64 form — presumably
// old and new lines together.
test "math.tan64" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f64, tan_(f64, 0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f64, tan_(f64, 0.2), 0.202710, epsilon));
try expect(math.approxEqAbs(f64, tan_(f64, 0.8923), 1.240422, epsilon));
try expect(math.approxEqAbs(f64, tan_(f64, 1.5), 14.101420, epsilon));
try expect(math.approxEqAbs(f64, tan_(f64, 37.45), -0.254397, epsilon));
try expect(math.approxEqAbs(f64, tan_(f64, 89.123), 2.2858376, epsilon));
try expect(math.approxEqAbs(f64, tan64(0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f64, tan64(0.2), 0.202710, epsilon));
try expect(math.approxEqAbs(f64, tan64(0.8923), 1.240422, epsilon));
try expect(math.approxEqAbs(f64, tan64(1.5), 14.101420, epsilon));
try expect(math.approxEqAbs(f64, tan64(37.45), -0.254397, epsilon));
try expect(math.approxEqAbs(f64, tan64(89.123), 2.2858376, epsilon));
}
// Special inputs for f32 tangent: zeros compare equal to zero, and +inf, -inf
// and NaN must all produce NaN.
// NOTE(review): each check appears in both tan_ and tan32 form.
test "math.tan32.special" {
try expect(tan_(f32, 0.0) == 0.0);
try expect(tan_(f32, -0.0) == -0.0);
try expect(math.isNan(tan_(f32, math.inf(f32))));
try expect(math.isNan(tan_(f32, -math.inf(f32))));
try expect(math.isNan(tan_(f32, math.nan(f32))));
try expect(tan32(0.0) == 0.0);
try expect(tan32(-0.0) == -0.0);
try expect(math.isNan(tan32(math.inf(f32))));
try expect(math.isNan(tan32(-math.inf(f32))));
try expect(math.isNan(tan32(math.nan(f32))));
}
// Special inputs for f64 tangent: zeros compare equal to zero, and +inf, -inf
// and NaN must all produce NaN.
// NOTE(review): each check appears in both tan_ and tan64 form.
test "math.tan64.special" {
try expect(tan_(f64, 0.0) == 0.0);
try expect(tan_(f64, -0.0) == -0.0);
try expect(math.isNan(tan_(f64, math.inf(f64))));
try expect(math.isNan(tan_(f64, -math.inf(f64))));
try expect(math.isNan(tan_(f64, math.nan(f64))));
try expect(tan64(0.0) == 0.0);
try expect(tan64(-0.0) == -0.0);
try expect(math.isNan(tan64(math.inf(f64))));
try expect(math.isNan(tan64(-math.inf(f64))));
try expect(math.isNan(tan64(math.nan(f64))));
}