compiler-rt: add add/sub for f80

This commit is contained in:
Veikka Tuominen 2022-01-21 21:49:02 +02:00
parent 9bbd3ab257
commit 6a736f0c8c
2 changed files with 175 additions and 0 deletions

View File

@ -237,12 +237,16 @@ comptime {
@export(__adddf3, .{ .name = "__adddf3", .linkage = linkage });
const __addtf3 = @import("compiler_rt/addXf3.zig").__addtf3;
@export(__addtf3, .{ .name = "__addtf3", .linkage = linkage });
const __addxf3 = @import("compiler_rt/addXf3.zig").__addxf3;
@export(__addxf3, .{ .name = "__addxf3", .linkage = linkage });
const __subsf3 = @import("compiler_rt/addXf3.zig").__subsf3;
@export(__subsf3, .{ .name = "__subsf3", .linkage = linkage });
const __subdf3 = @import("compiler_rt/addXf3.zig").__subdf3;
@export(__subdf3, .{ .name = "__subdf3", .linkage = linkage });
const __subtf3 = @import("compiler_rt/addXf3.zig").__subtf3;
@export(__subtf3, .{ .name = "__subtf3", .linkage = linkage });
const __subxf3 = @import("compiler_rt/addXf3.zig").__subxf3;
@export(__subxf3, .{ .name = "__subxf3", .linkage = linkage });
const __mulsf3 = @import("compiler_rt/mulXf3.zig").__mulsf3;
@export(__mulsf3, .{ .name = "__mulsf3", .linkage = linkage });

View File

@ -225,6 +225,177 @@ fn addXf3(comptime T: type, a: T, b: T) T {
return @bitCast(T, result);
}
fn normalize_f80(exp: *i32, significand: *u80) void {
const shift = @clz(u64, @truncate(u64, significand.*));
significand.* = (significand.* << shift);
exp.* += -@as(i8, shift);
}
pub fn __addxf3(a: f80, b: f80) callconv(.C) f80 {
var a_rep align(16) = @ptrCast(*const std.math.F80Repr, &a).*;
var b_rep align(16) = @ptrCast(*const std.math.F80Repr, &b).*;
var a_exp: i32 = a_rep.exp & 0x7FFF;
var b_exp: i32 = b_rep.exp & 0x7FFF;
const significand_bits = std.math.floatMantissaBits(f80);
const int_bit = 0x8000000000000000;
const significand_mask = 0x7FFFFFFFFFFFFFFF;
const qnan_bit = 0xC000000000000000;
const max_exp = 0x7FFF;
const sign_bit = 0x8000;
// Detect if a or b is infinity, or NaN.
if (a_exp == max_exp) {
if (a_rep.fraction ^ int_bit == 0) {
if (b_exp == max_exp and (b_rep.fraction ^ int_bit == 0)) {
// +/-infinity + -/+infinity = qNaN
return std.math.qnan_f80;
}
// +/-infinity + anything = +/- infinity
return a;
} else {
std.debug.assert(a_rep.fraction & significand_mask != 0);
// NaN + anything = qNaN
a_rep.fraction |= qnan_bit;
return @ptrCast(*const f80, &a_rep).*;
}
}
if (b_exp == max_exp) {
if (b_rep.fraction ^ int_bit == 0) {
// anything + +/-infinity = +/-infinity
return b;
} else {
std.debug.assert(b_rep.fraction & significand_mask != 0);
// anything + NaN = qNaN
b_rep.fraction |= qnan_bit;
return @ptrCast(*const f80, &b_rep).*;
}
}
const a_zero = (a_rep.fraction | @bitCast(u32, a_exp)) == 0;
const b_zero = (b_rep.fraction | @bitCast(u32, b_exp)) == 0;
if (a_zero) {
// zero + anything = anything
if (b_zero) {
// but we need to get the sign right for zero + zero
a_rep.exp &= b_rep.exp;
return @ptrCast(*const f80, &a_rep).*;
} else {
return b;
}
} else if (b_zero) {
// anything + zero = anything
return a;
}
var a_int: u80 = a_rep.fraction | (@as(u80, a_rep.exp & max_exp) << significand_bits);
var b_int: u80 = b_rep.fraction | (@as(u80, b_rep.exp & max_exp) << significand_bits);
// Swap a and b if necessary so that a has the larger absolute value.
if (b_int > a_int) {
const temp = a_rep;
a_rep = b_rep;
b_rep = temp;
}
// Extract the exponent and significand from the (possibly swapped) a and b.
a_exp = a_rep.exp & max_exp;
b_exp = b_rep.exp & max_exp;
a_int = a_rep.fraction;
b_int = b_rep.fraction;
// Normalize any denormals, and adjust the exponent accordingly.
normalize_f80(&a_exp, &a_int);
normalize_f80(&b_exp, &b_int);
// The sign of the result is the sign of the larger operand, a. If they
// have opposite signs, we are performing a subtraction; otherwise addition.
const result_sign = a_rep.exp & sign_bit;
const subtraction = (a_rep.exp ^ b_rep.exp) & sign_bit != 0;
// Shift the significands to give us round, guard and sticky, and or in the
// implicit significand bit. (If we fell through from the denormal path it
// was already set by normalize( ), but setting it twice won't hurt
// anything.)
a_int = a_int << 3;
b_int = b_int << 3;
// Shift the significand of b by the difference in exponents, with a sticky
// bottom bit to get rounding correct.
const @"align" = @intCast(u80, a_exp - b_exp);
if (@"align" != 0) {
if (@"align" < 80) {
const sticky = if (b_int << @intCast(u7, 80 - @"align") != 0) @as(u80, 1) else 0;
b_int = (b_int >> @truncate(u7, @"align")) | sticky;
} else {
b_int = 1; // sticky; b is known to be non-zero.
}
}
if (subtraction) {
a_int -= b_int;
// If a == -b, return +zero.
if (a_int == 0) return 0.0;
// If partial cancellation occurred, we need to left-shift the result
// and adjust the exponent:
if (a_int < int_bit << 3) {
const shift = @intCast(i32, @clz(u80, a_int)) - @intCast(i32, @clz(u80, int_bit << 3));
a_int <<= @intCast(u7, shift);
a_exp -= shift;
}
} else { // addition
a_int += b_int;
// If the addition carried up, we need to right-shift the result and
// adjust the exponent:
if (a_int & (int_bit << 4) != 0) {
const sticky = a_int & 1;
a_int = a_int >> 1 | sticky;
a_exp += 1;
}
}
// If we have overflowed the type, return +/- infinity:
if (a_exp >= max_exp) {
a_rep.exp = max_exp | result_sign;
a_rep.fraction = int_bit; // integer bit is set for +/-inf
return @ptrCast(*const f80, &a_rep).*;
}
if (a_exp <= 0) {
// Result is denormal before rounding; the exponent is zero and we
// need to shift the significand.
const shift = @intCast(u80, 1 - a_exp);
const sticky = if (a_int << @intCast(u7, 80 - shift) != 0) @as(u1, 1) else 0;
a_int = a_int >> @intCast(u7, shift | sticky);
a_exp = 0;
}
// Low three bits are round, guard, and sticky.
const round_guard_sticky = @truncate(u3, a_int);
// Shift the significand into place.
a_int = @truncate(u64, a_int >> 3);
// // Insert the exponent and sign.
a_int |= (@intCast(u80, a_exp) | result_sign) << significand_bits;
// Final rounding. The result may overflow to infinity, but that is the
// correct result in that case.
if (round_guard_sticky > 0x4) a_int += 1;
if (round_guard_sticky == 0x4) a_int += a_int & 1;
a_rep.fraction = @truncate(u64, a_int);
a_rep.exp = @truncate(u16, a_int >> significand_bits);
return @ptrCast(*const f80, &a_rep).*;
}
pub fn __subxf3(a: f80, b: f80) callconv(.C) f80 {
var b_rep align(16) = @ptrCast(*const std.math.F80Repr, &b).*;
b_rep.exp ^= 0x8000;
return __addxf3(a, @ptrCast(*const f80, &b_rep).*);
}
test {
_ = @import("addXf3_test.zig");
}