Merge pull request #10738 from Vexu/f80

Add compiler-rt functions for f80
Andrew Kelley 2022-02-05 20:57:32 -05:00 committed by GitHub
commit 8dcb1eba60
9 changed files with 669 additions and 26 deletions


@@ -43,11 +43,13 @@ pub const f128_max = @bitCast(f128, @as(u128, 0x7FFEFFFFFFFFFFFFFFFFFFFFFFFFFFFF
pub const f128_epsilon = @bitCast(f128, @as(u128, 0x3F8F0000000000000000000000000000));
pub const f128_toint = 1.0 / f128_epsilon;
const F80Repr = if (@import("builtin").cpu.arch.endian() == .Little) extern struct {
pub const F80Repr = if (@import("builtin").cpu.arch.endian() == .Little) extern struct {
fraction: u64,
exp: u16,
_pad: u32 = undefined,
} else extern struct {
exp: u16,
_pad: u32 = undefined, // TODO verify compatibility with hardware
fraction: u64,
};
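For orientation: the sign lives in bit 15 of exp, the low 15 bits of exp hold the biased exponent (bias 16383), and the significand's integer bit is explicit in bit 63 of fraction. A minimal illustrative sketch of the layout, assuming the little-endian field order above:
test "F80Repr layout of 1.0" {
    const one: f80 = 1.0;
    const rep = @ptrCast(*const F80Repr, &one).*;
    // 1.0 is sign 0, biased exponent 16383, significand 1.0 (only the explicit integer bit set)
    try @import("std").testing.expectEqual(@as(u16, 0x3FFF), rep.exp);
    try @import("std").testing.expectEqual(@as(u64, 0x8000000000000000), rep.fraction);
}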


@@ -39,12 +39,23 @@ comptime {
const __extendhftf2 = @import("compiler_rt/extendXfYf2.zig").__extendhftf2;
@export(__extendhftf2, .{ .name = "__extendhftf2", .linkage = linkage });
const __extendhfxf2 = @import("compiler_rt/extend_f80.zig").__extendhfxf2;
@export(__extendhfxf2, .{ .name = "__extendhfxf2", .linkage = linkage });
const __extendffxf2 = @import("compiler_rt/extend_f80.zig").__extendffxf2;
@export(__extendffxf2, .{ .name = "__extendffxf2", .linkage = linkage });
const __extenddfxf2 = @import("compiler_rt/extend_f80.zig").__extenddfxf2;
@export(__extenddfxf2, .{ .name = "__extenddfxf2", .linkage = linkage });
const __extendxftf2 = @import("compiler_rt/extend_f80.zig").__extendxftf2;
@export(__extendxftf2, .{ .name = "__extendxftf2", .linkage = linkage });
const __lesf2 = @import("compiler_rt/compareXf2.zig").__lesf2;
@export(__lesf2, .{ .name = "__lesf2", .linkage = linkage });
const __ledf2 = @import("compiler_rt/compareXf2.zig").__ledf2;
@export(__ledf2, .{ .name = "__ledf2", .linkage = linkage });
const __letf2 = @import("compiler_rt/compareXf2.zig").__letf2;
@export(__letf2, .{ .name = "__letf2", .linkage = linkage });
const __lexf2 = @import("compiler_rt/compareXf2.zig").__lexf2;
@export(__lexf2, .{ .name = "__lexf2", .linkage = linkage });
const __gesf2 = @import("compiler_rt/compareXf2.zig").__gesf2;
@export(__gesf2, .{ .name = "__gesf2", .linkage = linkage });
@@ -52,26 +63,36 @@ comptime {
@export(__gedf2, .{ .name = "__gedf2", .linkage = linkage });
const __getf2 = @import("compiler_rt/compareXf2.zig").__getf2;
@export(__getf2, .{ .name = "__getf2", .linkage = linkage });
const __gexf2 = @import("compiler_rt/compareXf2.zig").__gexf2;
@export(__gexf2, .{ .name = "__gexf2", .linkage = linkage });
const __eqsf2 = @import("compiler_rt/compareXf2.zig").__eqsf2;
@export(__eqsf2, .{ .name = "__eqsf2", .linkage = linkage });
const __eqdf2 = @import("compiler_rt/compareXf2.zig").__eqdf2;
@export(__eqdf2, .{ .name = "__eqdf2", .linkage = linkage });
const __eqxf2 = @import("compiler_rt/compareXf2.zig").__eqxf2;
@export(__eqxf2, .{ .name = "__eqxf2", .linkage = linkage });
const __ltsf2 = @import("compiler_rt/compareXf2.zig").__ltsf2;
@export(__ltsf2, .{ .name = "__ltsf2", .linkage = linkage });
const __ltdf2 = @import("compiler_rt/compareXf2.zig").__ltdf2;
@export(__ltdf2, .{ .name = "__ltdf2", .linkage = linkage });
const __ltxf2 = @import("compiler_rt/compareXf2.zig").__ltxf2;
@export(__ltxf2, .{ .name = "__ltxf2", .linkage = linkage });
const __nesf2 = @import("compiler_rt/compareXf2.zig").__nesf2;
@export(__nesf2, .{ .name = "__nesf2", .linkage = linkage });
const __nedf2 = @import("compiler_rt/compareXf2.zig").__nedf2;
@export(__nedf2, .{ .name = "__nedf2", .linkage = linkage });
const __nexf2 = @import("compiler_rt/compareXf2.zig").__nexf2;
@export(__nexf2, .{ .name = "__nexf2", .linkage = linkage });
const __gtsf2 = @import("compiler_rt/compareXf2.zig").__gtsf2;
@export(__gtsf2, .{ .name = "__gtsf2", .linkage = linkage });
const __gtdf2 = @import("compiler_rt/compareXf2.zig").__gtdf2;
@export(__gtdf2, .{ .name = "__gtdf2", .linkage = linkage });
const __gtxf2 = @import("compiler_rt/compareXf2.zig").__gtxf2;
@export(__gtxf2, .{ .name = "__gtxf2", .linkage = linkage });
if (!is_test) {
@export(__lesf2, .{ .name = "__cmpsf2", .linkage = linkage });
@@ -179,14 +200,14 @@ comptime {
const __truncdfsf2 = @import("compiler_rt/truncXfYf2.zig").__truncdfsf2;
@export(__truncdfsf2, .{ .name = "__truncdfsf2", .linkage = linkage });
if (!long_double_is_f128) {
// TODO implement these
//const __extendxftf2 = @import("compiler_rt/extendXfYf2.zig").__extendxftf2;
//@export(__extendxftf2, .{ .name = "__extendxftf2", .linkage = linkage });
//const __trunctfxf2 = @import("compiler_rt/truncXfYf2.zig").__trunctfxf2;
//@export(__trunctfxf2, .{ .name = "__trunctfxf2", .linkage = linkage });
}
const __truncxfhf2 = @import("compiler_rt/trunc_f80.zig").__truncxfhf2;
@export(__truncxfhf2, .{ .name = "__truncxfhf2", .linkage = linkage });
const __truncxfff2 = @import("compiler_rt/trunc_f80.zig").__truncxfff2;
@export(__truncxfff2, .{ .name = "__truncxfff2", .linkage = linkage });
const __truncxfdf2 = @import("compiler_rt/trunc_f80.zig").__truncxfdf2;
@export(__truncxfdf2, .{ .name = "__truncxfdf2", .linkage = linkage });
const __trunctfxf2 = @import("compiler_rt/trunc_f80.zig").__trunctfxf2;
@export(__trunctfxf2, .{ .name = "__trunctfxf2", .linkage = linkage });
if (builtin.zig_backend == .stage1) {
switch (arch) {
@@ -216,12 +237,16 @@ comptime {
@export(__adddf3, .{ .name = "__adddf3", .linkage = linkage });
const __addtf3 = @import("compiler_rt/addXf3.zig").__addtf3;
@export(__addtf3, .{ .name = "__addtf3", .linkage = linkage });
const __addxf3 = @import("compiler_rt/addXf3.zig").__addxf3;
@export(__addxf3, .{ .name = "__addxf3", .linkage = linkage });
const __subsf3 = @import("compiler_rt/addXf3.zig").__subsf3;
@export(__subsf3, .{ .name = "__subsf3", .linkage = linkage });
const __subdf3 = @import("compiler_rt/addXf3.zig").__subdf3;
@export(__subdf3, .{ .name = "__subdf3", .linkage = linkage });
const __subtf3 = @import("compiler_rt/addXf3.zig").__subtf3;
@export(__subtf3, .{ .name = "__subtf3", .linkage = linkage });
const __subxf3 = @import("compiler_rt/addXf3.zig").__subxf3;
@export(__subxf3, .{ .name = "__subxf3", .linkage = linkage });
const __mulsf3 = @import("compiler_rt/mulXf3.zig").__mulsf3;
@export(__mulsf3, .{ .name = "__mulsf3", .linkage = linkage });
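Taken together, these exports let f80 arithmetic and comparisons be lowered to libcalls on targets without native 80-bit floats. A hypothetical illustration (function name invented) of user code that would now link against __addxf3 and __gexf2:
fn sumAtLeast(a: f80, b: f80, limit: f80) bool {
    // on a soft-float target, + lowers to a call to __addxf3 and >= to __gexf2
    return (a + b) >= limit;
}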


@@ -225,6 +225,177 @@ fn addXf3(comptime T: type, a: T, b: T) T {
return @bitCast(T, result);
}
fn normalize_f80(exp: *i32, significand: *u80) void {
const shift = @clz(u64, @truncate(u64, significand.*));
significand.* = (significand.* << shift);
exp.* += -@as(i8, shift);
}
pub fn __addxf3(a: f80, b: f80) callconv(.C) f80 {
var a_rep align(16) = @ptrCast(*const std.math.F80Repr, &a).*;
var b_rep align(16) = @ptrCast(*const std.math.F80Repr, &b).*;
var a_exp: i32 = a_rep.exp & 0x7FFF;
var b_exp: i32 = b_rep.exp & 0x7FFF;
const significand_bits = std.math.floatMantissaBits(f80);
const int_bit = 0x8000000000000000;
const significand_mask = 0x7FFFFFFFFFFFFFFF;
const qnan_bit = 0xC000000000000000;
const max_exp = 0x7FFF;
const sign_bit = 0x8000;
// Detect if a or b is infinity, or NaN.
if (a_exp == max_exp) {
if (a_rep.fraction ^ int_bit == 0) {
if (b_exp == max_exp and (b_rep.fraction ^ int_bit == 0)) {
// +/-infinity + -/+infinity = qNaN
return std.math.qnan_f80;
}
// +/-infinity + anything = +/- infinity
return a;
} else {
std.debug.assert(a_rep.fraction & significand_mask != 0);
// NaN + anything = qNaN
a_rep.fraction |= qnan_bit;
return @ptrCast(*const f80, &a_rep).*;
}
}
if (b_exp == max_exp) {
if (b_rep.fraction ^ int_bit == 0) {
// anything + +/-infinity = +/-infinity
return b;
} else {
std.debug.assert(b_rep.fraction & significand_mask != 0);
// anything + NaN = qNaN
b_rep.fraction |= qnan_bit;
return @ptrCast(*const f80, &b_rep).*;
}
}
const a_zero = (a_rep.fraction | @bitCast(u32, a_exp)) == 0;
const b_zero = (b_rep.fraction | @bitCast(u32, b_exp)) == 0;
if (a_zero) {
// zero + anything = anything
if (b_zero) {
// but we need to get the sign right for zero + zero
a_rep.exp &= b_rep.exp;
return @ptrCast(*const f80, &a_rep).*;
} else {
return b;
}
} else if (b_zero) {
// anything + zero = anything
return a;
}
var a_int: u80 = a_rep.fraction | (@as(u80, a_rep.exp & max_exp) << significand_bits);
var b_int: u80 = b_rep.fraction | (@as(u80, b_rep.exp & max_exp) << significand_bits);
// Swap a and b if necessary so that a has the larger absolute value.
if (b_int > a_int) {
const temp = a_rep;
a_rep = b_rep;
b_rep = temp;
}
// Extract the exponent and significand from the (possibly swapped) a and b.
a_exp = a_rep.exp & max_exp;
b_exp = b_rep.exp & max_exp;
a_int = a_rep.fraction;
b_int = b_rep.fraction;
// Normalize any denormals, and adjust the exponent accordingly.
normalize_f80(&a_exp, &a_int);
normalize_f80(&b_exp, &b_int);
// The sign of the result is the sign of the larger operand, a. If they
// have opposite signs, we are performing a subtraction; otherwise addition.
const result_sign = a_rep.exp & sign_bit;
const subtraction = (a_rep.exp ^ b_rep.exp) & sign_bit != 0;
// Shift the significands to give us round, guard and sticky, and OR in the
// explicit integer bit. (If we fell through from the denormal path it
// was already set by normalize_f80(), but setting it twice won't hurt
// anything.)
a_int = a_int << 3;
b_int = b_int << 3;
// Shift the significand of b by the difference in exponents, with a sticky
// bottom bit to get rounding correct.
const @"align" = @intCast(u80, a_exp - b_exp);
if (@"align" != 0) {
if (@"align" < 80) {
const sticky = if (b_int << @intCast(u7, 80 - @"align") != 0) @as(u80, 1) else 0;
b_int = (b_int >> @truncate(u7, @"align")) | sticky;
} else {
b_int = 1; // sticky; b is known to be non-zero.
}
}
if (subtraction) {
a_int -= b_int;
// If a == -b, return +zero.
if (a_int == 0) return 0.0;
// If partial cancellation occurred, we need to left-shift the result
// and adjust the exponent:
if (a_int < int_bit << 3) {
const shift = @intCast(i32, @clz(u80, a_int)) - @intCast(i32, @clz(u80, int_bit << 3));
a_int <<= @intCast(u7, shift);
a_exp -= shift;
}
} else { // addition
a_int += b_int;
// If the addition carried up, we need to right-shift the result and
// adjust the exponent:
if (a_int & (int_bit << 4) != 0) {
const sticky = a_int & 1;
a_int = a_int >> 1 | sticky;
a_exp += 1;
}
}
// If we have overflowed the type, return +/- infinity:
if (a_exp >= max_exp) {
a_rep.exp = max_exp | result_sign;
a_rep.fraction = int_bit; // integer bit is set for +/-inf
return @ptrCast(*const f80, &a_rep).*;
}
if (a_exp <= 0) {
// Result is denormal before rounding; the exponent is zero and we
// need to shift the significand.
const shift = @intCast(u80, 1 - a_exp);
const sticky = if (a_int << @intCast(u7, 80 - shift) != 0) @as(u1, 1) else 0;
a_int = (a_int >> @intCast(u7, shift)) | sticky;
a_exp = 0;
}
// Low three bits are round, guard, and sticky.
const round_guard_sticky = @truncate(u3, a_int);
// Shift the significand into place.
a_int = @truncate(u64, a_int >> 3);
// Insert the exponent and sign.
a_int |= (@intCast(u80, a_exp) | result_sign) << significand_bits;
// Final rounding. The result may overflow to infinity, but that is the
// correct result in that case.
if (round_guard_sticky > 0x4) a_int += 1;
if (round_guard_sticky == 0x4) a_int += a_int & 1;
a_rep.fraction = @truncate(u64, a_int);
a_rep.exp = @truncate(u16, a_int >> significand_bits);
return @ptrCast(*const f80, &a_rep).*;
}
pub fn __subxf3(a: f80, b: f80) callconv(.C) f80 {
var b_rep align(16) = @ptrCast(*const std.math.F80Repr, &b).*;
b_rep.exp ^= 0x8000;
return __addxf3(a, @ptrCast(*const f80, &b_rep).*);
}
test {
_ = @import("addXf3_test.zig");
}
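addXf3_test.zig carries the real coverage; the following is only an illustrative sketch of the special cases handled above:
test "__addxf3 special cases (sketch)" {
    const math = std.math;
    const testing = std.testing;
    // ordinary addition and subtraction
    try testing.expectEqual(@as(f80, 3.75), __addxf3(1.5, 2.25));
    // a - a returns +0
    try testing.expectEqual(@as(f80, 0.0), __subxf3(1.5, 1.5));
    // +inf + -inf produces a quiet NaN
    try testing.expect(math.isNan(__addxf3(math.inf(f80), -math.inf(f80))));
}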


@@ -144,6 +144,73 @@ pub fn __gtdf2(a: f64, b: f64) callconv(.C) i32 {
return __gedf2(a, b);
}
// Comparison between f80
pub inline fn cmp_f80(comptime RT: type, a: f80, b: f80) RT {
const a_rep = @ptrCast(*const std.math.F80Repr, &a).*;
const b_rep = @ptrCast(*const std.math.F80Repr, &b).*;
const sig_bits = std.math.floatMantissaBits(f80);
const int_bit = 0x8000000000000000;
const sign_bit = 0x8000;
const special_exp = 0x7FFF;
// If either a or b is NaN, they are unordered.
if ((a_rep.exp & special_exp == special_exp and a_rep.fraction ^ int_bit != 0) or
(b_rep.exp & special_exp == special_exp and b_rep.fraction ^ int_bit != 0))
return RT.Unordered;
// If a and b are both zeros, they are equal.
if ((a_rep.fraction | b_rep.fraction) | ((a_rep.exp | b_rep.exp) & special_exp) == 0)
return .Equal;
if (@boolToInt(a_rep.exp == b_rep.exp) & @boolToInt(a_rep.fraction == b_rep.fraction) != 0) {
return .Equal;
} else if (a_rep.exp & sign_bit != b_rep.exp & sign_bit) {
// signs are different
if (@bitCast(i16, a_rep.exp) < @bitCast(i16, b_rep.exp)) {
return .Less;
} else {
return .Greater;
}
} else {
const a_fraction = a_rep.fraction | (@as(u80, a_rep.exp) << sig_bits);
const b_fraction = b_rep.fraction | (@as(u80, b_rep.exp) << sig_bits);
if (a_fraction < b_fraction) {
return .Less;
} else {
return .Greater;
}
}
}
pub fn __lexf2(a: f80, b: f80) callconv(.C) i32 {
@setRuntimeSafety(builtin.is_test);
const float = cmp_f80(LE, a, b);
return @bitCast(i32, float);
}
pub fn __gexf2(a: f80, b: f80) callconv(.C) i32 {
@setRuntimeSafety(builtin.is_test);
const float = cmp_f80(GE, a, b);
return @bitCast(i32, float);
}
pub fn __eqxf2(a: f80, b: f80) callconv(.C) i32 {
return __lexf2(a, b);
}
pub fn __ltxf2(a: f80, b: f80) callconv(.C) i32 {
return __lexf2(a, b);
}
pub fn __nexf2(a: f80, b: f80) callconv(.C) i32 {
return __lexf2(a, b);
}
pub fn __gtxf2(a: f80, b: f80) callconv(.C) i32 {
return __gexf2(a, b);
}
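These wrappers follow the usual compiler-rt return conventions, which the stage1 lowering further below relies on: __lexf2 reports -1/0/1 for less/equal/greater and 1 when unordered, while __gexf2 reports -1 when unordered. A caller-side sketch of what each exported symbol is meant to satisfy (helper names invented):
fn lt(a: f80, b: f80) bool {
    return __lexf2(a, b) == -1; // unordered yields 1, so this is false for NaN
}
fn le(a: f80, b: f80) bool {
    return __lexf2(a, b) <= 0;
}
fn ge(a: f80, b: f80) bool {
    return __gexf2(a, b) >= 0; // unordered yields -1, so this is false for NaN
}
fn gt(a: f80, b: f80) bool {
    return __gexf2(a, b) == 1;
}
fn eq(a: f80, b: f80) bool {
    return __eqxf2(a, b) == 0;
}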
// Comparison between f128
pub fn __letf2(a: f128, b: f128) callconv(.C) i32 {


@@ -27,11 +27,6 @@ pub fn __extendhftf2(a: F16T) callconv(.C) f128 {
return extendXfYf2(f128, f16, @bitCast(u16, a));
}
pub fn __extendxftf2(a: c_longdouble) callconv(.C) f128 {
_ = a;
@panic("TODO implement");
}
pub fn __aeabi_h2f(arg: u16) callconv(.AAPCS) f32 {
@setRuntimeSafety(false);
return @call(.{ .modifier = .always_inline }, extendXfYf2, .{ f32, f16, arg });


@@ -0,0 +1,131 @@
const std = @import("std");
const builtin = @import("builtin");
const is_test = builtin.is_test;
const native_arch = builtin.cpu.arch;
// AArch64 is the only ABI (at the moment) to support f16 arguments without the
// need for extending them to wider fp types.
pub const F16T = if (native_arch.isAARCH64()) f16 else u16;
pub fn __extendhfxf2(a: F16T) callconv(.C) f80 {
return extendF80(f16, @bitCast(u16, a));
}
pub fn __extendffxf2(a: f32) callconv(.C) f80 {
return extendF80(f32, @bitCast(u32, a));
}
pub fn __extenddfxf2(a: f64) callconv(.C) f80 {
return extendF80(f64, @bitCast(u64, a));
}
inline fn extendF80(comptime src_t: type, a: std.meta.Int(.unsigned, @typeInfo(src_t).Float.bits)) f80 {
@setRuntimeSafety(builtin.is_test);
const src_rep_t = std.meta.Int(.unsigned, @typeInfo(src_t).Float.bits);
const src_sig_bits = std.math.floatMantissaBits(src_t);
const dst_int_bit = 0x8000000000000000;
const dst_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit
const dst_exp_bias = 16383;
const src_bits = @bitSizeOf(src_t);
const src_exp_bits = src_bits - src_sig_bits - 1;
const src_inf_exp = (1 << src_exp_bits) - 1;
const src_exp_bias = src_inf_exp >> 1;
const src_min_normal = 1 << src_sig_bits;
const src_inf = src_inf_exp << src_sig_bits;
const src_sign_mask = 1 << (src_sig_bits + src_exp_bits);
const src_abs_mask = src_sign_mask - 1;
const src_qnan = 1 << (src_sig_bits - 1);
const src_nan_code = src_qnan - 1;
var dst: std.math.F80Repr align(16) = undefined;
// Break a into a sign and representation of the absolute value
const a_abs = a & src_abs_mask;
const sign: u16 = if (a & src_sign_mask != 0) 0x8000 else 0;
if (a_abs -% src_min_normal < src_inf - src_min_normal) {
// a is a normal number.
// Extend to the destination type by shifting the significand and
// exponent into the proper position and rebiasing the exponent.
dst.exp = @intCast(u16, a_abs >> src_sig_bits);
dst.exp += dst_exp_bias - src_exp_bias;
dst.fraction = @as(u64, a_abs) << (dst_sig_bits - src_sig_bits);
dst.fraction |= dst_int_bit; // bit 64 is always set for normal numbers
} else if (a_abs >= src_inf) {
// a is NaN or infinity.
// Conjure the result by beginning with infinity, then setting the qNaN
// bit (if needed) and right-aligning the rest of the trailing NaN
// payload field.
dst.exp = 0x7fff;
dst.fraction = dst_int_bit;
dst.fraction |= @as(u64, a_abs & src_qnan) << (dst_sig_bits - src_sig_bits);
dst.fraction |= @as(u64, a_abs & src_nan_code) << (dst_sig_bits - src_sig_bits);
} else if (a_abs != 0) {
// a is denormal.
// renormalize the significand and clear the leading bit, then insert
// the correct adjusted exponent in the destination type.
const scale: u16 = @clz(src_rep_t, a_abs) -
@clz(src_rep_t, @as(src_rep_t, src_min_normal));
dst.fraction = @as(u64, a_abs) << @intCast(u6, dst_sig_bits - src_sig_bits + scale);
dst.fraction |= dst_int_bit; // bit 64 is always set for normal numbers
dst.exp = @truncate(u16, a_abs >> @intCast(u4, src_sig_bits - scale));
dst.exp ^= 1;
dst.exp |= dst_exp_bias - src_exp_bias - scale + 1;
} else {
// a is zero.
dst.exp = 0;
dst.fraction = 0;
}
dst.exp |= sign;
return @ptrCast(*const f80, &dst).*;
}
pub fn __extendxftf2(a: f80) callconv(.C) f128 {
@setRuntimeSafety(builtin.is_test);
const src_int_bit: u64 = 0x8000000000000000;
const src_sig_mask = ~src_int_bit;
const src_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit
const dst_sig_bits = std.math.floatMantissaBits(f128);
const dst_bits = @bitSizeOf(f128);
const dst_min_normal = @as(u128, 1) << dst_sig_bits;
// Break a into a sign and representation of the absolute value
var a_rep = @ptrCast(*const std.math.F80Repr, &a).*;
const sign = a_rep.exp & 0x8000;
a_rep.exp &= 0x7FFF;
var abs_result: u128 = undefined;
if (a_rep.exp == 0 and a_rep.fraction == 0) {
// zero
abs_result = 0;
} else if (a_rep.exp == 0x7FFF) {
// a is nan or infinite
abs_result = @as(u128, a_rep.fraction) << (dst_sig_bits - src_sig_bits);
abs_result |= @as(u128, a_rep.exp) << dst_sig_bits;
} else if (a_rep.fraction & src_int_bit != 0) {
// a is a normal value
abs_result = @as(u128, a_rep.fraction & src_sig_mask) << (dst_sig_bits - src_sig_bits);
abs_result |= @as(u128, a_rep.exp) << dst_sig_bits;
} else {
// a is denormal
// renormalize the significand and clear the leading bit and integer part,
// then insert the correct adjusted exponent in the destination type.
const scale: u32 = @clz(u64, a_rep.fraction);
abs_result = @as(u128, a_rep.fraction) << @intCast(u7, dst_sig_bits - src_sig_bits + scale + 1);
abs_result ^= dst_min_normal;
abs_result |= @as(u128, scale + 1) << dst_sig_bits;
}
// Apply the signbit to (dst_t)abs(a).
const result: u128 align(@alignOf(f128)) = abs_result | @as(u128, sign) << (dst_bits - 16);
return @bitCast(f128, result);
}
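A small sanity sketch for the widening entry points (illustrative only; the constants are exactly representable in every format involved):
test "f80 extension sketch" {
    const testing = std.testing;
    // f32 -> f80 and f64 -> f80
    try testing.expect(__extendffxf2(1.5) == 1.5);
    try testing.expect(__extenddfxf2(-0.25) == -0.25);
    // f80 -> f128 widening is exact as well
    try testing.expect(__extendxftf2(1024.5) == 1024.5);
}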


@@ -26,11 +26,6 @@ pub fn __trunctfdf2(a: f128) callconv(.C) f64 {
return truncXfYf2(f64, f128, a);
}
pub fn __trunctfxf2(a: f128) callconv(.C) c_longdouble {
_ = a;
@panic("TODO implement");
}
pub fn __truncdfsf2(a: f64) callconv(.C) f32 {
return truncXfYf2(f32, f64, a);
}


@@ -0,0 +1,159 @@
const std = @import("std");
const builtin = @import("builtin");
const native_arch = builtin.cpu.arch;
// AArch64 is the only ABI (at the moment) to support f16 arguments without the
// need for extending them to wider fp types.
pub const F16T = if (native_arch.isAARCH64()) f16 else u16;
pub fn __truncxfhf2(a: f80) callconv(.C) F16T {
return @bitCast(F16T, trunc(f16, a));
}
pub fn __truncxfff2(a: f80) callconv(.C) f32 {
return trunc(f32, a);
}
pub fn __truncxfdf2(a: f80) callconv(.C) f64 {
return trunc(f64, a);
}
inline fn trunc(comptime dst_t: type, a: f80) dst_t {
@setRuntimeSafety(builtin.is_test);
const dst_rep_t = std.meta.Int(.unsigned, @typeInfo(dst_t).Float.bits);
const src_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit
const dst_sig_bits = std.math.floatMantissaBits(dst_t);
const src_exp_bias = 16383;
const round_mask = (1 << (src_sig_bits - dst_sig_bits)) - 1;
const halfway = 1 << (src_sig_bits - dst_sig_bits - 1);
const dst_bits = @typeInfo(dst_t).Float.bits;
const dst_exp_bits = dst_bits - dst_sig_bits - 1;
const dst_inf_exp = (1 << dst_exp_bits) - 1;
const dst_exp_bias = dst_inf_exp >> 1;
const underflow = src_exp_bias + 1 - dst_exp_bias;
const overflow = src_exp_bias + dst_inf_exp - dst_exp_bias;
const dst_qnan = 1 << (dst_sig_bits - 1);
const dst_nan_mask = dst_qnan - 1;
// Break a into a sign and representation of the absolute value
var a_rep = @ptrCast(*const std.math.F80Repr, &a).*;
const sign = a_rep.exp & 0x8000;
a_rep.exp &= 0x7FFF;
a_rep.fraction &= 0x7FFFFFFFFFFFFFFF;
var abs_result: dst_rep_t = undefined;
if (a_rep.exp -% underflow < a_rep.exp -% overflow) {
// The exponent of a is within the range of normal numbers in the
// destination format. We can convert by simply right-shifting with
// rounding and adjusting the exponent.
abs_result = @as(dst_rep_t, a_rep.exp) << dst_sig_bits;
abs_result |= @truncate(dst_rep_t, a_rep.fraction >> (src_sig_bits - dst_sig_bits));
abs_result -%= @as(dst_rep_t, src_exp_bias - dst_exp_bias) << dst_sig_bits;
const round_bits = a_rep.fraction & round_mask;
if (round_bits > halfway) {
// Round to nearest
abs_result += 1;
} else if (round_bits == halfway) {
// Ties to even
abs_result += abs_result & 1;
}
} else if (a_rep.exp == 0x7FFF and a_rep.fraction != 0) {
// a is NaN.
// Conjure the result by beginning with infinity, setting the qNaN
// bit and inserting the (truncated) trailing NaN field.
abs_result = @intCast(dst_rep_t, dst_inf_exp) << dst_sig_bits;
abs_result |= dst_qnan;
abs_result |= @intCast(dst_rep_t, (a_rep.fraction >> (src_sig_bits - dst_sig_bits)) & dst_nan_mask);
} else if (a_rep.exp >= overflow) {
// a overflows to infinity.
abs_result = @intCast(dst_rep_t, dst_inf_exp) << dst_sig_bits;
} else {
// a underflows on conversion to the destination type or is an exact
// zero. The result may be a denormal or zero. Extract the exponent
// to get the shift amount for the denormalization.
const shift = src_exp_bias - dst_exp_bias - a_rep.exp;
// Right shift by the denormalization amount with sticky.
if (shift > src_sig_bits) {
abs_result = 0;
} else {
const sticky = @boolToInt(a_rep.fraction << @intCast(u6, shift) != 0);
const denormalized_significand = a_rep.fraction >> @intCast(u6, shift) | sticky;
abs_result = @intCast(dst_rep_t, denormalized_significand >> (src_sig_bits - dst_sig_bits));
const round_bits = denormalized_significand & round_mask;
if (round_bits > halfway) {
// Round to nearest
abs_result += 1;
} else if (round_bits == halfway) {
// Ties to even
abs_result += abs_result & 1;
}
}
}
const result align(@alignOf(dst_t)) = abs_result | @as(dst_rep_t, sign) << dst_bits - 16;
return @bitCast(dst_t, result);
}
pub fn __trunctfxf2(a: f128) callconv(.C) f80 {
const src_sig_bits = std.math.floatMantissaBits(f128);
const dst_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit
// Various constants whose values follow from the type parameters.
// Any reasonable optimizer will fold and propagate all of these.
const src_bits = @typeInfo(f128).Float.bits;
const src_exp_bits = src_bits - src_sig_bits - 1;
const src_inf_exp = 0x7FFF;
const src_inf = src_inf_exp << src_sig_bits;
const src_sign_mask = 1 << (src_sig_bits + src_exp_bits);
const src_abs_mask = src_sign_mask - 1;
const round_mask = (1 << (src_sig_bits - dst_sig_bits)) - 1;
const halfway = 1 << (src_sig_bits - dst_sig_bits - 1);
const src_qnan = 1 << (src_sig_bits - 1);
const src_nan_mask = src_qnan - 1;
// Break a into a sign and representation of the absolute value
const a_rep = @bitCast(u128, a);
const a_abs = a_rep & src_abs_mask;
const sign: u16 = if (a_rep & src_sign_mask != 0) 0x8000 else 0;
var res: std.math.F80Repr align(16) = undefined;
if (a_abs > src_inf) {
// a is NaN.
// Conjure the result by beginning with infinity, setting the qNaN
// bit and inserting the (truncated) trailing NaN field.
res.exp = 0x7fff;
res.fraction = 0x8000000000000000;
res.fraction |= @truncate(u64, (a_abs & src_qnan) << (src_sig_bits - dst_sig_bits));
res.fraction |= @truncate(u64, (a_abs & src_nan_mask) << (src_sig_bits - dst_sig_bits));
} else {
// The exponent of a is within the range of normal numbers in the
// destination format. We can convert by simply right-shifting with
// rounding and adjusting the exponent.
res.fraction = @truncate(u64, a_abs >> (src_sig_bits - dst_sig_bits));
res.exp = @truncate(u16, a_abs >> src_sig_bits);
const round_bits = a_abs & round_mask;
if (round_bits > halfway) {
// Round to nearest
const exp = @addWithOverflow(u64, res.fraction, 1, &res.fraction);
res.exp += @boolToInt(exp);
} else if (round_bits == halfway) {
// Ties to even
const exp = @addWithOverflow(u64, res.fraction, res.fraction & 1, &res.fraction);
res.exp += @boolToInt(exp);
}
}
res.exp |= sign;
return @ptrCast(*const f80, &res).*;
}
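A small sanity sketch for the narrowing entry points (illustrative only; the constants are exact in the destination formats, so no rounding is exercised):
test "f80 truncation sketch" {
    const testing = std.testing;
    try testing.expect(__truncxfff2(1.5) == 1.5);
    try testing.expect(__truncxfdf2(-2.75) == -2.75);
    // f128 -> f80 keeps the top 64 significand bits
    try testing.expect(__trunctfxf2(3.0) == 3.0);
}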


@@ -3240,6 +3240,49 @@ static LLVMValueRef get_soft_f80_bin_op_func(CodeGen *g, const char *name, int p
return LLVMAddFunction(g->module, name, fn_type);
}
enum SoftF80Icmp {
NONE,
EQ_ZERO,
NE_ZERO,
LE_ZERO,
EQ_NEG,
GE_ZERO,
EQ_ONE,
};
static LLVMValueRef add_f80_icmp(CodeGen *g, LLVMValueRef val, SoftF80Icmp kind) {
switch (kind) {
case NONE:
return val;
case EQ_ZERO: {
LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, 0, true);
return LLVMBuildICmp(g->builder, LLVMIntEQ, val, zero, "");
}
case NE_ZERO: {
LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, 0, true);
return LLVMBuildICmp(g->builder, LLVMIntNE, val, zero, "");
}
case LE_ZERO: {
LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, 0, true);
return LLVMBuildICmp(g->builder, LLVMIntSLE, val, zero, "");
}
case EQ_NEG: {
LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, -1, true);
return LLVMBuildICmp(g->builder, LLVMIntEQ, val, zero, "");
}
case GE_ZERO: {
LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, 0, true);
return LLVMBuildICmp(g->builder, LLVMIntSGE, val, zero, "");
}
case EQ_ONE: {
LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, 1, true);
return LLVMBuildICmp(g->builder, LLVMIntEQ, val, zero, "");
}
default:
zig_unreachable();
}
}
static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable,
Stage1AirInstBinOp *bin_op_instruction)
{
@@ -3255,6 +3298,7 @@ static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable,
LLVMTypeRef return_type = g->builtin_types.entry_f80->llvm_type;
int param_count = 2;
const char *func_name;
SoftF80Icmp res_icmp = NONE;
switch (op_id) {
case IrBinOpInvalid:
case IrBinOpArrayCat:
@@ -3280,20 +3324,32 @@ static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable,
case IrBinOpCmpEq:
return_type = g->builtin_types.entry_i32->llvm_type;
func_name = "__eqxf2";
res_icmp = EQ_ZERO;
break;
case IrBinOpCmpNotEq:
return_type = g->builtin_types.entry_i32->llvm_type;
func_name = "__nexf2";
res_icmp = NE_ZERO;
break;
case IrBinOpCmpLessOrEq:
return_type = g->builtin_types.entry_i32->llvm_type;
func_name = "__lexf2";
res_icmp = LE_ZERO;
break;
case IrBinOpCmpLessThan:
return_type = g->builtin_types.entry_i32->llvm_type;
func_name = "__lexf2";
res_icmp = EQ_NEG;
break;
case IrBinOpCmpGreaterOrEq:
return_type = g->builtin_types.entry_i32->llvm_type;
func_name = "__gexf2";
res_icmp = GE_ZERO;
break;
case IrBinOpCmpGreaterThan:
return_type = g->builtin_types.entry_i32->llvm_type;
func_name = "__gexf2";
res_icmp = EQ_ONE;
break;
case IrBinOpMaximum:
func_name = "__fmaxx";
@@ -3344,8 +3400,11 @@ static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable,
if (vector_len == 0) {
LLVMValueRef params[2] = {op1_value, op2_value};
result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");
result = add_f80_icmp(g, result, res_icmp);
} else {
result = build_alloca(g, op1->value->type, "", 0);
ZigType *alloca_ty = op1->value->type;
if (res_icmp != NONE) alloca_ty = get_vector_type(g, vector_len, g->builtin_types.entry_bool);
result = build_alloca(g, alloca_ty, "", 0);
}
LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type;
@@ -3356,6 +3415,7 @@ static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable,
LLVMBuildExtractElement(g->builder, op2_value, index_value, ""),
};
LLVMValueRef call_result = LLVMBuildCall(g->builder, func_ref, params, param_count, "");
call_result = add_f80_icmp(g, call_result, res_icmp);
LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""),
call_result, index_value, "");
}
@@ -4052,12 +4112,47 @@ static LLVMValueRef ir_render_binary_not(CodeGen *g, Stage1Air *executable,
return LLVMBuildNot(g->builder, operand, "");
}
static LLVMValueRef ir_gen_soft_f80_neg(CodeGen *g, ZigType *op_type, LLVMValueRef operand) {
uint32_t vector_len = op_type->id == ZigTypeIdVector ? op_type->data.vector.len : 0;
uint64_t buf[2] = {0, 0};
if (g->is_big_endian != native_is_big_endian) {
buf[1] = 0x8000000000000000;
} else {
buf[1] = 0x8000;
}
LLVMValueRef sign_mask = LLVMConstIntOfArbitraryPrecision(LLVMInt128Type(), 2, buf);
LLVMValueRef result;
if (vector_len == 0) {
result = LLVMBuildXor(g->builder, operand, sign_mask, "");
} else {
result = build_alloca(g, op_type, "", 0);
}
LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type;
for (uint32_t i = 0; i < vector_len; i++) {
LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false);
LLVMValueRef xor_operand = LLVMBuildExtractElement(g->builder, operand, index_value, "");
LLVMValueRef xor_result = LLVMBuildXor(g->builder, xor_operand, sign_mask, "");
LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""),
xor_result, index_value, "");
}
if (vector_len != 0) {
result = LLVMBuildLoad(g->builder, result, "");
}
return result;
}
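In the matching-endianness branch, buf[1] = 0x8000 is the upper 64-bit word of the 128-bit constant, i.e. 0x8000 << 64 == 1 << 79, so the XOR flips exactly the f80 sign bit, the same operation __subxf3 performs on the exp field. An equivalent sketch in Zig terms (hypothetical helper on the 16-byte storage):
fn softNegF80Bits(bits: u128) u128 {
    // flip the sign bit of the 80-bit value held in 128-bit storage
    return bits ^ (@as(u128, 1) << 79);
}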
static LLVMValueRef ir_gen_negation(CodeGen *g, Stage1AirInst *inst, Stage1AirInst *operand, bool wrapping) {
LLVMValueRef llvm_operand = ir_llvm_value(g, operand);
ZigType *operand_type = operand->value->type;
ZigType *scalar_type = (operand_type->id == ZigTypeIdVector) ?
operand_type->data.vector.elem_type : operand_type;
if (scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target))
return ir_gen_soft_f80_neg(g, operand_type, llvm_operand);
if (scalar_type->id == ZigTypeIdFloat) {
ZigLLVMSetFastMath(g->builder, ir_want_fast_math(g, inst));
return LLVMBuildFNeg(g->builder, llvm_operand, "");
@@ -8108,6 +8203,7 @@ static LLVMValueRef gen_const_val(CodeGen *g, ZigValue *const_val, const char *n
buf[1] = tmp;
#endif
LLVMValueRef as_i128 = LLVMConstIntOfArbitraryPrecision(LLVMInt128Type(), 2, buf);
if (!target_has_f80(g->zig_target)) return as_i128;
LLVMValueRef as_int = LLVMConstTrunc(as_i128, LLVMIntType(80));
return LLVMConstBitCast(as_int, get_llvm_type(g, type_entry));
}
@@ -9331,13 +9427,15 @@ static void define_builtin_types(CodeGen *g) {
add_fp_entry(g, "f64", 64, LLVMDoubleType(), &g->builtin_types.entry_f64);
add_fp_entry(g, "f128", 128, LLVMFP128Type(), &g->builtin_types.entry_f128);
if (target_has_f80(g->zig_target)) {
add_fp_entry(g, "f80", 80, LLVMX86FP80Type(), &g->builtin_types.entry_f80);
} else {
{
ZigType *entry = new_type_table_entry(ZigTypeIdFloat);
entry->llvm_type = get_int_type(g, false, 128)->llvm_type;
entry->size_in_bits = 8 * LLVMStoreSizeOfType(g->target_data_ref, entry->llvm_type);
entry->abi_size = LLVMABISizeOfType(g->target_data_ref, entry->llvm_type);
if (target_has_f80(g->zig_target)) {
entry->llvm_type = LLVMX86FP80Type();
} else {
entry->llvm_type = get_int_type(g, false, 128)->llvm_type;
}
entry->size_in_bits = 8 * 16;
entry->abi_size = 16;
entry->abi_align = 16;
buf_init_from_str(&entry->name, "f80");
entry->data.floating.bit_count = 80;