From 5c4ef1a64ca71c2a43e362a5ad29a10bd880716c Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Thu, 20 Jan 2022 13:50:30 +0200 Subject: [PATCH 1/6] compiler-rt: add extend functions for f80 --- lib/std/math.zig | 2 +- lib/std/special/compiler_rt.zig | 12 +- lib/std/special/compiler_rt/extendXfYf2.zig | 5 - lib/std/special/compiler_rt/extend_f80.zig | 131 ++++++++++++++++++++ 4 files changed, 141 insertions(+), 9 deletions(-) create mode 100644 lib/std/special/compiler_rt/extend_f80.zig diff --git a/lib/std/math.zig b/lib/std/math.zig index 59532d7ab2..43ad49889d 100644 --- a/lib/std/math.zig +++ b/lib/std/math.zig @@ -43,7 +43,7 @@ pub const f128_max = @bitCast(f128, @as(u128, 0x7FFEFFFFFFFFFFFFFFFFFFFFFFFFFFFF pub const f128_epsilon = @bitCast(f128, @as(u128, 0x3F8F0000000000000000000000000000)); pub const f128_toint = 1.0 / f128_epsilon; -const F80Repr = if (@import("builtin").cpu.arch.endian() == .Little) extern struct { +pub const F80Repr = if (@import("builtin").cpu.arch.endian() == .Little) extern struct { fraction: u64, exp: u16, } else extern struct { diff --git a/lib/std/special/compiler_rt.zig b/lib/std/special/compiler_rt.zig index 24bca128de..acb0d13baf 100644 --- a/lib/std/special/compiler_rt.zig +++ b/lib/std/special/compiler_rt.zig @@ -39,6 +39,15 @@ comptime { const __extendhftf2 = @import("compiler_rt/extendXfYf2.zig").__extendhftf2; @export(__extendhftf2, .{ .name = "__extendhftf2", .linkage = linkage }); + const __extendhfxf2 = @import("compiler_rt/extend_f80.zig").__extendhfxf2; + @export(__extendhfxf2, .{ .name = "__extendhfxf2", .linkage = linkage }); + const __extendffxf2 = @import("compiler_rt/extend_f80.zig").__extendffxf2; + @export(__extendffxf2, .{ .name = "__extendffxf2", .linkage = linkage }); + const __extenddfxf2 = @import("compiler_rt/extend_f80.zig").__extenddfxf2; + @export(__extenddfxf2, .{ .name = "__extenddfxf2", .linkage = linkage }); + const __extendxftf2 = @import("compiler_rt/extend_f80.zig").__extendxftf2; + @export(__extendxftf2, .{ .name = "__extendxftf2", .linkage = linkage }); + const __lesf2 = @import("compiler_rt/compareXf2.zig").__lesf2; @export(__lesf2, .{ .name = "__lesf2", .linkage = linkage }); const __ledf2 = @import("compiler_rt/compareXf2.zig").__ledf2; @@ -181,9 +190,6 @@ comptime { if (!long_double_is_f128) { // TODO implement these - //const __extendxftf2 = @import("compiler_rt/extendXfYf2.zig").__extendxftf2; - //@export(__extendxftf2, .{ .name = "__extendxftf2", .linkage = linkage }); - //const __trunctfxf2 = @import("compiler_rt/truncXfYf2.zig").__trunctfxf2; //@export(__trunctfxf2, .{ .name = "__trunctfxf2", .linkage = linkage }); } diff --git a/lib/std/special/compiler_rt/extendXfYf2.zig b/lib/std/special/compiler_rt/extendXfYf2.zig index 2c3f0c88fc..8622fe1513 100644 --- a/lib/std/special/compiler_rt/extendXfYf2.zig +++ b/lib/std/special/compiler_rt/extendXfYf2.zig @@ -27,11 +27,6 @@ pub fn __extendhftf2(a: F16T) callconv(.C) f128 { return extendXfYf2(f128, f16, @bitCast(u16, a)); } -pub fn __extendxftf2(a: c_longdouble) callconv(.C) f128 { - _ = a; - @panic("TODO implement"); -} - pub fn __aeabi_h2f(arg: u16) callconv(.AAPCS) f32 { @setRuntimeSafety(false); return @call(.{ .modifier = .always_inline }, extendXfYf2, .{ f32, f16, arg }); diff --git a/lib/std/special/compiler_rt/extend_f80.zig b/lib/std/special/compiler_rt/extend_f80.zig new file mode 100644 index 0000000000..29ba8560ce --- /dev/null +++ b/lib/std/special/compiler_rt/extend_f80.zig @@ -0,0 +1,131 @@ +const std = @import("std"); +const builtin = 
@import("builtin"); +const is_test = builtin.is_test; +const native_arch = builtin.cpu.arch; + +// AArch64 is the only ABI (at the moment) to support f16 arguments without the +// need for extending them to wider fp types. +pub const F16T = if (native_arch.isAARCH64()) f16 else u16; + +pub fn __extendhfxf2(a: F16T) callconv(.C) f80 { + return extendF80(f16, @bitCast(u16, a)); +} + +pub fn __extendffxf2(a: f32) callconv(.C) f80 { + return extendF80(f32, @bitCast(u32, a)); +} + +pub fn __extenddfxf2(a: f64) callconv(.C) f80 { + return extendF80(f64, @bitCast(u64, a)); +} + +inline fn extendF80(comptime src_t: type, a: std.meta.Int(.unsigned, @typeInfo(src_t).Float.bits)) f80 { + @setRuntimeSafety(builtin.is_test); + + const src_rep_t = std.meta.Int(.unsigned, @typeInfo(src_t).Float.bits); + const src_sig_bits = std.math.floatMantissaBits(src_t); + const dst_int_bit = 0x8000000000000000; + const dst_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit + + const dst_exp_bias = 16383; + + const src_bits = @bitSizeOf(src_t); + const src_exp_bits = src_bits - src_sig_bits - 1; + const src_inf_exp = (1 << src_exp_bits) - 1; + const src_exp_bias = src_inf_exp >> 1; + + const src_min_normal = 1 << src_sig_bits; + const src_inf = src_inf_exp << src_sig_bits; + const src_sign_mask = 1 << (src_sig_bits + src_exp_bits); + const src_abs_mask = src_sign_mask - 1; + const src_qnan = 1 << (src_sig_bits - 1); + const src_nan_code = src_qnan - 1; + + var dst: std.math.F80Repr align(16) = undefined; + + // Break a into a sign and representation of the absolute value + const a_abs = a & src_abs_mask; + const sign: u16 = if (a & src_sign_mask != 0) 0x8000 else 0; + + if (a_abs -% src_min_normal < src_inf - src_min_normal) { + // a is a normal number. + // Extend to the destination type by shifting the significand and + // exponent into the proper position and rebiasing the exponent. + dst.exp = @intCast(u16, a_abs >> src_sig_bits); + dst.exp += dst_exp_bias - src_exp_bias; + dst.fraction = @as(u64, a_abs) << (dst_sig_bits - src_sig_bits); + dst.fraction |= dst_int_bit; // bit 64 is always set for normal numbers + } else if (a_abs >= src_inf) { + // a is NaN or infinity. + // Conjure the result by beginning with infinity, then setting the qNaN + // bit (if needed) and right-aligning the rest of the trailing NaN + // payload field. + dst.exp = 0x7fff; + dst.fraction = dst_int_bit; + dst.fraction |= @as(u64, a_abs & src_qnan) << (dst_sig_bits - src_sig_bits); + dst.fraction |= @as(u64, a_abs & src_nan_code) << (dst_sig_bits - src_sig_bits); + } else if (a_abs != 0) { + // a is denormal. + // renormalize the significand and clear the leading bit, then insert + // the correct adjusted exponent in the destination type. + const scale: u16 = @clz(src_rep_t, a_abs) - + @clz(src_rep_t, @as(src_rep_t, src_min_normal)); + + dst.fraction = @as(u64, a_abs) << @intCast(u6, dst_sig_bits - src_sig_bits + scale); + dst.fraction |= dst_int_bit; // bit 64 is always set for normal numbers + dst.exp = @truncate(u16, a_abs >> @intCast(u4, src_sig_bits - scale)); + dst.exp ^= 1; + dst.exp |= dst_exp_bias - src_exp_bias - scale + 1; + } else { + // a is zero. 
+ dst.exp = 0; + dst.fraction = 0; + } + + dst.exp |= sign; + return @ptrCast(*const f80, &dst).*; +} + +pub fn __extendxftf2(a: f80) callconv(.C) f128 { + @setRuntimeSafety(builtin.is_test); + + const src_int_bit: u64 = 0x8000000000000000; + const src_sig_mask = ~src_int_bit; + const src_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit + const dst_sig_bits = std.math.floatMantissaBits(f128); + + const dst_bits = @bitSizeOf(f128); + + const dst_min_normal = @as(u128, 1) << dst_sig_bits; + + // Break a into a sign and representation of the absolute value + var a_rep = @ptrCast(*const std.math.F80Repr, &a).*; + const sign = a_rep.exp & 0x8000; + a_rep.exp &= 0x7FFF; + var abs_result: u128 = undefined; + + if (a_rep.exp == 0 and a_rep.fraction == 0) { + // zero + abs_result = 0; + } else if (a_rep.exp == 0x7FFF) { + // a is nan or infinite + abs_result = @as(u128, a_rep.fraction) << (dst_sig_bits - src_sig_bits); + abs_result |= @as(u128, a_rep.exp) << dst_sig_bits; + } else if (a_rep.fraction & src_int_bit != 0) { + // a is a normal value + abs_result = @as(u128, a_rep.fraction & src_sig_mask) << (dst_sig_bits - src_sig_bits); + abs_result |= @as(u128, a_rep.exp) << dst_sig_bits; + } else { + // a is denormal + // renormalize the significand and clear the leading bit and integer part, + // then insert the correct adjusted exponent in the destination type. + const scale: u32 = @clz(u64, a_rep.fraction); + abs_result = @as(u128, a_rep.fraction) << @intCast(u7, dst_sig_bits - src_sig_bits + scale + 1); + abs_result ^= dst_min_normal; + abs_result |= @as(u128, scale + 1) << dst_sig_bits; + } + + // Apply the signbit to (dst_t)abs(a). + const result: u128 align(@alignOf(f128)) = abs_result | @as(u128, sign) << (dst_bits - 16); + return @bitCast(f128, result); +} From 72cef17b1a23c4704b3931540b7f10f4297870b9 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Fri, 21 Jan 2022 12:41:09 +0200 Subject: [PATCH 2/6] compiler-rt: add trunc functions for f80 --- lib/std/special/compiler_rt.zig | 13 +- lib/std/special/compiler_rt/truncXfYf2.zig | 5 - lib/std/special/compiler_rt/trunc_f80.zig | 159 +++++++++++++++++++++ 3 files changed, 167 insertions(+), 10 deletions(-) create mode 100644 lib/std/special/compiler_rt/trunc_f80.zig diff --git a/lib/std/special/compiler_rt.zig b/lib/std/special/compiler_rt.zig index acb0d13baf..555a7a49d3 100644 --- a/lib/std/special/compiler_rt.zig +++ b/lib/std/special/compiler_rt.zig @@ -188,11 +188,14 @@ comptime { const __truncdfsf2 = @import("compiler_rt/truncXfYf2.zig").__truncdfsf2; @export(__truncdfsf2, .{ .name = "__truncdfsf2", .linkage = linkage }); - if (!long_double_is_f128) { - // TODO implement these - //const __trunctfxf2 = @import("compiler_rt/truncXfYf2.zig").__trunctfxf2; - //@export(__trunctfxf2, .{ .name = "__trunctfxf2", .linkage = linkage }); - } + const __truncxfhf2 = @import("compiler_rt/trunc_f80.zig").__truncxfhf2; + @export(__truncxfhf2, .{ .name = "__truncxfhf2", .linkage = linkage }); + const __truncxfff2 = @import("compiler_rt/trunc_f80.zig").__truncxfff2; + @export(__truncxfff2, .{ .name = "__truncxfff2", .linkage = linkage }); + const __truncxfdf2 = @import("compiler_rt/trunc_f80.zig").__truncxfdf2; + @export(__truncxfdf2, .{ .name = "__truncxfdf2", .linkage = linkage }); + const __trunctfxf2 = @import("compiler_rt/trunc_f80.zig").__trunctfxf2; + @export(__trunctfxf2, .{ .name = "__trunctfxf2", .linkage = linkage }); if (builtin.zig_backend == .stage1) { switch (arch) { diff --git 
a/lib/std/special/compiler_rt/truncXfYf2.zig b/lib/std/special/compiler_rt/truncXfYf2.zig index 4cded15abc..fea1aeb60a 100644 --- a/lib/std/special/compiler_rt/truncXfYf2.zig +++ b/lib/std/special/compiler_rt/truncXfYf2.zig @@ -26,11 +26,6 @@ pub fn __trunctfdf2(a: f128) callconv(.C) f64 { return truncXfYf2(f64, f128, a); } -pub fn __trunctfxf2(a: f128) callconv(.C) c_longdouble { - _ = a; - @panic("TODO implement"); -} - pub fn __truncdfsf2(a: f64) callconv(.C) f32 { return truncXfYf2(f32, f64, a); } diff --git a/lib/std/special/compiler_rt/trunc_f80.zig b/lib/std/special/compiler_rt/trunc_f80.zig new file mode 100644 index 0000000000..567d03be63 --- /dev/null +++ b/lib/std/special/compiler_rt/trunc_f80.zig @@ -0,0 +1,159 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const native_arch = builtin.cpu.arch; + +// AArch64 is the only ABI (at the moment) to support f16 arguments without the +// need for extending them to wider fp types. +pub const F16T = if (native_arch.isAARCH64()) f16 else u16; + +pub fn __truncxfhf2(a: f80) callconv(.C) F16T { + return @bitCast(F16T, trunc(f16, a)); +} + +pub fn __truncxfff2(a: f80) callconv(.C) f32 { + return trunc(f32, a); +} + +pub fn __truncxfdf2(a: f80) callconv(.C) f64 { + return trunc(f64, a); +} + +inline fn trunc(comptime dst_t: type, a: f80) dst_t { + @setRuntimeSafety(builtin.is_test); + + const dst_rep_t = std.meta.Int(.unsigned, @typeInfo(dst_t).Float.bits); + const src_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit + const dst_sig_bits = std.math.floatMantissaBits(dst_t); + + const src_exp_bias = 16383; + + const round_mask = (1 << (src_sig_bits - dst_sig_bits)) - 1; + const halfway = 1 << (src_sig_bits - dst_sig_bits - 1); + + const dst_bits = @typeInfo(dst_t).Float.bits; + const dst_exp_bits = dst_bits - dst_sig_bits - 1; + const dst_inf_exp = (1 << dst_exp_bits) - 1; + const dst_exp_bias = dst_inf_exp >> 1; + + const underflow = src_exp_bias + 1 - dst_exp_bias; + const overflow = src_exp_bias + dst_inf_exp - dst_exp_bias; + + const dst_qnan = 1 << (dst_sig_bits - 1); + const dst_nan_mask = dst_qnan - 1; + + // Break a into a sign and representation of the absolute value + var a_rep = @ptrCast(*const std.math.F80Repr, &a).*; + const sign = a_rep.exp & 0x8000; + a_rep.exp &= 0x7FFF; + a_rep.fraction &= 0x7FFFFFFFFFFFFFFF; + var abs_result: dst_rep_t = undefined; + + if (a_rep.exp -% underflow < a_rep.exp -% overflow) { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. + abs_result = @as(dst_rep_t, a_rep.exp) << dst_sig_bits; + abs_result |= @truncate(dst_rep_t, a_rep.fraction >> (src_sig_bits - dst_sig_bits)); + abs_result -%= @as(dst_rep_t, src_exp_bias - dst_exp_bias) << dst_sig_bits; + + const round_bits = a_rep.fraction & round_mask; + if (round_bits > halfway) { + // Round to nearest + abs_result += 1; + } else if (round_bits == halfway) { + // Ties to even + abs_result += abs_result & 1; + } + } else if (a_rep.exp == 0x7FFF and a_rep.fraction != 0) { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + abs_result = @intCast(dst_rep_t, dst_inf_exp) << dst_sig_bits; + abs_result |= dst_qnan; + abs_result |= @intCast(dst_rep_t, (a_rep.fraction >> (src_sig_bits - dst_sig_bits)) & dst_nan_mask); + } else if (a_rep.exp >= overflow) { + // a overflows to infinity. 
+ abs_result = @intCast(dst_rep_t, dst_inf_exp) << dst_sig_bits; + } else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + const shift = src_exp_bias - dst_exp_bias - a_rep.exp; + + // Right shift by the denormalization amount with sticky. + if (shift > src_sig_bits) { + abs_result = 0; + } else { + const sticky = @boolToInt(a_rep.fraction << @intCast(u6, shift) != 0); + const denormalized_significand = a_rep.fraction >> @intCast(u6, shift) | sticky; + abs_result = @intCast(dst_rep_t, denormalized_significand >> (src_sig_bits - dst_sig_bits)); + const round_bits = denormalized_significand & round_mask; + if (round_bits > halfway) { + // Round to nearest + abs_result += 1; + } else if (round_bits == halfway) { + // Ties to even + abs_result += abs_result & 1; + } + } + } + + const result align(@alignOf(dst_t)) = abs_result | @as(dst_rep_t, sign) << dst_bits - 16; + return @bitCast(dst_t, result); +} + +pub fn __trunctfxf2(a: f128) callconv(.C) f80 { + const src_sig_bits = std.math.floatMantissaBits(f128); + const dst_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit + + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. + const src_bits = @typeInfo(f128).Float.bits; + const src_exp_bits = src_bits - src_sig_bits - 1; + const src_inf_exp = 0x7FFF; + + const src_inf = src_inf_exp << src_sig_bits; + const src_sign_mask = 1 << (src_sig_bits + src_exp_bits); + const src_abs_mask = src_sign_mask - 1; + const round_mask = (1 << (src_sig_bits - dst_sig_bits)) - 1; + const halfway = 1 << (src_sig_bits - dst_sig_bits - 1); + const src_qnan = 1 << (src_sig_bits - 1); + const src_nan_mask = src_qnan - 1; + + // Break a into a sign and representation of the absolute value + const a_rep = @bitCast(u128, a); + const a_abs = a_rep & src_abs_mask; + const sign: u16 = if (a_rep & src_sign_mask != 0) 0x8000 else 0; + + var res: std.math.F80Repr align(16) = undefined; + + if (a_abs > src_inf) { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + res.exp = 0x7fff; + res.fraction = 0x8000000000000000; + res.fraction |= @truncate(u64, (a_abs & src_qnan) << (src_sig_bits - dst_sig_bits)); + res.fraction |= @truncate(u64, (a_abs & src_nan_mask) << (src_sig_bits - dst_sig_bits)); + } else { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. 
+ res.fraction = @truncate(u64, a_abs >> (src_sig_bits - dst_sig_bits)); + res.exp = @truncate(u16, a_abs >> src_sig_bits); + + const round_bits = a_abs & round_mask; + if (round_bits > halfway) { + // Round to nearest + const exp = @addWithOverflow(u64, res.fraction, 1, &res.fraction); + res.exp += @boolToInt(exp); + } else if (round_bits == halfway) { + // Ties to even + const exp = @addWithOverflow(u64, res.fraction, res.fraction & 1, &res.fraction); + res.exp += @boolToInt(exp); + } + } + + res.exp |= sign; + return @ptrCast(*const f80, &res).*; +} From 9bbd3ab257137c97f695d187436e14c622f877c8 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Fri, 21 Jan 2022 15:26:43 +0200 Subject: [PATCH 3/6] compiler-rt: add comparison functions for f80 --- lib/std/special/compiler_rt.zig | 12 ++++ lib/std/special/compiler_rt/compareXf2.zig | 67 ++++++++++++++++++++++ src/stage1/codegen.cpp | 62 +++++++++++++++++++- 3 files changed, 140 insertions(+), 1 deletion(-) diff --git a/lib/std/special/compiler_rt.zig b/lib/std/special/compiler_rt.zig index 555a7a49d3..d83e94be8f 100644 --- a/lib/std/special/compiler_rt.zig +++ b/lib/std/special/compiler_rt.zig @@ -54,6 +54,8 @@ comptime { @export(__ledf2, .{ .name = "__ledf2", .linkage = linkage }); const __letf2 = @import("compiler_rt/compareXf2.zig").__letf2; @export(__letf2, .{ .name = "__letf2", .linkage = linkage }); + const __lexf2 = @import("compiler_rt/compareXf2.zig").__lexf2; + @export(__lexf2, .{ .name = "__lexf2", .linkage = linkage }); const __gesf2 = @import("compiler_rt/compareXf2.zig").__gesf2; @export(__gesf2, .{ .name = "__gesf2", .linkage = linkage }); @@ -61,26 +63,36 @@ comptime { @export(__gedf2, .{ .name = "__gedf2", .linkage = linkage }); const __getf2 = @import("compiler_rt/compareXf2.zig").__getf2; @export(__getf2, .{ .name = "__getf2", .linkage = linkage }); + const __gexf2 = @import("compiler_rt/compareXf2.zig").__gexf2; + @export(__gexf2, .{ .name = "__gexf2", .linkage = linkage }); const __eqsf2 = @import("compiler_rt/compareXf2.zig").__eqsf2; @export(__eqsf2, .{ .name = "__eqsf2", .linkage = linkage }); const __eqdf2 = @import("compiler_rt/compareXf2.zig").__eqdf2; @export(__eqdf2, .{ .name = "__eqdf2", .linkage = linkage }); + const __eqxf2 = @import("compiler_rt/compareXf2.zig").__eqxf2; + @export(__eqxf2, .{ .name = "__eqxf2", .linkage = linkage }); const __ltsf2 = @import("compiler_rt/compareXf2.zig").__ltsf2; @export(__ltsf2, .{ .name = "__ltsf2", .linkage = linkage }); const __ltdf2 = @import("compiler_rt/compareXf2.zig").__ltdf2; @export(__ltdf2, .{ .name = "__ltdf2", .linkage = linkage }); + const __ltxf2 = @import("compiler_rt/compareXf2.zig").__ltxf2; + @export(__ltxf2, .{ .name = "__ltxf2", .linkage = linkage }); const __nesf2 = @import("compiler_rt/compareXf2.zig").__nesf2; @export(__nesf2, .{ .name = "__nesf2", .linkage = linkage }); const __nedf2 = @import("compiler_rt/compareXf2.zig").__nedf2; @export(__nedf2, .{ .name = "__nedf2", .linkage = linkage }); + const __nexf2 = @import("compiler_rt/compareXf2.zig").__nexf2; + @export(__nexf2, .{ .name = "__nexf2", .linkage = linkage }); const __gtsf2 = @import("compiler_rt/compareXf2.zig").__gtsf2; @export(__gtsf2, .{ .name = "__gtsf2", .linkage = linkage }); const __gtdf2 = @import("compiler_rt/compareXf2.zig").__gtdf2; @export(__gtdf2, .{ .name = "__gtdf2", .linkage = linkage }); + const __gtxf2 = @import("compiler_rt/compareXf2.zig").__gtxf2; + @export(__gtxf2, .{ .name = "__gtxf2", .linkage = linkage }); if (!is_test) { @export(__lesf2, .{ .name = "__cmpsf2", 
.linkage = linkage }); diff --git a/lib/std/special/compiler_rt/compareXf2.zig b/lib/std/special/compiler_rt/compareXf2.zig index 9f3750094e..36f6f5f1c1 100644 --- a/lib/std/special/compiler_rt/compareXf2.zig +++ b/lib/std/special/compiler_rt/compareXf2.zig @@ -144,6 +144,73 @@ pub fn __gtdf2(a: f64, b: f64) callconv(.C) i32 { return __gedf2(a, b); } +// Comparison between f80 + +pub inline fn cmp_f80(comptime RT: type, a: f80, b: f80) RT { + const a_rep = @ptrCast(*const std.math.F80Repr, &a).*; + const b_rep = @ptrCast(*const std.math.F80Repr, &b).*; + const sig_bits = std.math.floatMantissaBits(f80); + const int_bit = 0x8000000000000000; + const sign_bit = 0x8000; + const special_exp = 0x7FFF; + + // If either a or b is NaN, they are unordered. + if ((a_rep.exp & special_exp == special_exp and a_rep.fraction ^ int_bit != 0) or + (b_rep.exp & special_exp == special_exp and b_rep.fraction ^ int_bit != 0)) + return RT.Unordered; + + // If a and b are both zeros, they are equal. + if ((a_rep.fraction | b_rep.fraction) | ((a_rep.exp | b_rep.exp) & special_exp) == 0) + return .Equal; + + if (@boolToInt(a_rep.exp == b_rep.exp) & @boolToInt(a_rep.fraction == b_rep.fraction) != 0) { + return .Equal; + } else if (a_rep.exp & sign_bit != b_rep.exp & sign_bit) { + // signs are different + if (@bitCast(i16, a_rep.exp) < @bitCast(i16, b_rep.exp)) { + return .Less; + } else { + return .Greater; + } + } else { + const a_fraction = a_rep.fraction | (@as(u80, a_rep.exp) << sig_bits); + const b_fraction = b_rep.fraction | (@as(u80, b_rep.exp) << sig_bits); + if (a_fraction < b_fraction) { + return .Less; + } else { + return .Greater; + } + } +} + +pub fn __lexf2(a: f80, b: f80) callconv(.C) i32 { + @setRuntimeSafety(builtin.is_test); + const float = cmp_f80(LE, a, b); + return @bitCast(i32, float); +} + +pub fn __gexf2(a: f80, b: f80) callconv(.C) i32 { + @setRuntimeSafety(builtin.is_test); + const float = cmp_f80(GE, a, b); + return @bitCast(i32, float); +} + +pub fn __eqxf2(a: f80, b: f80) callconv(.C) i32 { + return __lexf2(a, b); +} + +pub fn __ltxf2(a: f80, b: f80) callconv(.C) i32 { + return __lexf2(a, b); +} + +pub fn __nexf2(a: f80, b: f80) callconv(.C) i32 { + return __lexf2(a, b); +} + +pub fn __gtxf2(a: f80, b: f80) callconv(.C) i32 { + return __gexf2(a, b); +} + // Comparison between f128 pub fn __letf2(a: f128, b: f128) callconv(.C) i32 { diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp index b97f009d62..a62daf0d63 100644 --- a/src/stage1/codegen.cpp +++ b/src/stage1/codegen.cpp @@ -3234,6 +3234,49 @@ static LLVMValueRef get_soft_f80_bin_op_func(CodeGen *g, const char *name, int p return LLVMAddFunction(g->module, name, fn_type); } +enum SoftF80Icmp { + NONE, + EQ_ZERO, + NE_ZERO, + LE_ZERO, + EQ_NEG, + GE_ZERO, + EQ_ONE, +}; + +static LLVMValueRef add_f80_icmp(CodeGen *g, LLVMValueRef val, SoftF80Icmp kind) { + switch (kind) { + case NONE: + return val; + case EQ_ZERO: { + LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, 0, true); + return LLVMBuildICmp(g->builder, LLVMIntEQ, val, zero, ""); + } + case NE_ZERO: { + LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, 0, true); + return LLVMBuildICmp(g->builder, LLVMIntNE, val, zero, ""); + } + case LE_ZERO: { + LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, 0, true); + return LLVMBuildICmp(g->builder, LLVMIntSLE, val, zero, ""); + } + case EQ_NEG: { + LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, -1, true); + return 
LLVMBuildICmp(g->builder, LLVMIntEQ, val, zero, ""); + } + case GE_ZERO: { + LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, 0, true); + return LLVMBuildICmp(g->builder, LLVMIntSGE, val, zero, ""); + } + case EQ_ONE: { + LLVMValueRef zero = LLVMConstInt(g->builtin_types.entry_i32->llvm_type, 1, true); + return LLVMBuildICmp(g->builder, LLVMIntEQ, val, zero, ""); + } + default: + zig_unreachable(); + } +} + static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable, Stage1AirInstBinOp *bin_op_instruction) { @@ -3249,6 +3292,7 @@ static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable, LLVMTypeRef return_type = g->builtin_types.entry_f80->llvm_type; int param_count = 2; const char *func_name; + SoftF80Icmp res_icmp = NONE; switch (op_id) { case IrBinOpInvalid: case IrBinOpArrayCat: @@ -3274,20 +3318,32 @@ static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable, case IrBinOpCmpEq: return_type = g->builtin_types.entry_i32->llvm_type; func_name = "__eqxf2"; + res_icmp = EQ_ZERO; break; case IrBinOpCmpNotEq: return_type = g->builtin_types.entry_i32->llvm_type; func_name = "__nexf2"; + res_icmp = NE_ZERO; break; case IrBinOpCmpLessOrEq: + return_type = g->builtin_types.entry_i32->llvm_type; + func_name = "__lexf2"; + res_icmp = LE_ZERO; + break; case IrBinOpCmpLessThan: return_type = g->builtin_types.entry_i32->llvm_type; func_name = "__lexf2"; + res_icmp = EQ_NEG; break; case IrBinOpCmpGreaterOrEq: + return_type = g->builtin_types.entry_i32->llvm_type; + func_name = "__gexf2"; + res_icmp = GE_ZERO; + break; case IrBinOpCmpGreaterThan: return_type = g->builtin_types.entry_i32->llvm_type; func_name = "__gexf2"; + res_icmp = EQ_ONE; break; case IrBinOpMaximum: func_name = "__fmaxx"; @@ -3338,8 +3394,11 @@ static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable, if (vector_len == 0) { LLVMValueRef params[2] = {op1_value, op2_value}; result = LLVMBuildCall(g->builder, func_ref, params, param_count, ""); + result = add_f80_icmp(g, result, res_icmp); } else { - result = build_alloca(g, op1->value->type, "", 0); + ZigType *alloca_ty = op1->value->type; + if (res_icmp != NONE) alloca_ty = get_vector_type(g, vector_len, g->builtin_types.entry_bool); + result = build_alloca(g, alloca_ty, "", 0); } LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type; @@ -3350,6 +3409,7 @@ static LLVMValueRef ir_render_soft_f80_bin_op(CodeGen *g, Stage1Air *executable, LLVMBuildExtractElement(g->builder, op2_value, index_value, ""), }; LLVMValueRef call_result = LLVMBuildCall(g->builder, func_ref, params, param_count, ""); + call_result = add_f80_icmp(g, call_result, res_icmp); LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""), call_result, index_value, ""); } From 6a736f0c8c187f2fcaeed4b60bf9e54aa719ae02 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Fri, 21 Jan 2022 21:49:02 +0200 Subject: [PATCH 4/6] compiler-rt: add add/sub for f80 --- lib/std/special/compiler_rt.zig | 4 + lib/std/special/compiler_rt/addXf3.zig | 171 +++++++++++++++++++++++++ 2 files changed, 175 insertions(+) diff --git a/lib/std/special/compiler_rt.zig b/lib/std/special/compiler_rt.zig index d83e94be8f..da21745cce 100644 --- a/lib/std/special/compiler_rt.zig +++ b/lib/std/special/compiler_rt.zig @@ -237,12 +237,16 @@ comptime { @export(__adddf3, .{ .name = "__adddf3", .linkage = linkage }); const __addtf3 = @import("compiler_rt/addXf3.zig").__addtf3; @export(__addtf3, .{ .name = "__addtf3", 
.linkage = linkage }); + const __addxf3 = @import("compiler_rt/addXf3.zig").__addxf3; + @export(__addxf3, .{ .name = "__addxf3", .linkage = linkage }); const __subsf3 = @import("compiler_rt/addXf3.zig").__subsf3; @export(__subsf3, .{ .name = "__subsf3", .linkage = linkage }); const __subdf3 = @import("compiler_rt/addXf3.zig").__subdf3; @export(__subdf3, .{ .name = "__subdf3", .linkage = linkage }); const __subtf3 = @import("compiler_rt/addXf3.zig").__subtf3; @export(__subtf3, .{ .name = "__subtf3", .linkage = linkage }); + const __subxf3 = @import("compiler_rt/addXf3.zig").__subxf3; + @export(__subxf3, .{ .name = "__subxf3", .linkage = linkage }); const __mulsf3 = @import("compiler_rt/mulXf3.zig").__mulsf3; @export(__mulsf3, .{ .name = "__mulsf3", .linkage = linkage }); diff --git a/lib/std/special/compiler_rt/addXf3.zig b/lib/std/special/compiler_rt/addXf3.zig index 4c74110310..41ff00e95d 100644 --- a/lib/std/special/compiler_rt/addXf3.zig +++ b/lib/std/special/compiler_rt/addXf3.zig @@ -225,6 +225,177 @@ fn addXf3(comptime T: type, a: T, b: T) T { return @bitCast(T, result); } +fn normalize_f80(exp: *i32, significand: *u80) void { + const shift = @clz(u64, @truncate(u64, significand.*)); + significand.* = (significand.* << shift); + exp.* += -@as(i8, shift); +} + +pub fn __addxf3(a: f80, b: f80) callconv(.C) f80 { + var a_rep align(16) = @ptrCast(*const std.math.F80Repr, &a).*; + var b_rep align(16) = @ptrCast(*const std.math.F80Repr, &b).*; + var a_exp: i32 = a_rep.exp & 0x7FFF; + var b_exp: i32 = b_rep.exp & 0x7FFF; + + const significand_bits = std.math.floatMantissaBits(f80); + const int_bit = 0x8000000000000000; + const significand_mask = 0x7FFFFFFFFFFFFFFF; + const qnan_bit = 0xC000000000000000; + const max_exp = 0x7FFF; + const sign_bit = 0x8000; + + // Detect if a or b is infinity, or NaN. + if (a_exp == max_exp) { + if (a_rep.fraction ^ int_bit == 0) { + if (b_exp == max_exp and (b_rep.fraction ^ int_bit == 0)) { + // +/-infinity + -/+infinity = qNaN + return std.math.qnan_f80; + } + // +/-infinity + anything = +/- infinity + return a; + } else { + std.debug.assert(a_rep.fraction & significand_mask != 0); + // NaN + anything = qNaN + a_rep.fraction |= qnan_bit; + return @ptrCast(*const f80, &a_rep).*; + } + } + if (b_exp == max_exp) { + if (b_rep.fraction ^ int_bit == 0) { + // anything + +/-infinity = +/-infinity + return b; + } else { + std.debug.assert(b_rep.fraction & significand_mask != 0); + // anything + NaN = qNaN + b_rep.fraction |= qnan_bit; + return @ptrCast(*const f80, &b_rep).*; + } + } + + const a_zero = (a_rep.fraction | @bitCast(u32, a_exp)) == 0; + const b_zero = (b_rep.fraction | @bitCast(u32, b_exp)) == 0; + if (a_zero) { + // zero + anything = anything + if (b_zero) { + // but we need to get the sign right for zero + zero + a_rep.exp &= b_rep.exp; + return @ptrCast(*const f80, &a_rep).*; + } else { + return b; + } + } else if (b_zero) { + // anything + zero = anything + return a; + } + + var a_int: u80 = a_rep.fraction | (@as(u80, a_rep.exp & max_exp) << significand_bits); + var b_int: u80 = b_rep.fraction | (@as(u80, b_rep.exp & max_exp) << significand_bits); + + // Swap a and b if necessary so that a has the larger absolute value. + if (b_int > a_int) { + const temp = a_rep; + a_rep = b_rep; + b_rep = temp; + } + + // Extract the exponent and significand from the (possibly swapped) a and b. 
+ a_exp = a_rep.exp & max_exp; + b_exp = b_rep.exp & max_exp; + a_int = a_rep.fraction; + b_int = b_rep.fraction; + + // Normalize any denormals, and adjust the exponent accordingly. + normalize_f80(&a_exp, &a_int); + normalize_f80(&b_exp, &b_int); + + // The sign of the result is the sign of the larger operand, a. If they + // have opposite signs, we are performing a subtraction; otherwise addition. + const result_sign = a_rep.exp & sign_bit; + const subtraction = (a_rep.exp ^ b_rep.exp) & sign_bit != 0; + + // Shift the significands to give us round, guard and sticky, and or in the + // implicit significand bit. (If we fell through from the denormal path it + // was already set by normalize( ), but setting it twice won't hurt + // anything.) + a_int = a_int << 3; + b_int = b_int << 3; + + // Shift the significand of b by the difference in exponents, with a sticky + // bottom bit to get rounding correct. + const @"align" = @intCast(u80, a_exp - b_exp); + if (@"align" != 0) { + if (@"align" < 80) { + const sticky = if (b_int << @intCast(u7, 80 - @"align") != 0) @as(u80, 1) else 0; + b_int = (b_int >> @truncate(u7, @"align")) | sticky; + } else { + b_int = 1; // sticky; b is known to be non-zero. + } + } + if (subtraction) { + a_int -= b_int; + // If a == -b, return +zero. + if (a_int == 0) return 0.0; + + // If partial cancellation occurred, we need to left-shift the result + // and adjust the exponent: + if (a_int < int_bit << 3) { + const shift = @intCast(i32, @clz(u80, a_int)) - @intCast(i32, @clz(u80, int_bit << 3)); + a_int <<= @intCast(u7, shift); + a_exp -= shift; + } + } else { // addition + a_int += b_int; + + // If the addition carried up, we need to right-shift the result and + // adjust the exponent: + if (a_int & (int_bit << 4) != 0) { + const sticky = a_int & 1; + a_int = a_int >> 1 | sticky; + a_exp += 1; + } + } + + // If we have overflowed the type, return +/- infinity: + if (a_exp >= max_exp) { + a_rep.exp = max_exp | result_sign; + a_rep.fraction = int_bit; // integer bit is set for +/-inf + return @ptrCast(*const f80, &a_rep).*; + } + + if (a_exp <= 0) { + // Result is denormal before rounding; the exponent is zero and we + // need to shift the significand. + const shift = @intCast(u80, 1 - a_exp); + const sticky = if (a_int << @intCast(u7, 80 - shift) != 0) @as(u1, 1) else 0; + a_int = a_int >> @intCast(u7, shift | sticky); + a_exp = 0; + } + + // Low three bits are round, guard, and sticky. + const round_guard_sticky = @truncate(u3, a_int); + + // Shift the significand into place. + a_int = @truncate(u64, a_int >> 3); + + // // Insert the exponent and sign. + a_int |= (@intCast(u80, a_exp) | result_sign) << significand_bits; + + // Final rounding. The result may overflow to infinity, but that is the + // correct result in that case. 
+ if (round_guard_sticky > 0x4) a_int += 1; + if (round_guard_sticky == 0x4) a_int += a_int & 1; + + a_rep.fraction = @truncate(u64, a_int); + a_rep.exp = @truncate(u16, a_int >> significand_bits); + return @ptrCast(*const f80, &a_rep).*; +} + +pub fn __subxf3(a: f80, b: f80) callconv(.C) f80 { + var b_rep align(16) = @ptrCast(*const std.math.F80Repr, &b).*; + b_rep.exp ^= 0x8000; + return __addxf3(a, @ptrCast(*const f80, &b_rep).*); +} + test { _ = @import("addXf3_test.zig"); } From b2f84c6714c30589c35fce72bab530cef4b05eca Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Sat, 22 Jan 2022 14:07:15 +0200 Subject: [PATCH 5/6] stage1: implement f80 negation on non native targets --- src/stage1/codegen.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp index a62daf0d63..1b11f32397 100644 --- a/src/stage1/codegen.cpp +++ b/src/stage1/codegen.cpp @@ -4106,12 +4106,47 @@ static LLVMValueRef ir_render_binary_not(CodeGen *g, Stage1Air *executable, return LLVMBuildNot(g->builder, operand, ""); } +static LLVMValueRef ir_gen_soft_f80_neg(CodeGen *g, ZigType *op_type, LLVMValueRef operand) { + uint32_t vector_len = op_type->id == ZigTypeIdVector ? op_type->data.vector.len : 0; + + uint64_t buf[2] = {0, 0}; + if (g->is_big_endian != native_is_big_endian) { + buf[1] = 0x8000000000000000; + } else { + buf[1] = 0x8000; + } + LLVMValueRef sign_mask = LLVMConstIntOfArbitraryPrecision(LLVMInt128Type(), 2, buf); + + LLVMValueRef result; + if (vector_len == 0) { + result = LLVMBuildXor(g->builder, operand, sign_mask, ""); + } else { + result = build_alloca(g, op_type, "", 0); + } + + LLVMTypeRef usize_ref = g->builtin_types.entry_usize->llvm_type; + for (uint32_t i = 0; i < vector_len; i++) { + LLVMValueRef index_value = LLVMConstInt(usize_ref, i, false); + LLVMValueRef xor_operand = LLVMBuildExtractElement(g->builder, operand, index_value, ""); + LLVMValueRef xor_result = LLVMBuildXor(g->builder, xor_operand, sign_mask, ""); + LLVMBuildInsertElement(g->builder, LLVMBuildLoad(g->builder, result, ""), + xor_result, index_value, ""); + } + if (vector_len != 0) { + result = LLVMBuildLoad(g->builder, result, ""); + } + return result; +} + static LLVMValueRef ir_gen_negation(CodeGen *g, Stage1AirInst *inst, Stage1AirInst *operand, bool wrapping) { LLVMValueRef llvm_operand = ir_llvm_value(g, operand); ZigType *operand_type = operand->value->type; ZigType *scalar_type = (operand_type->id == ZigTypeIdVector) ? 
operand_type->data.vector.elem_type : operand_type; + if (scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) + return ir_gen_soft_f80_neg(g, operand_type, llvm_operand); + if (scalar_type->id == ZigTypeIdFloat) { ZigLLVMSetFastMath(g->builder, ir_want_fast_math(g, inst)); return LLVMBuildFNeg(g->builder, llvm_operand, ""); From 5a7d43df23664385d6841ef98b17ff9447be1ec6 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Sat, 29 Jan 2022 17:30:18 +0200 Subject: [PATCH 6/6] stage1: make f80 always size 16, align 16 --- lib/std/math.zig | 2 ++ src/stage1/codegen.cpp | 15 +++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/std/math.zig b/lib/std/math.zig index 43ad49889d..6802d420fd 100644 --- a/lib/std/math.zig +++ b/lib/std/math.zig @@ -46,8 +46,10 @@ pub const f128_toint = 1.0 / f128_epsilon; pub const F80Repr = if (@import("builtin").cpu.arch.endian() == .Little) extern struct { fraction: u64, exp: u16, + _pad: u32 = undefined, } else extern struct { exp: u16, + _pad: u32 = undefined, // TODO verify compatibility with hardware fraction: u64, }; diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp index 1b11f32397..96576f1721 100644 --- a/src/stage1/codegen.cpp +++ b/src/stage1/codegen.cpp @@ -8197,6 +8197,7 @@ static LLVMValueRef gen_const_val(CodeGen *g, ZigValue *const_val, const char *n buf[1] = tmp; #endif LLVMValueRef as_i128 = LLVMConstIntOfArbitraryPrecision(LLVMInt128Type(), 2, buf); + if (!target_has_f80(g->zig_target)) return as_i128; LLVMValueRef as_int = LLVMConstTrunc(as_i128, LLVMIntType(80)); return LLVMConstBitCast(as_int, get_llvm_type(g, type_entry)); } @@ -9420,13 +9421,15 @@ static void define_builtin_types(CodeGen *g) { add_fp_entry(g, "f64", 64, LLVMDoubleType(), &g->builtin_types.entry_f64); add_fp_entry(g, "f128", 128, LLVMFP128Type(), &g->builtin_types.entry_f128); - if (target_has_f80(g->zig_target)) { - add_fp_entry(g, "f80", 80, LLVMX86FP80Type(), &g->builtin_types.entry_f80); - } else { + { ZigType *entry = new_type_table_entry(ZigTypeIdFloat); - entry->llvm_type = get_int_type(g, false, 128)->llvm_type; - entry->size_in_bits = 8 * LLVMStoreSizeOfType(g->target_data_ref, entry->llvm_type); - entry->abi_size = LLVMABISizeOfType(g->target_data_ref, entry->llvm_type); + if (target_has_f80(g->zig_target)) { + entry->llvm_type = LLVMX86FP80Type(); + } else { + entry->llvm_type = get_int_type(g, false, 128)->llvm_type; + } + entry->size_in_bits = 8 * 16; + entry->abi_size = 16; entry->abi_align = 16; buf_init_from_str(&entry->name, "f80"); entry->data.floating.bit_count = 80;
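
For reference, a minimal sketch (not part of the patches above) of the f80 bit layout that every routine in this series relies on. It assumes the padded std.math.F80Repr from the last patch and reuses the same @ptrCast reinterpretation the new compiler-rt code performs; the expected bit pattern for 1.0 (biased exponent 0x3FFF, explicit integer bit set) comes from the x87 80-bit extended format itself, not from anything defined in the patches.

const std = @import("std");

test "f80 layout assumed by the soft-float routines" {
    // 1.0 in x87 extended format: sign 0, biased exponent 16383 (0x3FFF),
    // 64-bit significand with the explicit integer bit (bit 63) set.
    var one: f80 = 1.0;
    const rep = @ptrCast(*const std.math.F80Repr, &one).*;
    try std.testing.expectEqual(@as(u16, 0x3FFF), rep.exp);
    try std.testing.expectEqual(@as(u64, 0x8000000000000000), rep.fraction);
}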
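
Similarly, a small hypothetical test (again not part of the series) of the three-state convention that the stage1 SoftF80Icmp lowering above depends on: __lexf2/__gexf2 return a negative, zero, or positive i32 for less, equal, and greater, and an unordered comparison (a NaN operand) yields a value that makes the corresponding ordered predicates false. The relative import path is an assumption and only resolves from inside lib/std/special/compiler_rt/.

const std = @import("std");
const cmp = @import("compareXf2.zig");

test "f80 soft comparison convention" {
    try std.testing.expect(cmp.__lexf2(1.0, 2.0) < 0); // Less
    try std.testing.expect(cmp.__lexf2(2.0, 2.0) == 0); // Equal
    try std.testing.expect(cmp.__gexf2(3.0, 2.0) > 0); // Greater
    // NaN is unordered: __lexf2 returns a positive value so <=, <, and == all lower to false.
    try std.testing.expect(cmp.__lexf2(std.math.qnan_f80, 2.0) > 0);
}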