mirror of
https://github.com/ziglang/zig.git
synced 2026-02-13 04:48:20 +00:00
Merge pull request #8542 from LemonBoy/floating-point-is-hard-my-dude
Floating point is hard my dude
This commit is contained in:
commit
1e06a74348
@ -23,6 +23,10 @@ pub fn __extendhfsf2(a: u16) callconv(.C) f32 {
|
||||
return @call(.{ .modifier = .always_inline }, extendXfYf2, .{ f32, f16, a });
|
||||
}
|
||||
|
||||
/// C-ABI entry point converting a half-precision input to `f128`.
/// `a` (a `u16`, presumably the raw `f16` bit pattern) is forwarded unchanged
/// to `extendXfYf2` with `f128` as destination type and `f16` as source type.
/// The generic routine is force-inlined into this wrapper.
pub fn __extendhftf2(a: u16) callconv(.C) f128 {
    const widened: f128 = @call(.{ .modifier = .always_inline }, extendXfYf2, .{ f128, f16, a });
    return widened;
}
|
||||
|
||||
pub fn __aeabi_h2f(arg: u16) callconv(.AAPCS) f32 {
|
||||
@setRuntimeSafety(false);
|
||||
return @call(.{ .modifier = .always_inline }, __extendhfsf2, .{arg});
|
||||
|
||||
@ -4,9 +4,10 @@
|
||||
// The MIT license requires this copyright notice to be included in all copies
|
||||
// and substantial portions of the software.
|
||||
const builtin = @import("builtin");
|
||||
const __extenddftf2 = @import("extendXfYf2.zig").__extenddftf2;
|
||||
const __extendhfsf2 = @import("extendXfYf2.zig").__extendhfsf2;
|
||||
const __extendhftf2 = @import("extendXfYf2.zig").__extendhftf2;
|
||||
const __extendsftf2 = @import("extendXfYf2.zig").__extendsftf2;
|
||||
const __extenddftf2 = @import("extendXfYf2.zig").__extenddftf2;
|
||||
|
||||
fn test__extenddftf2(a: f64, expectedHi: u64, expectedLo: u64) void {
|
||||
const x = __extenddftf2(a);
|
||||
@ -161,3 +162,49 @@ fn makeNaN32(rand: u32) f32 {
|
||||
/// Returns the `f32` whose bit pattern is 0x7f800000
/// (sign bit clear, all eight exponent bits set, zero mantissa).
fn makeInf32() f32 {
    const inf_bits: u32 = 0x7f800000;
    return @bitCast(f32, inf_bits);
}
|
||||
|
||||
/// Runs `__extendhftf2(a)` and compares the 128-bit result, split into its
/// upper and lower 64-bit halves, against `expectedHi` / `expectedLo`.
/// When the expected value is exactly 0x7fff800000000000:0x0, any result with
/// the 0x7fff exponent field set and a non-zero mantissa is accepted as well.
/// Panics with "__extendhftf2 test failure" on any other mismatch.
fn test__extendhftf2(a: u16, expectedHi: u64, expectedLo: u64) void {
    const bits = @bitCast(u128, __extendhftf2(a));
    const upper = @intCast(u64, bits >> 64);
    const lower = @truncate(u64, bits);

    const exact_match = upper == expectedHi and lower == expectedLo;
    if (exact_match) return;

    // test other possible NaN representation(signal NaN)
    const expects_qnan = expectedHi == 0x7fff800000000000 and expectedLo == 0x0;
    if (expects_qnan) {
        const exponent_all_ones = (upper & 0x7fff000000000000) == 0x7fff000000000000;
        const mantissa_nonzero = (upper & 0xffffffffffff) > 0 or lower > 0;
        if (exponent_all_ones and mantissa_nonzero) return;
    }

    @panic("__extendhftf2 test failure");
}
|
||||
|
||||
test "extendhftf2" {
    // Each row: half-precision input bits, then the expected upper and lower
    // 64-bit halves of the f128 result. Rows are checked in order.
    const cases = [_]struct { input: u16, hi: u64, lo: u64 }{
        // qNaN
        .{ .input = 0x7e00, .hi = 0x7fff800000000000, .lo = 0x0 },
        // NaN
        .{ .input = 0x7d00, .hi = 0x7fff400000000000, .lo = 0x0 },
        // inf
        .{ .input = 0x7c00, .hi = 0x7fff000000000000, .lo = 0x0 },
        .{ .input = 0xfc00, .hi = 0xffff000000000000, .lo = 0x0 },
        // zero
        .{ .input = 0x0000, .hi = 0x0000000000000000, .lo = 0x0 },
        .{ .input = 0x8000, .hi = 0x8000000000000000, .lo = 0x0 },
        // denormal
        .{ .input = 0x0010, .hi = 0x3feb000000000000, .lo = 0x0 },
        .{ .input = 0x0001, .hi = 0x3fe7000000000000, .lo = 0x0 },
        .{ .input = 0x8001, .hi = 0xbfe7000000000000, .lo = 0x0 },
        // pi
        .{ .input = 0x4248, .hi = 0x4000920000000000, .lo = 0x0 },
        .{ .input = 0xc248, .hi = 0xc000920000000000, .lo = 0x0 },
        // other normal values
        .{ .input = 0x508c, .hi = 0x4004230000000000, .lo = 0x0 },
        .{ .input = 0x1bb7, .hi = 0x3ff6edc000000000, .lo = 0x0 },
    };

    for (cases) |case| {
        test__extendhftf2(case.input, case.hi, case.lo);
    }
}
|
||||
|
||||
@ -13,6 +13,10 @@ pub fn __truncdfhf2(a: f64) callconv(.C) u16 {
|
||||
return @bitCast(u16, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f64, a }));
|
||||
}
|
||||
|
||||
/// C-ABI entry point narrowing an `f128` to half precision.
/// The conversion is done by `truncXfYf2` (force-inlined) with `f16` as the
/// destination type; the resulting `f16` is returned as its `u16` bit pattern.
pub fn __trunctfhf2(a: f128) callconv(.C) u16 {
    const narrowed: f16 = @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f128, a });
    return @bitCast(u16, narrowed);
}
|
||||
|
||||
/// C-ABI entry point narrowing an `f128` to `f32`.
/// Delegates to `truncXfYf2` (force-inlined) with `f32` as the destination
/// type and `f128` as the source type.
pub fn __trunctfsf2(a: f128) callconv(.C) f32 {
    const narrowed: f32 = @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f32, f128, a });
    return narrowed;
}
|
||||
@ -122,7 +126,7 @@ fn truncXfYf2(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
|
||||
if (shift > srcSigBits) {
|
||||
absResult = 0;
|
||||
} else {
|
||||
const sticky: src_rep_t = significand << @intCast(SrcShift, srcBits - shift);
|
||||
const sticky: src_rep_t = @boolToInt(significand << @intCast(SrcShift, srcBits - shift) != 0);
|
||||
const denormalizedSignificand: src_rep_t = significand >> @intCast(SrcShift, shift) | sticky;
|
||||
absResult = @intCast(dst_rep_t, denormalizedSignificand >> (srcSigBits - dstSigBits));
|
||||
const roundBits: src_rep_t = denormalizedSignificand & roundMask;
|
||||
|
||||
@ -242,3 +242,59 @@ test "truncdfsf2" {
|
||||
// huge number becomes inf
|
||||
test__truncdfsf2(340282366920938463463374607431768211456.0, 0x7f800000);
|
||||
}
|
||||
|
||||
const __trunctfhf2 = @import("truncXfYf2.zig").__trunctfhf2;
|
||||
|
||||
/// Runs `__trunctfhf2(a)` and checks that the returned half-precision bit
/// pattern equals `expected`. On mismatch, prints both values in hex and
/// panics with "__trunctfhf2 test failure".
fn test__trunctfhf2(a: f128, expected: u16) void {
    // `__trunctfhf2` already returns the raw `u16` bit pattern, so no
    // additional `@bitCast` is needed before comparing.
    const actual = __trunctfhf2(a);
    if (actual == expected) return;

    @import("std").debug.warn("got 0x{x} wanted 0x{x}\n", .{ actual, expected });

    @panic("__trunctfhf2 test failure");
}
|
||||
|
||||
test "trunctfhf2" {
    // Each row: an f128 input and the half-precision bit pattern expected
    // from `__trunctfhf2`. Rows are checked in order.
    const cases = [_]struct { input: f128, expected: u16 }{
        // qNaN
        .{ .input = @bitCast(f128, @as(u128, 0x7fff8000000000000000000000000000)), .expected = 0x7e00 },
        // NaN
        .{ .input = @bitCast(f128, @as(u128, 0x7fff0000000000000000000000000001)), .expected = 0x7e00 },
        // inf
        .{ .input = @bitCast(f128, @as(u128, 0x7fff0000000000000000000000000000)), .expected = 0x7c00 },
        .{ .input = -@bitCast(f128, @as(u128, 0x7fff0000000000000000000000000000)), .expected = 0xfc00 },
        // zero
        .{ .input = 0.0, .expected = 0x0 },
        .{ .input = -0.0, .expected = 0x8000 },

        .{ .input = 3.1415926535, .expected = 0x4248 },
        .{ .input = -3.1415926535, .expected = 0xc248 },
        .{ .input = 0x1.987124876876324p+100, .expected = 0x7c00 },
        .{ .input = 0x1.987124876876324p+12, .expected = 0x6e62 },
        .{ .input = 0x1.0p+0, .expected = 0x3c00 },
        .{ .input = 0x1.0p-14, .expected = 0x0400 },
        // denormal
        .{ .input = 0x1.0p-20, .expected = 0x0010 },
        .{ .input = 0x1.0p-24, .expected = 0x0001 },
        .{ .input = -0x1.0p-24, .expected = 0x8001 },
        .{ .input = 0x1.5p-25, .expected = 0x0001 },
        // and back to zero
        .{ .input = 0x1.0p-25, .expected = 0x0000 },
        .{ .input = -0x1.0p-25, .expected = 0x8000 },
        // max (precise)
        .{ .input = 65504.0, .expected = 0x7bff },
        // max (rounded)
        .{ .input = 65519.0, .expected = 0x7bff },
        // max (to +inf)
        .{ .input = 65520.0, .expected = 0x7c00 },
        .{ .input = 65536.0, .expected = 0x7c00 },
        .{ .input = -65520.0, .expected = 0xfc00 },

        .{ .input = 0x1.23a2abb4a2ddee355f36789abcdep+5, .expected = 0x508f },
        .{ .input = 0x1.e3d3c45bd3abfd98b76a54cc321fp-9, .expected = 0x1b8f },
        .{ .input = 0x1.234eebb5faa678f4488693abcdefp+453, .expected = 0x7c00 },
        .{ .input = 0x1.edcba9bb8c76a5a43dd21f334634p-43, .expected = 0x0 },
    };

    for (cases) |case| {
        test__trunctfhf2(case.input, case.expected);
    }
}
|
||||
|
||||
@ -9,6 +9,7 @@
|
||||
#include "bigint.hpp"
|
||||
#include "buffer.hpp"
|
||||
#include "softfloat.hpp"
|
||||
#include "softfloat_ext.hpp"
|
||||
#include "parse_f128.h"
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
@ -60,9 +61,7 @@ void bigfloat_init_bigint(BigFloat *dest, const BigInt *op) {
|
||||
|
||||
if (i == 0) {
|
||||
if (op->is_negative) {
|
||||
float128_t zero_f128;
|
||||
ui32_to_f128M(0, &zero_f128);
|
||||
f128M_sub(&zero_f128, &dest->value, &dest->value);
|
||||
f128M_neg(&dest->value, &dest->value);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -89,9 +88,7 @@ void bigfloat_add(BigFloat *dest, const BigFloat *op1, const BigFloat *op2) {
|
||||
}
|
||||
|
||||
// Stores the negation of `op` in `dest`.
//
// The sign is flipped directly with f128M_neg rather than computed as
// `0 - op`: a subtraction from zero writes `dest` first (which corrupts the
// operand when `dest` and `op` refer to the same BigFloat) and does not
// reliably produce a negative zero from a positive zero operand.
void bigfloat_negate(BigFloat *dest, const BigFloat *op) {
    f128M_neg(&op->value, &dest->value);
}
|
||||
|
||||
void bigfloat_sub(BigFloat *dest, const BigFloat *op1, const BigFloat *op2) {
|
||||
|
||||
@ -7436,7 +7436,10 @@ static LLVMValueRef gen_const_val(CodeGen *g, ZigValue *const_val, const char *n
|
||||
case ZigTypeIdFloat:
|
||||
switch (type_entry->data.floating.bit_count) {
|
||||
case 16:
|
||||
return LLVMConstReal(get_llvm_type(g, type_entry), zig_f16_to_double(const_val->data.x_f16));
|
||||
{
|
||||
LLVMValueRef as_int = LLVMConstInt(LLVMInt16Type(), const_val->data.x_f16.v, false);
|
||||
return LLVMConstBitCast(as_int, get_llvm_type(g, type_entry));
|
||||
}
|
||||
case 32:
|
||||
return LLVMConstReal(get_llvm_type(g, type_entry), const_val->data.x_f32);
|
||||
case 64:
|
||||
|
||||
@ -11363,11 +11363,8 @@ static void float_negate(ZigValue *out_val, ZigValue *op) {
|
||||
} else if (op->type->id == ZigTypeIdFloat) {
|
||||
switch (op->type->data.floating.bit_count) {
|
||||
case 16:
|
||||
{
|
||||
const float16_t zero = zig_double_to_f16(0);
|
||||
out_val->data.x_f16 = f16_sub(zero, op->data.x_f16);
|
||||
return;
|
||||
}
|
||||
out_val->data.x_f16 = f16_neg(op->data.x_f16);
|
||||
return;
|
||||
case 32:
|
||||
out_val->data.x_f32 = -op->data.x_f32;
|
||||
return;
|
||||
@ -11375,9 +11372,7 @@ static void float_negate(ZigValue *out_val, ZigValue *op) {
|
||||
out_val->data.x_f64 = -op->data.x_f64;
|
||||
return;
|
||||
case 128:
|
||||
float128_t zero_f128;
|
||||
ui32_to_f128M(0, &zero_f128);
|
||||
f128M_sub(&zero_f128, &op->data.x_f128, &out_val->data.x_f128);
|
||||
f128M_neg(&op->data.x_f128, &out_val->data.x_f128);
|
||||
return;
|
||||
default:
|
||||
zig_unreachable();
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
#include "softfloat_ext.hpp"
|
||||
|
||||
extern "C" {
|
||||
#include "platform.h"
|
||||
#include "internals.h"
|
||||
#include "softfloat.h"
|
||||
}
|
||||
|
||||
@ -22,4 +24,15 @@ void f128M_trunc(const float128_t *aPtr, float128_t *zPtr) {
|
||||
} else {
|
||||
f128M_roundToInt(aPtr, softfloat_round_min, false, zPtr);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns `a` with bit 15 of its 16-bit pattern toggled (presumably the
// IEEE binary16 sign bit); all other bits are passed through unchanged.
float16_t f16_neg(const float16_t a) {
    const uint16_t top_bit = UINT16_C(1) << 15;
    union ui16_f16 flipped;
    flipped.ui = a.v ^ top_bit;
    return flipped.f;
}
|
||||
|
||||
// Writes to *zPtr a copy of *aPtr in which bit 63 of word indexWord(2,1) is
// toggled; word indexWord(2,0) is copied as-is. Each output word depends only
// on the matching input word, so aPtr and zPtr may point at the same object.
void f128M_neg(const float128_t *aPtr, float128_t *zPtr) {
    const uint64_t top_bit = UINT64_C(1) << 63;
    zPtr->v[indexWord(2,0)] = aPtr->v[indexWord(2,0)];
    zPtr->v[indexWord(2,1)] = aPtr->v[indexWord(2,1)] ^ top_bit;
}
|
||||
@ -5,5 +5,8 @@
|
||||
|
||||
void f128M_abs(const float128_t *aPtr, float128_t *zPtr);
|
||||
void f128M_trunc(const float128_t *aPtr, float128_t *zPtr);
|
||||
// Writes to *zPtr a copy of *aPtr with the top bit (bit 63) of its
// indexWord(2,1) word flipped; the other 64-bit word is copied unchanged.
void f128M_neg(const float128_t *aPtr, float128_t *zPtr);
|
||||
|
||||
// Returns `a` with bit 15 of its 16-bit pattern flipped.
float16_t f16_neg(const float16_t a);
|
||||
|
||||
#endif
|
||||
@ -843,3 +843,20 @@ test "compare undefined literal with comptime_int" {
|
||||
x = true;
|
||||
expect(x);
|
||||
}
|
||||
|
||||
test "signed zeros are represented properly" {
    const S = struct {
        /// For each of f16, f32, f64 and f128: negates a zero of that type,
        /// reinterprets it as an unsigned integer of the same width, and
        /// expects the most significant bit to be 1.
        fn doTheTest() void {
            inline for ([_]type{ f16, f32, f64, f128 }) |T| {
                const bit_count = @typeInfo(T).Float.bits;
                const Bits = std.meta.Int(.unsigned, bit_count);
                var negative_zero = -@as(T, 0.0);
                var raw = @bitCast(Bits, negative_zero);
                // Ensure the sign bit is set.
                const top_bit = raw >> (bit_count - 1);
                expect(top_bit == 1);
            }
        }
    };

    // Exercise the check both at runtime and at compile time.
    S.doTheTest();
    comptime S.doTheTest();
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user