mirror of
https://github.com/ziglang/zig.git
synced 2025-12-12 01:03:13 +00:00
We already have a LICENSE file that covers the Zig Standard Library. We no longer need to remind everyone that the license is MIT in every single file. Previously this was introduced to clarify the situation for a fork of Zig that made Zig's LICENSE file harder to find, and replaced it with their own license that required annual payments to their company. However that fork now appears to be dead. So there is no need to reinforce the copyright notice in every single file.
300 lines
12 KiB
Zig
300 lines
12 KiB
Zig
// Ported from:
|
|
//
|
|
// https://github.com/llvm/llvm-project/blob/2ffb1b0413efa9a24eb3c49e710e36f92e2cb50b/compiler-rt/lib/builtins/fp_mul_impl.inc
|
|
|
|
const std = @import("std");
|
|
const builtin = @import("builtin");
|
|
const compiler_rt = @import("../compiler_rt.zig");
|
|
|
|
pub fn __multf3(a: f128, b: f128) callconv(.C) f128 {
|
|
return mulXf3(f128, a, b);
|
|
}
|
|
pub fn __muldf3(a: f64, b: f64) callconv(.C) f64 {
|
|
return mulXf3(f64, a, b);
|
|
}
|
|
pub fn __mulsf3(a: f32, b: f32) callconv(.C) f32 {
|
|
return mulXf3(f32, a, b);
|
|
}
|
|
|
|
pub fn __aeabi_fmul(a: f32, b: f32) callconv(.C) f32 {
|
|
@setRuntimeSafety(false);
|
|
return @call(.{ .modifier = .always_inline }, __mulsf3, .{ a, b });
|
|
}
|
|
|
|
pub fn __aeabi_dmul(a: f64, b: f64) callconv(.C) f64 {
|
|
@setRuntimeSafety(false);
|
|
return @call(.{ .modifier = .always_inline }, __muldf3, .{ a, b });
|
|
}
|
|
|
|
fn mulXf3(comptime T: type, a: T, b: T) T {
|
|
@setRuntimeSafety(builtin.is_test);
|
|
const typeWidth = @typeInfo(T).Float.bits;
|
|
const Z = std.meta.Int(.unsigned, typeWidth);
|
|
|
|
const significandBits = std.math.floatMantissaBits(T);
|
|
const exponentBits = std.math.floatExponentBits(T);
|
|
|
|
const signBit = (@as(Z, 1) << (significandBits + exponentBits));
|
|
const maxExponent = ((1 << exponentBits) - 1);
|
|
const exponentBias = (maxExponent >> 1);
|
|
|
|
const implicitBit = (@as(Z, 1) << significandBits);
|
|
const quietBit = implicitBit >> 1;
|
|
const significandMask = implicitBit - 1;
|
|
|
|
const absMask = signBit - 1;
|
|
const exponentMask = absMask ^ significandMask;
|
|
const qnanRep = exponentMask | quietBit;
|
|
const infRep = @bitCast(Z, std.math.inf(T));
|
|
|
|
const aExponent = @truncate(u32, (@bitCast(Z, a) >> significandBits) & maxExponent);
|
|
const bExponent = @truncate(u32, (@bitCast(Z, b) >> significandBits) & maxExponent);
|
|
const productSign: Z = (@bitCast(Z, a) ^ @bitCast(Z, b)) & signBit;
|
|
|
|
var aSignificand: Z = @bitCast(Z, a) & significandMask;
|
|
var bSignificand: Z = @bitCast(Z, b) & significandMask;
|
|
var scale: i32 = 0;
|
|
|
|
// Detect if a or b is zero, denormal, infinity, or NaN.
|
|
if (aExponent -% 1 >= maxExponent -% 1 or bExponent -% 1 >= maxExponent -% 1) {
|
|
const aAbs: Z = @bitCast(Z, a) & absMask;
|
|
const bAbs: Z = @bitCast(Z, b) & absMask;
|
|
|
|
// NaN * anything = qNaN
|
|
if (aAbs > infRep) return @bitCast(T, @bitCast(Z, a) | quietBit);
|
|
// anything * NaN = qNaN
|
|
if (bAbs > infRep) return @bitCast(T, @bitCast(Z, b) | quietBit);
|
|
|
|
if (aAbs == infRep) {
|
|
// infinity * non-zero = +/- infinity
|
|
if (bAbs != 0) {
|
|
return @bitCast(T, aAbs | productSign);
|
|
} else {
|
|
// infinity * zero = NaN
|
|
return @bitCast(T, qnanRep);
|
|
}
|
|
}
|
|
|
|
if (bAbs == infRep) {
|
|
//? non-zero * infinity = +/- infinity
|
|
if (aAbs != 0) {
|
|
return @bitCast(T, bAbs | productSign);
|
|
} else {
|
|
// zero * infinity = NaN
|
|
return @bitCast(T, qnanRep);
|
|
}
|
|
}
|
|
|
|
// zero * anything = +/- zero
|
|
if (aAbs == 0) return @bitCast(T, productSign);
|
|
// anything * zero = +/- zero
|
|
if (bAbs == 0) return @bitCast(T, productSign);
|
|
|
|
// one or both of a or b is denormal, the other (if applicable) is a
|
|
// normal number. Renormalize one or both of a and b, and set scale to
|
|
// include the necessary exponent adjustment.
|
|
if (aAbs < implicitBit) scale += normalize(T, &aSignificand);
|
|
if (bAbs < implicitBit) scale += normalize(T, &bSignificand);
|
|
}
|
|
|
|
// Or in the implicit significand bit. (If we fell through from the
|
|
// denormal path it was already set by normalize( ), but setting it twice
|
|
// won't hurt anything.)
|
|
aSignificand |= implicitBit;
|
|
bSignificand |= implicitBit;
|
|
|
|
// Get the significand of a*b. Before multiplying the significands, shift
|
|
// one of them left to left-align it in the field. Thus, the product will
|
|
// have (exponentBits + 2) integral digits, all but two of which must be
|
|
// zero. Normalizing this result is just a conditional left-shift by one
|
|
// and bumping the exponent accordingly.
|
|
var productHi: Z = undefined;
|
|
var productLo: Z = undefined;
|
|
wideMultiply(Z, aSignificand, bSignificand << exponentBits, &productHi, &productLo);
|
|
|
|
var productExponent: i32 = @bitCast(i32, aExponent +% bExponent) -% exponentBias +% scale;
|
|
|
|
// Normalize the significand, adjust exponent if needed.
|
|
if ((productHi & implicitBit) != 0) {
|
|
productExponent +%= 1;
|
|
} else {
|
|
productHi = (productHi << 1) | (productLo >> (typeWidth - 1));
|
|
productLo = productLo << 1;
|
|
}
|
|
|
|
// If we have overflowed the type, return +/- infinity.
|
|
if (productExponent >= maxExponent) return @bitCast(T, infRep | productSign);
|
|
|
|
if (productExponent <= 0) {
|
|
// Result is denormal before rounding
|
|
//
|
|
// If the result is so small that it just underflows to zero, return
|
|
// a zero of the appropriate sign. Mathematically there is no need to
|
|
// handle this case separately, but we make it a special case to
|
|
// simplify the shift logic.
|
|
const shift: u32 = @truncate(u32, @as(Z, 1) -% @bitCast(u32, productExponent));
|
|
if (shift >= typeWidth) return @bitCast(T, productSign);
|
|
|
|
// Otherwise, shift the significand of the result so that the round
|
|
// bit is the high bit of productLo.
|
|
wideRightShiftWithSticky(Z, &productHi, &productLo, shift);
|
|
} else {
|
|
// Result is normal before rounding; insert the exponent.
|
|
productHi &= significandMask;
|
|
productHi |= @as(Z, @bitCast(u32, productExponent)) << significandBits;
|
|
}
|
|
|
|
// Insert the sign of the result:
|
|
productHi |= productSign;
|
|
|
|
// Final rounding. The final result may overflow to infinity, or underflow
|
|
// to zero, but those are the correct results in those cases. We use the
|
|
// default IEEE-754 round-to-nearest, ties-to-even rounding mode.
|
|
if (productLo > signBit) productHi +%= 1;
|
|
if (productLo == signBit) productHi +%= productHi & 1;
|
|
return @bitCast(T, productHi);
|
|
}
|
|
|
|
fn wideMultiply(comptime Z: type, a: Z, b: Z, hi: *Z, lo: *Z) void {
|
|
@setRuntimeSafety(builtin.is_test);
|
|
switch (Z) {
|
|
u32 => {
|
|
// 32x32 --> 64 bit multiply
|
|
const product = @as(u64, a) * @as(u64, b);
|
|
hi.* = @truncate(u32, product >> 32);
|
|
lo.* = @truncate(u32, product);
|
|
},
|
|
u64 => {
|
|
const S = struct {
|
|
fn loWord(x: u64) u64 {
|
|
return @truncate(u32, x);
|
|
}
|
|
fn hiWord(x: u64) u64 {
|
|
return @truncate(u32, x >> 32);
|
|
}
|
|
};
|
|
// 64x64 -> 128 wide multiply for platforms that don't have such an operation;
|
|
// many 64-bit platforms have this operation, but they tend to have hardware
|
|
// floating-point, so we don't bother with a special case for them here.
|
|
// Each of the component 32x32 -> 64 products
|
|
const plolo: u64 = S.loWord(a) * S.loWord(b);
|
|
const plohi: u64 = S.loWord(a) * S.hiWord(b);
|
|
const philo: u64 = S.hiWord(a) * S.loWord(b);
|
|
const phihi: u64 = S.hiWord(a) * S.hiWord(b);
|
|
// Sum terms that contribute to lo in a way that allows us to get the carry
|
|
const r0: u64 = S.loWord(plolo);
|
|
const r1: u64 = S.hiWord(plolo) +% S.loWord(plohi) +% S.loWord(philo);
|
|
lo.* = r0 +% (r1 << 32);
|
|
// Sum terms contributing to hi with the carry from lo
|
|
hi.* = S.hiWord(plohi) +% S.hiWord(philo) +% S.hiWord(r1) +% phihi;
|
|
},
|
|
u128 => {
|
|
const Word_LoMask = @as(u64, 0x00000000ffffffff);
|
|
const Word_HiMask = @as(u64, 0xffffffff00000000);
|
|
const Word_FullMask = @as(u64, 0xffffffffffffffff);
|
|
const S = struct {
|
|
fn Word_1(x: u128) u64 {
|
|
return @truncate(u32, x >> 96);
|
|
}
|
|
fn Word_2(x: u128) u64 {
|
|
return @truncate(u32, x >> 64);
|
|
}
|
|
fn Word_3(x: u128) u64 {
|
|
return @truncate(u32, x >> 32);
|
|
}
|
|
fn Word_4(x: u128) u64 {
|
|
return @truncate(u32, x);
|
|
}
|
|
};
|
|
// 128x128 -> 256 wide multiply for platforms that don't have such an operation;
|
|
// many 64-bit platforms have this operation, but they tend to have hardware
|
|
// floating-point, so we don't bother with a special case for them here.
|
|
|
|
const product11: u64 = S.Word_1(a) * S.Word_1(b);
|
|
const product12: u64 = S.Word_1(a) * S.Word_2(b);
|
|
const product13: u64 = S.Word_1(a) * S.Word_3(b);
|
|
const product14: u64 = S.Word_1(a) * S.Word_4(b);
|
|
const product21: u64 = S.Word_2(a) * S.Word_1(b);
|
|
const product22: u64 = S.Word_2(a) * S.Word_2(b);
|
|
const product23: u64 = S.Word_2(a) * S.Word_3(b);
|
|
const product24: u64 = S.Word_2(a) * S.Word_4(b);
|
|
const product31: u64 = S.Word_3(a) * S.Word_1(b);
|
|
const product32: u64 = S.Word_3(a) * S.Word_2(b);
|
|
const product33: u64 = S.Word_3(a) * S.Word_3(b);
|
|
const product34: u64 = S.Word_3(a) * S.Word_4(b);
|
|
const product41: u64 = S.Word_4(a) * S.Word_1(b);
|
|
const product42: u64 = S.Word_4(a) * S.Word_2(b);
|
|
const product43: u64 = S.Word_4(a) * S.Word_3(b);
|
|
const product44: u64 = S.Word_4(a) * S.Word_4(b);
|
|
|
|
const sum0: u128 = @as(u128, product44);
|
|
const sum1: u128 = @as(u128, product34) +%
|
|
@as(u128, product43);
|
|
const sum2: u128 = @as(u128, product24) +%
|
|
@as(u128, product33) +%
|
|
@as(u128, product42);
|
|
const sum3: u128 = @as(u128, product14) +%
|
|
@as(u128, product23) +%
|
|
@as(u128, product32) +%
|
|
@as(u128, product41);
|
|
const sum4: u128 = @as(u128, product13) +%
|
|
@as(u128, product22) +%
|
|
@as(u128, product31);
|
|
const sum5: u128 = @as(u128, product12) +%
|
|
@as(u128, product21);
|
|
const sum6: u128 = @as(u128, product11);
|
|
|
|
const r0: u128 = (sum0 & Word_FullMask) +%
|
|
((sum1 & Word_LoMask) << 32);
|
|
const r1: u128 = (sum0 >> 64) +%
|
|
((sum1 >> 32) & Word_FullMask) +%
|
|
(sum2 & Word_FullMask) +%
|
|
((sum3 << 32) & Word_HiMask);
|
|
|
|
lo.* = r0 +% (r1 << 64);
|
|
hi.* = (r1 >> 64) +%
|
|
(sum1 >> 96) +%
|
|
(sum2 >> 64) +%
|
|
(sum3 >> 32) +%
|
|
sum4 +%
|
|
(sum5 << 32) +%
|
|
(sum6 << 64);
|
|
},
|
|
else => @compileError("unsupported"),
|
|
}
|
|
}
|
|
|
|
fn normalize(comptime T: type, significand: *std.meta.Int(.unsigned, @typeInfo(T).Float.bits)) i32 {
|
|
@setRuntimeSafety(builtin.is_test);
|
|
const Z = std.meta.Int(.unsigned, @typeInfo(T).Float.bits);
|
|
const significandBits = std.math.floatMantissaBits(T);
|
|
const implicitBit = @as(Z, 1) << significandBits;
|
|
|
|
const shift = @clz(Z, significand.*) - @clz(Z, implicitBit);
|
|
significand.* <<= @intCast(std.math.Log2Int(Z), shift);
|
|
return @as(i32, 1) - shift;
|
|
}
|
|
|
|
fn wideRightShiftWithSticky(comptime Z: type, hi: *Z, lo: *Z, count: u32) void {
|
|
@setRuntimeSafety(builtin.is_test);
|
|
const typeWidth = @typeInfo(Z).Int.bits;
|
|
const S = std.math.Log2Int(Z);
|
|
if (count < typeWidth) {
|
|
const sticky = @boolToInt((lo.* << @intCast(S, typeWidth -% count)) != 0);
|
|
lo.* = (hi.* << @intCast(S, typeWidth -% count)) | (lo.* >> @intCast(S, count)) | sticky;
|
|
hi.* = hi.* >> @intCast(S, count);
|
|
} else if (count < 2 * typeWidth) {
|
|
const sticky = @boolToInt((hi.* << @intCast(S, 2 * typeWidth -% count) | lo.*) != 0);
|
|
lo.* = hi.* >> @intCast(S, count -% typeWidth) | sticky;
|
|
hi.* = 0;
|
|
} else {
|
|
const sticky = @boolToInt((hi.* | lo.*) != 0);
|
|
lo.* = sticky;
|
|
hi.* = 0;
|
|
}
|
|
}
|
|
|
|
test "import mulXf3" {
|
|
_ = @import("mulXf3_test.zig");
|
|
}
|