From d03e9d0b8347a74d674bdafadb71e7ddd8fdfad1 Mon Sep 17 00:00:00 2001
From: LemonBoy <thatlemon@gmail.com>
Date: Mon, 1 Nov 2021 19:26:32 +0100
Subject: [PATCH] compiler-rt: Fix f16 API declarations to be consistent

LLVM and compiler-rt must agree on how the parameters are passed. It
turns out that something changed in LLVM 13 and broke the test case on
AArch64 systems.

The breakage has nothing to do with fma at all.

Closes #9900
---
 lib/std/special/compiler_rt/extendXfYf2.zig   | 23 +++++++++-------
 .../special/compiler_rt/extendXfYf2_test.zig  |  5 ++--
 lib/std/special/compiler_rt/truncXfYf2.zig    | 18 ++++++++-----
 .../special/compiler_rt/truncXfYf2_test.zig   |  4 +--
 test/behavior/cast_stage1.zig                 | 26 +++++++++++++++++++
 test/behavior/muladd.zig                      |  3 +--
 6 files changed, 57 insertions(+), 22 deletions(-)

diff --git a/lib/std/special/compiler_rt/extendXfYf2.zig b/lib/std/special/compiler_rt/extendXfYf2.zig
index 9a2580e9ec..2c3f0c88fc 100644
--- a/lib/std/special/compiler_rt/extendXfYf2.zig
+++ b/lib/std/special/compiler_rt/extendXfYf2.zig
@@ -1,6 +1,7 @@
 const std = @import("std");
 const builtin = @import("builtin");
 const is_test = builtin.is_test;
+const native_arch = builtin.cpu.arch;
 
 pub fn __extendsfdf2(a: f32) callconv(.C) f64 {
     return extendXfYf2(f64, f32, @bitCast(u32, a));
@@ -14,12 +15,16 @@ pub fn __extendsftf2(a: f32) callconv(.C) f128 {
     return extendXfYf2(f128, f32, @bitCast(u32, a));
 }
 
-pub fn __extendhfsf2(a: u16) callconv(.C) f32 {
-    return extendXfYf2(f32, f16, a);
+// AArch64 is the only ABI (at the moment) to support f16 arguments without the
+// need for extending them to wider fp types.
+pub const F16T = if (native_arch.isAARCH64()) f16 else u16;
+
+pub fn __extendhfsf2(a: F16T) callconv(.C) f32 {
+    return extendXfYf2(f32, f16, @bitCast(u16, a));
 }
 
-pub fn __extendhftf2(a: u16) callconv(.C) f128 {
-    return extendXfYf2(f128, f16, a);
+pub fn __extendhftf2(a: F16T) callconv(.C) f128 {
+    return extendXfYf2(f128, f16, @bitCast(u16, a));
 }
 
 pub fn __extendxftf2(a: c_longdouble) callconv(.C) f128 {
@@ -29,16 +34,14 @@ pub fn __extendxftf2(a: c_longdouble) callconv(.C) f128 {
 
 pub fn __aeabi_h2f(arg: u16) callconv(.AAPCS) f32 {
     @setRuntimeSafety(false);
-    return @call(.{ .modifier = .always_inline }, __extendhfsf2, .{arg});
+    return @call(.{ .modifier = .always_inline }, extendXfYf2, .{ f32, f16, arg });
 }
 
 pub fn __aeabi_f2d(arg: f32) callconv(.AAPCS) f64 {
     @setRuntimeSafety(false);
-    return @call(.{ .modifier = .always_inline }, __extendsfdf2, .{arg});
+    return @call(.{ .modifier = .always_inline }, extendXfYf2, .{ f64, f32, @bitCast(u32, arg) });
 }
 
-const CHAR_BIT = 8;
-
 inline fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: std.meta.Int(.unsigned, @typeInfo(src_t).Float.bits)) dst_t {
     @setRuntimeSafety(builtin.is_test);
 
@@ -50,7 +53,7 @@ inline fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: std.meta.In
 
     // Various constants whose values follow from the type parameters.
     // Any reasonable optimizer will fold and propagate all of these.
-    const srcBits = @sizeOf(src_t) * CHAR_BIT;
+    const srcBits = @bitSizeOf(src_t);
     const srcExpBits = srcBits - srcSigBits - 1;
     const srcInfExp = (1 << srcExpBits) - 1;
     const srcExpBias = srcInfExp >> 1;
@@ -62,7 +65,7 @@ inline fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: std.meta.In
     const srcQNaN = 1 << (srcSigBits - 1);
     const srcNaNCode = srcQNaN - 1;
 
-    const dstBits = @sizeOf(dst_t) * CHAR_BIT;
+    const dstBits = @bitSizeOf(dst_t);
     const dstExpBits = dstBits - dstSigBits - 1;
     const dstInfExp = (1 << dstExpBits) - 1;
     const dstExpBias = dstInfExp >> 1;
diff --git a/lib/std/special/compiler_rt/extendXfYf2_test.zig b/lib/std/special/compiler_rt/extendXfYf2_test.zig
index 89545576a2..d0c4f82e97 100644
--- a/lib/std/special/compiler_rt/extendXfYf2_test.zig
+++ b/lib/std/special/compiler_rt/extendXfYf2_test.zig
@@ -3,6 +3,7 @@ const __extendhfsf2 = @import("extendXfYf2.zig").__extendhfsf2;
 const __extendhftf2 = @import("extendXfYf2.zig").__extendhftf2;
 const __extendsftf2 = @import("extendXfYf2.zig").__extendsftf2;
 const __extenddftf2 = @import("extendXfYf2.zig").__extenddftf2;
+const F16T = @import("extendXfYf2.zig").F16T;
 
 fn test__extenddftf2(a: f64, expectedHi: u64, expectedLo: u64) !void {
     const x = __extenddftf2(a);
@@ -27,7 +28,7 @@ fn test__extenddftf2(a: f64, expectedHi: u64, expectedLo: u64) !void {
 }
 
 fn test__extendhfsf2(a: u16, expected: u32) !void {
-    const x = __extendhfsf2(a);
+    const x = __extendhfsf2(@bitCast(F16T, a));
     const rep = @bitCast(u32, x);
 
     if (rep == expected) {
@@ -159,7 +160,7 @@ fn makeInf32() f32 {
 }
 
 fn test__extendhftf2(a: u16, expectedHi: u64, expectedLo: u64) !void {
-    const x = __extendhftf2(a);
+    const x = __extendhftf2(@bitCast(F16T, a));
 
     const rep = @bitCast(u128, x);
     const hi = @intCast(u64, rep >> 64);
diff --git a/lib/std/special/compiler_rt/truncXfYf2.zig b/lib/std/special/compiler_rt/truncXfYf2.zig
index 3cad52426e..559ec0ec4f 100644
--- a/lib/std/special/compiler_rt/truncXfYf2.zig
+++ b/lib/std/special/compiler_rt/truncXfYf2.zig
@@ -1,15 +1,21 @@
 const std = @import("std");
+const builtin = @import("builtin");
+const native_arch = builtin.cpu.arch;
 
-pub fn __truncsfhf2(a: f32) callconv(.C) u16 {
-    return @bitCast(u16, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f32, a }));
+// AArch64 is the only ABI (at the moment) to support f16 arguments without the
+// need for extending them to wider fp types.
+pub const F16T = if (native_arch.isAARCH64()) f16 else u16;
+
+pub fn __truncsfhf2(a: f32) callconv(.C) F16T {
+    return @bitCast(F16T, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f32, a }));
 }
 
-pub fn __truncdfhf2(a: f64) callconv(.C) u16 {
-    return @bitCast(u16, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f64, a }));
+pub fn __truncdfhf2(a: f64) callconv(.C) F16T {
+    return @bitCast(F16T, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f64, a }));
 }
 
-pub fn __trunctfhf2(a: f128) callconv(.C) u16 {
-    return @bitCast(u16, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f128, a }));
+pub fn __trunctfhf2(a: f128) callconv(.C) F16T {
+    return @bitCast(F16T, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f128, a }));
 }
 
 pub fn __trunctfsf2(a: f128) callconv(.C) f32 {
diff --git a/lib/std/special/compiler_rt/truncXfYf2_test.zig b/lib/std/special/compiler_rt/truncXfYf2_test.zig
index 23c83afd9f..83ec8afab0 100644
--- a/lib/std/special/compiler_rt/truncXfYf2_test.zig
+++ b/lib/std/special/compiler_rt/truncXfYf2_test.zig
@@ -1,7 +1,7 @@
 const __truncsfhf2 = @import("truncXfYf2.zig").__truncsfhf2;
 
 fn test__truncsfhf2(a: u32, expected: u16) !void {
-    const actual = __truncsfhf2(@bitCast(f32, a));
+    const actual = @bitCast(u16, __truncsfhf2(@bitCast(f32, a)));
 
     if (actual == expected) {
         return;
@@ -82,7 +82,7 @@ fn test__truncdfhf2(a: f64, expected: u16) void {
 }
 
 fn test__truncdfhf2_raw(a: u64, expected: u16) void {
-    const actual = __truncdfhf2(@bitCast(f64, a));
+    const actual = @bitCast(u16, __truncdfhf2(@bitCast(f64, a)));
 
     if (actual == expected) {
         return;
diff --git a/test/behavior/cast_stage1.zig b/test/behavior/cast_stage1.zig
index 1a5679bc0c..f6bf975011 100644
--- a/test/behavior/cast_stage1.zig
+++ b/test/behavior/cast_stage1.zig
@@ -263,6 +263,32 @@ test "cast *[1][*]const u8 to [*]const ?[*]const u8" {
     try expect(mem.eql(u8, std.mem.spanZ(@ptrCast([*:0]const u8, x[0].?)), "window name"));
 }
 
+test "cast f16 to wider types" {
+    const S = struct {
+        fn doTheTest() !void {
+            var x: f16 = 1234.0;
+            try std.testing.expectEqual(@as(f32, 1234.0), x);
+            try std.testing.expectEqual(@as(f64, 1234.0), x);
+            try std.testing.expectEqual(@as(f128, 1234.0), x);
+        }
+    };
+    try S.doTheTest();
+    comptime try S.doTheTest();
+}
+
+test "cast f128 to narrower types" {
+    const S = struct {
+        fn doTheTest() !void {
+            var x: f128 = 1234.0;
+            try std.testing.expectEqual(@as(f16, 1234.0), @floatCast(f16, x));
+            try std.testing.expectEqual(@as(f32, 1234.0), @floatCast(f32, x));
+            try std.testing.expectEqual(@as(f64, 1234.0), @floatCast(f64, x));
+        }
+    };
+    try S.doTheTest();
+    comptime try S.doTheTest();
+}
+
 test "vector casts" {
     const S = struct {
         fn doTheTest() !void {
diff --git a/test/behavior/muladd.zig b/test/behavior/muladd.zig
index 5129303c92..eaa30324df 100644
--- a/test/behavior/muladd.zig
+++ b/test/behavior/muladd.zig
@@ -24,8 +24,7 @@ fn testMulAdd() !void {
         var c: f64 = 6.25;
         try expect(@mulAdd(f64, a, b, c) == 20);
     }
-    // TODO https://github.com/ziglang/zig/issues/9900
-    if (@import("builtin").cpu.arch != .aarch64) {
+    {
         var a: f16 = 5.5;
         var b: f128 = 2.5;
         var c: f128 = 6.25;