From 83e2d3fb3701d77c8177f7b2b164cbd7c790a3ed Mon Sep 17 00:00:00 2001
From: Cody Tapscott
Date: Sat, 8 Oct 2022 11:29:49 -0700
Subject: [PATCH 01/14] stage1: Skip new tests that never passed in stage1

This gets the behavior tests passing for stage1 again.
---
 test/behavior.zig | 3 ++-
 test/behavior/align.zig | 2 ++
 test/behavior/bugs/11816.zig | 1 +
 test/behavior/bugs/12723.zig | 11 +++++++++++
 test/behavior/bugs/12801-1.zig | 1 +
 test/behavior/bugs/12801-2.zig | 1 +
 test/behavior/enum.zig | 7 -------
 test/behavior/eval.zig | 2 ++
 test/behavior/packed-struct.zig | 1 +
 9 files changed, 21 insertions(+), 8 deletions(-)
 create mode 100644 test/behavior/bugs/12723.zig

diff --git a/test/behavior.zig b/test/behavior.zig
index 4e1c2ba35a..44559d45d6 100644
--- a/test/behavior.zig
+++ b/test/behavior.zig
@@ -89,7 +89,6 @@ test {
     _ = @import("behavior/bugs/12551.zig");
     _ = @import("behavior/bugs/12644.zig");
     _ = @import("behavior/bugs/12680.zig");
-    _ = @import("behavior/bugs/12776.zig");
     _ = @import("behavior/bugs/12786.zig");
     _ = @import("behavior/bugs/12794.zig");
     _ = @import("behavior/bugs/12801-1.zig");
@@ -187,6 +186,8 @@ test {
         _ = @import("behavior/packed_struct_explicit_backing_int.zig");
         _ = @import("behavior/empty_union.zig");
         _ = @import("behavior/inline_switch.zig");
+        _ = @import("behavior/bugs/12723.zig");
+        _ = @import("behavior/bugs/12776.zig");
     }

     if (builtin.os.tag != .wasi) {
diff --git a/test/behavior/align.zig b/test/behavior/align.zig
index a131cc8df7..2ebdda341a 100644
--- a/test/behavior/align.zig
+++ b/test/behavior/align.zig
@@ -566,6 +566,8 @@ test "@alignCast null" {
 }

 test "alignment of slice element" {
+    if (builtin.zig_backend == .stage1) return error.SkipZigTest;
+
     const a: []align(1024) const u8 = undefined;
     try expect(@TypeOf(&a[0]) == *align(1024) const u8);
 }
diff --git a/test/behavior/bugs/11816.zig b/test/behavior/bugs/11816.zig
index 639212e098..5b6c9bd319 100644
--- a/test/behavior/bugs/11816.zig
+++ b/test/behavior/bugs/11816.zig
@@ -3,6 +3,7 @@ const builtin = @import("builtin");

 test {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage1) return error.SkipZigTest;

     var x: u32 = 3;
     const val: usize = while (true) switch (x) {
diff --git a/test/behavior/bugs/12723.zig b/test/behavior/bugs/12723.zig
new file mode 100644
index 0000000000..6768444545
--- /dev/null
+++ b/test/behavior/bugs/12723.zig
@@ -0,0 +1,11 @@
+const expect = @import("std").testing.expect;
+
+// This test causes a compile error on stage1 regardless of whether
+// the body of the test is comptime-gated or not. To work around this,
+// we gate the inclusion of the test file.
+test "Non-exhaustive enum backed by comptime_int" { + const E = enum(comptime_int) { a, b, c, _ }; + comptime var e: E = .a; + e = @intToEnum(E, 378089457309184723749); + try expect(@enumToInt(e) == 378089457309184723749); +} diff --git a/test/behavior/bugs/12801-1.zig b/test/behavior/bugs/12801-1.zig index ff94382d1f..b1f565e47f 100644 --- a/test/behavior/bugs/12801-1.zig +++ b/test/behavior/bugs/12801-1.zig @@ -8,6 +8,7 @@ fn capacity_() u64 { test { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage1) return error.SkipZigTest; try std.testing.expect((@This(){}).capacity() == 64); } diff --git a/test/behavior/bugs/12801-2.zig b/test/behavior/bugs/12801-2.zig index f98fcfbcff..298e4f96c1 100644 --- a/test/behavior/bugs/12801-2.zig +++ b/test/behavior/bugs/12801-2.zig @@ -14,6 +14,7 @@ const Auto = struct { } }; test { + if (builtin.zig_backend == .stage1) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO diff --git a/test/behavior/enum.zig b/test/behavior/enum.zig index 114090c78e..5cb89ffa32 100644 --- a/test/behavior/enum.zig +++ b/test/behavior/enum.zig @@ -1169,10 +1169,3 @@ test "Non-exhaustive enum with nonstandard int size behaves correctly" { const E = enum(u15) { _ }; try expect(@sizeOf(E) == @sizeOf(u15)); } - -test "Non-exhaustive enum backed by comptime_int" { - const E = enum(comptime_int) { a, b, c, _ }; - comptime var e: E = .a; - e = @intToEnum(E, 378089457309184723749); - try expect(@enumToInt(e) == 378089457309184723749); -} diff --git a/test/behavior/eval.zig b/test/behavior/eval.zig index da93ebc831..c2d3162919 100644 --- a/test/behavior/eval.zig +++ b/test/behavior/eval.zig @@ -1339,6 +1339,8 @@ test "lazy value is resolved as slice operand" { } test "break from inline loop depends on runtime condition" { + if (builtin.zig_backend == .stage1) return error.SkipZigTest; + const S = struct { fn foo(a: u8) bool { return a == 4; diff --git a/test/behavior/packed-struct.zig b/test/behavior/packed-struct.zig index 46adee083d..5a878112b5 100644 --- a/test/behavior/packed-struct.zig +++ b/test/behavior/packed-struct.zig @@ -585,6 +585,7 @@ test "runtime init of unnamed packed struct type" { } test "packed struct passed to callconv(.C) function" { + if (builtin.zig_backend == .stage1) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; From aedafb20cf32caea453b648cd19b7c82e993d02d Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Fri, 7 Oct 2022 09:25:19 -0700 Subject: [PATCH 02/14] stage2: Fix softfloat support for PPC64(LE) Stage 2's softfloat support still had a couple of gaps, which were preventing us from lowering `f16` on this target. With any luck, this is enough to get PPC64 working as a Tier 2 target again. 
--- deps/SoftFloat-3e-prebuilt/platform.h | 16 +-- src/codegen/llvm.zig | 142 +++++++++----------------- 2 files changed, 59 insertions(+), 99 deletions(-) diff --git a/deps/SoftFloat-3e-prebuilt/platform.h b/deps/SoftFloat-3e-prebuilt/platform.h index 588c548c60..2c4a0ec88e 100644 --- a/deps/SoftFloat-3e-prebuilt/platform.h +++ b/deps/SoftFloat-3e-prebuilt/platform.h @@ -3,6 +3,10 @@ #if defined(__BIG_ENDIAN__) #define BIGENDIAN 1 +#elif defined(_BIG_ENDIAN) && (_BIG_ENDIAN == 1) +#define BIGENDIAN 1 +#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#define BIGENDIAN 1 #elif defined(__ARMEB__) #define BIGENDIAN 1 #elif defined(__THUMBEB__) @@ -15,18 +19,12 @@ #define BIGENDIAN 1 #elif defined(__MIPSEB__) #define BIGENDIAN 1 -#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define BIGENDIAN 1 #elif defined(__sparc) #define BIGENDIAN 1 #elif defined(__sparc__) #define BIGENDIAN 1 #elif defined(_POWER) #define BIGENDIAN 1 -#elif defined(__powerpc__) -#define BIGENDIAN 1 -#elif defined(__ppc__) -#define BIGENDIAN 1 #elif defined(__hpux) #define BIGENDIAN 1 #elif defined(__hppa) @@ -39,6 +37,10 @@ #if defined(__LITTLE_ENDIAN__) #define LITTLEENDIAN 1 +#elif defined(_LITTLE_ENDIAN) && (_LITTLE_ENDIAN == 1) +#define LITTLEENDIAN 1 +#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#define LITTLEENDIAN 1 #elif defined(__ARMEL__) #define LITTLEENDIAN 1 #elif defined(__THUMBEL__) @@ -51,8 +53,6 @@ #define LITTLEENDIAN 1 #elif defined(__MIPSEL__) #define LITTLEENDIAN 1 -#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define LITTLEENDIAN 1 #elif defined(__i386__) #define LITTLEENDIAN 1 #elif defined(__alpha__) diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 7038606611..6134ba7d50 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -2711,7 +2711,7 @@ pub const DeclGen = struct { return dg.context.intType(bit_count); }, .Float => switch (t.floatBits(target)) { - 16 => return dg.context.halfType(), + 16 => return if (backendSupportsF16(target)) dg.context.halfType() else dg.context.intType(16), 32 => return dg.context.floatType(), 64 => return dg.context.doubleType(), 80 => return if (backendSupportsF80(target)) dg.context.x86FP80Type() else dg.context.intType(80), @@ -3226,7 +3226,15 @@ pub const DeclGen = struct { .Float => { const llvm_ty = try dg.lowerType(tv.ty); switch (tv.ty.floatBits(target)) { - 16, 32, 64 => return llvm_ty.constReal(tv.val.toFloat(f64)), + 16 => if (intrinsicsAllowed(tv.ty, target)) { + return llvm_ty.constReal(tv.val.toFloat(f16)); + } else { + const repr = @bitCast(u16, tv.val.toFloat(f16)); + const llvm_i16 = dg.context.intType(16); + const int = llvm_i16.constInt(repr, .False); + return int.constBitCast(llvm_ty); + }, + 32, 64 => return llvm_ty.constReal(tv.val.toFloat(f64)), 80 => { const float = tv.val.toFloat(f80); const repr = std.math.break_f80(float); @@ -7584,11 +7592,25 @@ pub const FuncGen = struct { const target = self.dg.module.getTarget(); const dest_bits = dest_ty.floatBits(target); const src_bits = operand_ty.floatBits(target); - if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) { - return softF80TruncOrExt(self, operand, src_bits, dest_bits); + + if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) { + const dest_llvm_ty = try self.dg.lowerType(dest_ty); + return self.builder.buildFPTrunc(operand, dest_llvm_ty, ""); + } else { + const operand_llvm_ty = try 
self.dg.lowerType(operand_ty);
+            const dest_llvm_ty = try self.dg.lowerType(dest_ty);
+
+            var fn_name_buf: [64]u8 = undefined;
+            const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__trunc{s}f{s}f2", .{
+                compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits),
+            }) catch unreachable;
+
+            const params = [1]*llvm.Value{operand};
+            const param_types = [1]*llvm.Type{operand_llvm_ty};
+            const llvm_fn = self.getLibcFunction(fn_name, &param_types, dest_llvm_ty);
+
+            return self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &params, params.len, .C, .Auto, "");
         }
-        const dest_llvm_ty = try self.dg.lowerType(dest_ty);
-        return self.builder.buildFPTrunc(operand, dest_llvm_ty, "");
     }

     fn airFpext(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
@@ -7602,11 +7624,25 @@ pub const FuncGen = struct {
         const target = self.dg.module.getTarget();
         const dest_bits = dest_ty.floatBits(target);
         const src_bits = operand_ty.floatBits(target);
-        if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) {
-            return softF80TruncOrExt(self, operand, src_bits, dest_bits);
+
+        if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) {
+            const dest_llvm_ty = try self.dg.lowerType(dest_ty);
+            return self.builder.buildFPExt(operand, dest_llvm_ty, "");
+        } else {
+            const operand_llvm_ty = try self.dg.lowerType(operand_ty);
+            const dest_llvm_ty = try self.dg.lowerType(dest_ty);
+
+            var fn_name_buf: [64]u8 = undefined;
+            const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__extend{s}f{s}f2", .{
+                compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits),
+            }) catch unreachable;
+
+            const params = [1]*llvm.Value{operand};
+            const param_types = [1]*llvm.Type{operand_llvm_ty};
+            const llvm_fn = self.getLibcFunction(fn_name, &param_types, dest_llvm_ty);
+
+            return self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &params, params.len, .C, .Auto, "");
         }
-        const dest_llvm_ty = try self.dg.lowerType(self.air.typeOfIndex(inst));
-        return self.builder.buildFPExt(operand, dest_llvm_ty, "");
     }

     fn airPtrToInt(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
@@ -9064,87 +9100,6 @@ pub const FuncGen = struct {
         return null;
     }

-    fn softF80TruncOrExt(
-        self: *FuncGen,
-        operand: *llvm.Value,
-        src_bits: u16,
-        dest_bits: u16,
-    ) !?*llvm.Value {
-        const target = self.dg.module.getTarget();
-
-        var param_llvm_ty: *llvm.Type = self.context.intType(80);
-        var ret_llvm_ty: *llvm.Type = param_llvm_ty;
-        var fn_name: [*:0]const u8 = undefined;
-        var arg = operand;
-        var final_cast: ?*llvm.Type = null;
-
-        assert(src_bits == 80 or dest_bits == 80);
-
-        if (src_bits == 80) switch (dest_bits) {
-            16 => {
-                // See corresponding condition at definition of
-                // __truncxfhf2 in compiler-rt.
-                if (target.cpu.arch.isAARCH64()) {
-                    ret_llvm_ty = self.context.halfType();
-                } else {
-                    ret_llvm_ty = self.context.intType(16);
-                    final_cast = self.context.halfType();
-                }
-                fn_name = "__truncxfhf2";
-            },
-            32 => {
-                ret_llvm_ty = self.context.floatType();
-                fn_name = "__truncxfsf2";
-            },
-            64 => {
-                ret_llvm_ty = self.context.doubleType();
-                fn_name = "__truncxfdf2";
-            },
-            80 => return operand,
-            128 => {
-                ret_llvm_ty = self.context.fp128Type();
-                fn_name = "__extendxftf2";
-            },
-            else => unreachable,
-        } else switch (src_bits) {
-            16 => {
-                // See corresponding condition at definition of
-                // __extendhfxf2 in compiler-rt.
-                param_llvm_ty = if (target.cpu.arch.isAARCH64())
-                    self.context.halfType()
-                else
-                    self.context.intType(16);
-                arg = self.builder.buildBitCast(arg, param_llvm_ty, "");
-                fn_name = "__extendhfxf2";
-            },
-            32 => {
-                param_llvm_ty = self.context.floatType();
-                fn_name = "__extendsfxf2";
-            },
-            64 => {
-                param_llvm_ty = self.context.doubleType();
-                fn_name = "__extenddfxf2";
-            },
-            80 => return operand,
-            128 => {
-                param_llvm_ty = self.context.fp128Type();
-                fn_name = "__trunctfxf2";
-            },
-            else => unreachable,
-        }
-
-        const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse f: {
-            const param_types = [_]*llvm.Type{param_llvm_ty};
-            const fn_type = llvm.functionType(ret_llvm_ty, &param_types, param_types.len, .False);
-            break :f self.dg.object.llvm_module.addFunction(fn_name, fn_type);
-        };
-
-        var args: [1]*llvm.Value = .{arg};
-        const result = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &args, args.len, .C, .Auto, "");
-        const final_cast_llvm_ty = final_cast orelse return result;
-        return self.builder.buildBitCast(result, final_cast_llvm_ty, "");
-    }
-
     fn getErrorNameTable(self: *FuncGen) !*llvm.Value {
         if (self.dg.object.error_name_table) |table| {
             return table;
@@ -10424,6 +10379,11 @@ fn backendSupportsF80(target: std.Target) bool {
 /// if it produces miscompilations.
 fn backendSupportsF16(target: std.Target) bool {
     return switch (target.cpu.arch) {
+        .powerpc,
+        .powerpcle,
+        .powerpc64,
+        .powerpc64le,
+        => false,
         else => true,
     };
 }

From 0d533433e21621177fb291e2a4901bee11834501 Mon Sep 17 00:00:00 2001
From: Cody Tapscott
Date: Fri, 7 Oct 2022 10:19:08 -0700
Subject: [PATCH 03/14] compiler_rt: Add missing `f16` functions

This change also exposes some of the existing functions under both the
PPC-style names and the compiler-rt-style names, since Zig currently
lowers softfloat calls to the latter.
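The dual-name exports follow the pattern sketched below (condensed from
the @export changes to extendhfsf2.zig in this patch; gnu_f16_abi here
stands in for the flag in compiler_rt/common.zig, and the function body
is a placeholder):

    const gnu_f16_abi = true; // stand-in for common.gnu_f16_abi

    fn __extendhfsf2(a: u16) callconv(.C) f32 {
        // Placeholder body; the real implementation widens the f16 bits.
        return @intToFloat(f32, a);
    }

    comptime {
        if (gnu_f16_abi) {
            // Targets using the GNU f16 ABI also get the legacy alias...
            @export(__extendhfsf2, .{ .name = "__gnu_h2f_ieee", .linkage = .Weak });
        }
        // ...and the compiler-rt-style name is now exported
        // unconditionally, since Zig lowers softfloat calls to it.
        @export(__extendhfsf2, .{ .name = "__extendhfsf2", .linkage = .Weak });
    }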
--- lib/compiler_rt.zig | 24 +++++++++++----- lib/compiler_rt/addhf3.zig | 12 ++++++++ lib/compiler_rt/cmphf2.zig | 50 +++++++++++++++++++++++++++++++++ lib/compiler_rt/divhf3.zig | 11 ++++++++ lib/compiler_rt/extendhfdf2.zig | 12 ++++++++ lib/compiler_rt/extendhfsf2.zig | 9 ++---- lib/compiler_rt/gehf2.zig | 31 ++++++++++++++++++++ lib/compiler_rt/mulf3.zig | 5 ++-- lib/compiler_rt/mulhf3.zig | 12 ++++++++ lib/compiler_rt/neghf2.zig | 11 ++++++++ lib/compiler_rt/subhf3.zig | 12 ++++++++ lib/compiler_rt/tan.zig | 6 ++-- lib/compiler_rt/truncsfhf2.zig | 9 ++---- lib/compiler_rt/unordhf2.zig | 12 ++++++++ lib/compiler_rt/unordxf2.zig | 12 ++++++++ 15 files changed, 203 insertions(+), 25 deletions(-) create mode 100644 lib/compiler_rt/addhf3.zig create mode 100644 lib/compiler_rt/cmphf2.zig create mode 100644 lib/compiler_rt/divhf3.zig create mode 100644 lib/compiler_rt/extendhfdf2.zig create mode 100644 lib/compiler_rt/gehf2.zig create mode 100644 lib/compiler_rt/mulhf3.zig create mode 100644 lib/compiler_rt/neghf2.zig create mode 100644 lib/compiler_rt/subhf3.zig create mode 100644 lib/compiler_rt/unordhf2.zig create mode 100644 lib/compiler_rt/unordxf2.zig diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig index d261c49ff1..1597b7e135 100644 --- a/lib/compiler_rt.zig +++ b/lib/compiler_rt.zig @@ -4,17 +4,20 @@ comptime { _ = @import("compiler_rt/atomics.zig"); _ = @import("compiler_rt/addf3.zig"); + _ = @import("compiler_rt/addhf3.zig"); _ = @import("compiler_rt/addsf3.zig"); _ = @import("compiler_rt/adddf3.zig"); _ = @import("compiler_rt/addtf3.zig"); _ = @import("compiler_rt/addxf3.zig"); + _ = @import("compiler_rt/subhf3.zig"); _ = @import("compiler_rt/subsf3.zig"); _ = @import("compiler_rt/subdf3.zig"); _ = @import("compiler_rt/subtf3.zig"); _ = @import("compiler_rt/subxf3.zig"); _ = @import("compiler_rt/mulf3.zig"); + _ = @import("compiler_rt/mulhf3.zig"); _ = @import("compiler_rt/mulsf3.zig"); _ = @import("compiler_rt/muldf3.zig"); _ = @import("compiler_rt/multf3.zig"); @@ -34,51 +37,58 @@ comptime { _ = @import("compiler_rt/divxc3.zig"); _ = @import("compiler_rt/divtc3.zig"); + _ = @import("compiler_rt/neghf2.zig"); _ = @import("compiler_rt/negsf2.zig"); _ = @import("compiler_rt/negdf2.zig"); _ = @import("compiler_rt/negtf2.zig"); _ = @import("compiler_rt/negxf2.zig"); _ = @import("compiler_rt/comparef.zig"); + _ = @import("compiler_rt/cmphf2.zig"); _ = @import("compiler_rt/cmpsf2.zig"); _ = @import("compiler_rt/cmpdf2.zig"); _ = @import("compiler_rt/cmptf2.zig"); _ = @import("compiler_rt/cmpxf2.zig"); + _ = @import("compiler_rt/gehf2.zig"); _ = @import("compiler_rt/gesf2.zig"); _ = @import("compiler_rt/gedf2.zig"); - _ = @import("compiler_rt/getf2.zig"); _ = @import("compiler_rt/gexf2.zig"); + _ = @import("compiler_rt/getf2.zig"); + _ = @import("compiler_rt/unordhf2.zig"); _ = @import("compiler_rt/unordsf2.zig"); _ = @import("compiler_rt/unorddf2.zig"); + _ = @import("compiler_rt/unordxf2.zig"); _ = @import("compiler_rt/unordtf2.zig"); _ = @import("compiler_rt/extendf.zig"); - _ = @import("compiler_rt/extenddftf2.zig"); - _ = @import("compiler_rt/extenddfxf2.zig"); _ = @import("compiler_rt/extendhfsf2.zig"); + _ = @import("compiler_rt/extendhfdf2.zig"); _ = @import("compiler_rt/extendhftf2.zig"); _ = @import("compiler_rt/extendhfxf2.zig"); _ = @import("compiler_rt/extendsfdf2.zig"); _ = @import("compiler_rt/extendsftf2.zig"); _ = @import("compiler_rt/extendsfxf2.zig"); + _ = @import("compiler_rt/extenddftf2.zig"); + _ = @import("compiler_rt/extenddfxf2.zig"); _ = 
@import("compiler_rt/extendxftf2.zig"); _ = @import("compiler_rt/truncf.zig"); _ = @import("compiler_rt/truncsfhf2.zig"); _ = @import("compiler_rt/truncdfhf2.zig"); _ = @import("compiler_rt/truncdfsf2.zig"); + _ = @import("compiler_rt/truncxfhf2.zig"); + _ = @import("compiler_rt/truncxfsf2.zig"); + _ = @import("compiler_rt/truncxfdf2.zig"); _ = @import("compiler_rt/trunctfhf2.zig"); _ = @import("compiler_rt/trunctfsf2.zig"); _ = @import("compiler_rt/trunctfdf2.zig"); _ = @import("compiler_rt/trunctfxf2.zig"); - _ = @import("compiler_rt/truncxfhf2.zig"); - _ = @import("compiler_rt/truncxfsf2.zig"); - _ = @import("compiler_rt/truncxfdf2.zig"); - _ = @import("compiler_rt/divtf3.zig"); + _ = @import("compiler_rt/divhf3.zig"); _ = @import("compiler_rt/divsf3.zig"); _ = @import("compiler_rt/divdf3.zig"); _ = @import("compiler_rt/divxf3.zig"); + _ = @import("compiler_rt/divtf3.zig"); _ = @import("compiler_rt/sin.zig"); _ = @import("compiler_rt/cos.zig"); _ = @import("compiler_rt/sincos.zig"); diff --git a/lib/compiler_rt/addhf3.zig b/lib/compiler_rt/addhf3.zig new file mode 100644 index 0000000000..12086aef38 --- /dev/null +++ b/lib/compiler_rt/addhf3.zig @@ -0,0 +1,12 @@ +const common = @import("./common.zig"); +const addf3 = @import("./addf3.zig").addf3; + +pub const panic = common.panic; + +comptime { + @export(__addhf3, .{ .name = "__addhf3", .linkage = common.linkage }); +} + +fn __addhf3(a: f16, b: f16) callconv(.C) f16 { + return addf3(f16, a, b); +} diff --git a/lib/compiler_rt/cmphf2.zig b/lib/compiler_rt/cmphf2.zig new file mode 100644 index 0000000000..d5ee3f1daa --- /dev/null +++ b/lib/compiler_rt/cmphf2.zig @@ -0,0 +1,50 @@ +///! The quoted behavior definitions are from +///! https://gcc.gnu.org/onlinedocs/gcc-12.1.0/gccint/Soft-float-library-routines.html#Soft-float-library-routines +const common = @import("./common.zig"); +const comparef = @import("./comparef.zig"); + +pub const panic = common.panic; + +comptime { + @export(__eqhf2, .{ .name = "__eqhf2", .linkage = common.linkage }); + @export(__nehf2, .{ .name = "__nehf2", .linkage = common.linkage }); + @export(__lehf2, .{ .name = "__lehf2", .linkage = common.linkage }); + @export(__cmphf2, .{ .name = "__cmphf2", .linkage = common.linkage }); + @export(__lthf2, .{ .name = "__lthf2", .linkage = common.linkage }); +} + +/// "These functions calculate a <=> b. That is, if a is less than b, they return -1; +/// if a is greater than b, they return 1; and if a and b are equal they return 0. +/// If either argument is NaN they return 1..." +/// +/// Note that this matches the definition of `__lehf2`, `__eqhf2`, `__nehf2`, `__cmphf2`, +/// and `__lthf2`. +fn __cmphf2(a: f16, b: f16) callconv(.C) i32 { + return @enumToInt(comparef.cmpf2(f16, comparef.LE, a, b)); +} + +/// "These functions return a value less than or equal to zero if neither argument is NaN, +/// and a is less than or equal to b." +pub fn __lehf2(a: f16, b: f16) callconv(.C) i32 { + return __cmphf2(a, b); +} + +/// "These functions return zero if neither argument is NaN, and a and b are equal." +/// Note that due to some kind of historical accident, __eqhf2 and __nehf2 are defined +/// to have the same return value. +pub fn __eqhf2(a: f16, b: f16) callconv(.C) i32 { + return __cmphf2(a, b); +} + +/// "These functions return a nonzero value if either argument is NaN, or if a and b are unequal." +/// Note that due to some kind of historical accident, __eqhf2 and __nehf2 are defined +/// to have the same return value. 
+pub fn __nehf2(a: f16, b: f16) callconv(.C) i32 { + return __cmphf2(a, b); +} + +/// "These functions return a value less than zero if neither argument is NaN, and a +/// is strictly less than b." +pub fn __lthf2(a: f16, b: f16) callconv(.C) i32 { + return __cmphf2(a, b); +} diff --git a/lib/compiler_rt/divhf3.zig b/lib/compiler_rt/divhf3.zig new file mode 100644 index 0000000000..ad73a5ddb2 --- /dev/null +++ b/lib/compiler_rt/divhf3.zig @@ -0,0 +1,11 @@ +const common = @import("common.zig"); +const divsf3 = @import("./divsf3.zig"); + +comptime { + @export(__divhf3, .{ .name = "__divhf3", .linkage = common.linkage }); +} + +pub fn __divhf3(a: f16, b: f16) callconv(.C) f16 { + // TODO: more efficient implementation + return @floatCast(f16, divsf3.__divsf3(a, b)); +} diff --git a/lib/compiler_rt/extendhfdf2.zig b/lib/compiler_rt/extendhfdf2.zig new file mode 100644 index 0000000000..f7a94f58ef --- /dev/null +++ b/lib/compiler_rt/extendhfdf2.zig @@ -0,0 +1,12 @@ +const common = @import("./common.zig"); +const extendf = @import("./extendf.zig").extendf; + +pub const panic = common.panic; + +comptime { + @export(__extendhfdf2, .{ .name = "__extendhfdf2", .linkage = common.linkage }); +} + +pub fn __extendhfdf2(a: common.F16T) callconv(.C) f64 { + return extendf(f64, f16, @bitCast(u16, a)); +} diff --git a/lib/compiler_rt/extendhfsf2.zig b/lib/compiler_rt/extendhfsf2.zig index a6bf5f5be5..0c204ec188 100644 --- a/lib/compiler_rt/extendhfsf2.zig +++ b/lib/compiler_rt/extendhfsf2.zig @@ -5,22 +5,17 @@ pub const panic = common.panic; comptime { if (common.gnu_f16_abi) { - @export(__gnu_h2f_ieee, .{ .name = "__gnu_h2f_ieee", .linkage = common.linkage }); + @export(__extendhfsf2, .{ .name = "__gnu_h2f_ieee", .linkage = common.linkage }); } else if (common.want_aeabi) { @export(__aeabi_h2f, .{ .name = "__aeabi_h2f", .linkage = common.linkage }); - } else { - @export(__extendhfsf2, .{ .name = "__extendhfsf2", .linkage = common.linkage }); } + @export(__extendhfsf2, .{ .name = "__extendhfsf2", .linkage = common.linkage }); } pub fn __extendhfsf2(a: common.F16T) callconv(.C) f32 { return extendf(f32, f16, @bitCast(u16, a)); } -fn __gnu_h2f_ieee(a: common.F16T) callconv(.C) f32 { - return extendf(f32, f16, @bitCast(u16, a)); -} - fn __aeabi_h2f(a: u16) callconv(.AAPCS) f32 { return extendf(f32, f16, @bitCast(u16, a)); } diff --git a/lib/compiler_rt/gehf2.zig b/lib/compiler_rt/gehf2.zig new file mode 100644 index 0000000000..651cbf943f --- /dev/null +++ b/lib/compiler_rt/gehf2.zig @@ -0,0 +1,31 @@ +///! The quoted behavior definitions are from +///! https://gcc.gnu.org/onlinedocs/gcc-12.1.0/gccint/Soft-float-library-routines.html#Soft-float-library-routines +const common = @import("./common.zig"); +const comparef = @import("./comparef.zig"); + +pub const panic = common.panic; + +comptime { + @export(__gehf2, .{ .name = "__gehf2", .linkage = common.linkage }); + @export(__gthf2, .{ .name = "__gthf2", .linkage = common.linkage }); +} + +/// "These functions return a value greater than or equal to zero if neither +/// argument is NaN, and a is greater than or equal to b." +pub fn __gehf2(a: f16, b: f16) callconv(.C) i32 { + return @enumToInt(comparef.cmpf2(f16, comparef.GE, a, b)); +} + +/// "These functions return a value greater than zero if neither argument is NaN, +/// and a is strictly greater than b." 
+pub fn __gthf2(a: f16, b: f16) callconv(.C) i32 { + return __gehf2(a, b); +} + +fn __aeabi_fcmpge(a: f16, b: f16) callconv(.AAPCS) i32 { + return @boolToInt(comparef.cmpf2(f16, comparef.GE, a, b) != .Less); +} + +fn __aeabi_fcmpgt(a: f16, b: f16) callconv(.AAPCS) i32 { + return @boolToInt(comparef.cmpf2(f16, comparef.LE, a, b) == .Greater); +} diff --git a/lib/compiler_rt/mulf3.zig b/lib/compiler_rt/mulf3.zig index 770721cb80..b02bd81671 100644 --- a/lib/compiler_rt/mulf3.zig +++ b/lib/compiler_rt/mulf3.zig @@ -32,8 +32,9 @@ pub inline fn mulf3(comptime T: type, a: T, b: T) T { const infRep = @bitCast(Z, math.inf(T)); const minNormalRep = @bitCast(Z, math.floatMin(T)); - const aExponent = @truncate(u32, (@bitCast(Z, a) >> significandBits) & maxExponent); - const bExponent = @truncate(u32, (@bitCast(Z, b) >> significandBits) & maxExponent); + const ZExp = if (typeWidth >= 32) u32 else Z; + const aExponent = @truncate(ZExp, (@bitCast(Z, a) >> significandBits) & maxExponent); + const bExponent = @truncate(ZExp, (@bitCast(Z, b) >> significandBits) & maxExponent); const productSign: Z = (@bitCast(Z, a) ^ @bitCast(Z, b)) & signBit; var aSignificand: ZSignificand = @intCast(ZSignificand, @bitCast(Z, a) & significandMask); diff --git a/lib/compiler_rt/mulhf3.zig b/lib/compiler_rt/mulhf3.zig new file mode 100644 index 0000000000..45251548be --- /dev/null +++ b/lib/compiler_rt/mulhf3.zig @@ -0,0 +1,12 @@ +const common = @import("./common.zig"); +const mulf3 = @import("./mulf3.zig").mulf3; + +pub const panic = common.panic; + +comptime { + @export(__mulhf3, .{ .name = "__mulhf3", .linkage = common.linkage }); +} + +pub fn __mulhf3(a: f16, b: f16) callconv(.C) f16 { + return mulf3(f16, a, b); +} diff --git a/lib/compiler_rt/neghf2.zig b/lib/compiler_rt/neghf2.zig new file mode 100644 index 0000000000..fe55a751d8 --- /dev/null +++ b/lib/compiler_rt/neghf2.zig @@ -0,0 +1,11 @@ +const common = @import("./common.zig"); + +pub const panic = common.panic; + +comptime { + @export(__neghf2, .{ .name = "__neghf2", .linkage = common.linkage }); +} + +fn __neghf2(a: f16) callconv(.C) f16 { + return common.fneg(a); +} diff --git a/lib/compiler_rt/subhf3.zig b/lib/compiler_rt/subhf3.zig new file mode 100644 index 0000000000..b14da2d794 --- /dev/null +++ b/lib/compiler_rt/subhf3.zig @@ -0,0 +1,12 @@ +const common = @import("./common.zig"); + +pub const panic = common.panic; + +comptime { + @export(__subhf3, .{ .name = "__subhf3", .linkage = common.linkage }); +} + +fn __subhf3(a: f16, b: f16) callconv(.C) f16 { + const neg_b = @bitCast(f16, @bitCast(u16, b) ^ (@as(u16, 1) << 15)); + return a + neg_b; +} diff --git a/lib/compiler_rt/tan.zig b/lib/compiler_rt/tan.zig index 9c44e4c682..8b8f8287a3 100644 --- a/lib/compiler_rt/tan.zig +++ b/lib/compiler_rt/tan.zig @@ -24,8 +24,10 @@ comptime { @export(tanf, .{ .name = "tanf", .linkage = common.linkage }); @export(tan, .{ .name = "tan", .linkage = common.linkage }); @export(__tanx, .{ .name = "__tanx", .linkage = common.linkage }); - const tanq_sym_name = if (common.want_ppc_abi) "tanf128" else "tanq"; - @export(tanq, .{ .name = tanq_sym_name, .linkage = common.linkage }); + if (common.want_ppc_abi) { + @export(tanq, .{ .name = "tanf128", .linkage = common.linkage }); + } + @export(tanq, .{ .name = "tanq", .linkage = common.linkage }); @export(tanl, .{ .name = "tanl", .linkage = common.linkage }); } diff --git a/lib/compiler_rt/truncsfhf2.zig b/lib/compiler_rt/truncsfhf2.zig index 489fb8658d..010e257923 100644 --- a/lib/compiler_rt/truncsfhf2.zig +++ 
b/lib/compiler_rt/truncsfhf2.zig @@ -5,22 +5,17 @@ pub const panic = common.panic; comptime { if (common.gnu_f16_abi) { - @export(__gnu_f2h_ieee, .{ .name = "__gnu_f2h_ieee", .linkage = common.linkage }); + @export(__truncsfhf2, .{ .name = "__gnu_f2h_ieee", .linkage = common.linkage }); } else if (common.want_aeabi) { @export(__aeabi_f2h, .{ .name = "__aeabi_f2h", .linkage = common.linkage }); - } else { - @export(__truncsfhf2, .{ .name = "__truncsfhf2", .linkage = common.linkage }); } + @export(__truncsfhf2, .{ .name = "__truncsfhf2", .linkage = common.linkage }); } pub fn __truncsfhf2(a: f32) callconv(.C) common.F16T { return @bitCast(common.F16T, truncf(f16, f32, a)); } -fn __gnu_f2h_ieee(a: f32) callconv(.C) common.F16T { - return @bitCast(common.F16T, truncf(f16, f32, a)); -} - fn __aeabi_f2h(a: f32) callconv(.AAPCS) u16 { return @bitCast(common.F16T, truncf(f16, f32, a)); } diff --git a/lib/compiler_rt/unordhf2.zig b/lib/compiler_rt/unordhf2.zig new file mode 100644 index 0000000000..0c2aea629a --- /dev/null +++ b/lib/compiler_rt/unordhf2.zig @@ -0,0 +1,12 @@ +const common = @import("./common.zig"); +const comparef = @import("./comparef.zig"); + +pub const panic = common.panic; + +comptime { + @export(__unordhf2, .{ .name = "__unordhf2", .linkage = common.linkage }); +} + +pub fn __unordhf2(a: f16, b: f16) callconv(.C) i32 { + return comparef.unordcmp(f16, a, b); +} diff --git a/lib/compiler_rt/unordxf2.zig b/lib/compiler_rt/unordxf2.zig new file mode 100644 index 0000000000..e456096370 --- /dev/null +++ b/lib/compiler_rt/unordxf2.zig @@ -0,0 +1,12 @@ +const common = @import("./common.zig"); +const comparef = @import("./comparef.zig"); + +pub const panic = common.panic; + +comptime { + @export(__unordxf2, .{ .name = "__unordxf2", .linkage = common.linkage }); +} + +pub fn __unordxf2(a: f80, b: f80) callconv(.C) i32 { + return comparef.unordcmp(f80, a, b); +} From 37c6fcafa27c9dc4b323ca2fc79f5a928c1b4d14 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Fri, 7 Oct 2022 11:08:22 -0700 Subject: [PATCH 04/14] stage2 LLVM: Correct long double for WASM Also, f16 is not supported on this backend. 
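For reference, the gate being extended here has this shape (a condensed
sketch of backendSupportsF16 in src/codegen/llvm.zig; the arch list is
abbreviated):

    const std = @import("std");

    // When this returns false, f16 is represented as i16 and float
    // operations are lowered to compiler-rt libcalls instead of
    // half-precision intrinsics.
    fn backendSupportsF16(arch: std.Target.Cpu.Arch) bool {
        return switch (arch) {
            .wasm32, .wasm64 => false, // abbreviated; PPC is listed too
            else => true,
        };
    }

    test {
        try std.testing.expect(!backendSupportsF16(.wasm32));
        try std.testing.expect(backendSupportsF16(.x86_64));
    }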
---
 lib/std/target.zig | 2 ++
 src/codegen/llvm.zig | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/lib/std/target.zig b/lib/std/target.zig
index b6a8a8b9c0..44ca6cf3b9 100644
--- a/lib/std/target.zig
+++ b/lib/std/target.zig
@@ -1789,6 +1789,8 @@ pub const Target = struct {
             .powerpcle,
             .powerpc64,
             .powerpc64le,
+            .wasm32,
+            .wasm64,
             => true,

             else => false,
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 6134ba7d50..a39c3fee6c 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -10383,6 +10383,8 @@ fn backendSupportsF16(target: std.Target) bool {
         .powerpcle,
         .powerpc64,
         .powerpc64le,
+        .wasm32,
+        .wasm64,
         => false,
         else => true,
     };

From b992ea1b079c5967348655f8719cdadaf2df8261 Mon Sep 17 00:00:00 2001
From: Cody Tapscott
Date: Sat, 8 Oct 2022 11:27:29 -0700
Subject: [PATCH 05/14] stage1: Rely on softfloat for `f16` on non-arm targets

---
 src/stage1/codegen.cpp | 51 ++++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp
index bd572bb96c..18e30d416f 100644
--- a/src/stage1/codegen.cpp
+++ b/src/stage1/codegen.cpp
@@ -80,6 +80,7 @@ void codegen_set_strip(CodeGen *g, bool strip) {
     }
 }

+static LLVMValueRef get_soft_float_fn(CodeGen *g, const char *name, int param_count, LLVMTypeRef param_type, LLVMTypeRef return_type);
 static void render_const_val(CodeGen *g, ZigValue *const_val, const char *name);
 static void render_const_val_global(CodeGen *g, ZigValue *const_val, const char *name);
 static LLVMValueRef gen_const_val(CodeGen *g, ZigValue *const_val, const char *name);
@@ -1736,12 +1737,7 @@ static LLVMValueRef gen_soft_float_widen_or_shorten(CodeGen *g, ZigType *actual_
         }
     }

-    LLVMValueRef func_ref = LLVMGetNamedFunction(g->module, fn_name);
-    if (func_ref == nullptr) {
-        LLVMTypeRef fn_type = LLVMFunctionType(return_type, &param_type, 1, false);
-        func_ref = LLVMAddFunction(g->module, fn_name, fn_type);
-    }
-
+    LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, 1, param_type, return_type);
     result = LLVMBuildCall2(g->builder, LLVMGlobalGetValueType(func_ref), func_ref, &expr_val, 1, "");

     // On non-Arm platforms we need to bitcast __trunc<>fhf2 result back to f16
@@ -1766,9 +1762,12 @@ static LLVMValueRef gen_widen_or_shorten(CodeGen *g, bool want_runtime_safety, Z
     uint64_t wanted_bits;

     if (scalar_actual_type->id == ZigTypeIdFloat) {
-        if ((scalar_actual_type == g->builtin_types.entry_f80
+        if (((scalar_actual_type == g->builtin_types.entry_f80
             || scalar_wanted_type == g->builtin_types.entry_f80)
-            && !target_has_f80(g->zig_target))
+            && !target_has_f80(g->zig_target)) ||
+            ((scalar_actual_type == g->builtin_types.entry_f16
+            || scalar_wanted_type == g->builtin_types.entry_f16)
+            && !target_is_arm(g->zig_target)))
         {
             return gen_soft_float_widen_or_shorten(g, actual_type, wanted_type, expr_val);
         }
@@ -3100,6 +3099,7 @@ static LLVMValueRef gen_float_un_op(CodeGen *g, LLVMValueRef operand, ZigType *o
     ZigType *elem_type = operand_type->id == ZigTypeIdVector ?
operand_type->data.vector.elem_type : operand_type; if ((elem_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) || (elem_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) || + (elem_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target)) || op == BuiltinFnIdTan) { return gen_soft_float_un_op(g, operand, operand_type, op); @@ -3690,7 +3690,8 @@ static LLVMValueRef ir_render_bin_op(CodeGen *g, Stage1Air *executable, ZigType *operand_type = op1->value->type; ZigType *scalar_type = (operand_type->id == ZigTypeIdVector) ? operand_type->data.vector.elem_type : operand_type; if ((scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) || - (scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) { + (scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) || + (scalar_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) { // LLVM incorrectly lowers the soft float calls for f128 as if they operated on `long double`. // On some targets this will be incorrect, so we manually lower the call ourselves. LLVMValueRef op1_value = ir_llvm_value(g, op1); @@ -4024,7 +4025,8 @@ static LLVMValueRef ir_render_cast(CodeGen *g, Stage1Air *executable, assert(actual_type->id == ZigTypeIdInt); { if ((wanted_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) || - (wanted_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) { + (wanted_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) || + (wanted_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) { return gen_soft_int_to_float_op(g, expr_val, actual_type, wanted_type); } else { if (actual_type->data.integral.is_signed) { @@ -4042,7 +4044,8 @@ static LLVMValueRef ir_render_cast(CodeGen *g, Stage1Air *executable, LLVMValueRef result; if ((actual_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) || - (actual_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) { + (actual_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) || + (actual_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) { result = gen_soft_float_to_int_op(g, expr_val, actual_type, wanted_type); } else { if (wanted_type->data.integral.is_signed) { @@ -4396,7 +4399,8 @@ static LLVMValueRef gen_negation(CodeGen *g, Stage1AirInst *inst, Stage1AirInst operand_type->data.vector.elem_type : operand_type; if ((scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) || - (scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) { + (scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) || + (scalar_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) { return gen_soft_float_neg(g, operand_type, llvm_operand); } @@ -7374,7 +7378,9 @@ static LLVMValueRef ir_render_soft_mul_add(CodeGen *g, Stage1Air *executable, St uint32_t vector_len = operand_type->id == ZigTypeIdVector ? 
operand_type->data.vector.len : 0; const char *fn_name; - if (float_type == g->builtin_types.entry_f32) + if (float_type == g->builtin_types.entry_f16) + fn_name = "__fmah"; + else if (float_type == g->builtin_types.entry_f32) fn_name = "fmaf"; else if (float_type == g->builtin_types.entry_f64) fn_name = "fma"; @@ -7385,13 +7391,8 @@ static LLVMValueRef ir_render_soft_mul_add(CodeGen *g, Stage1Air *executable, St else zig_unreachable(); - LLVMValueRef func_ref = LLVMGetNamedFunction(g->module, fn_name); - if (func_ref == nullptr) { - LLVMTypeRef float_type_ref = float_type->llvm_type; - LLVMTypeRef params[3] = { float_type_ref, float_type_ref, float_type_ref }; - LLVMTypeRef fn_type = LLVMFunctionType(float_type_ref, params, 3, false); - func_ref = LLVMAddFunction(g->module, fn_name, fn_type); - } + LLVMTypeRef float_type_ref = float_type->llvm_type; + LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, 3, float_type_ref, float_type_ref); LLVMValueRef op1 = ir_llvm_value(g, instruction->op1); LLVMValueRef op2 = ir_llvm_value(g, instruction->op2); @@ -7421,7 +7422,8 @@ static LLVMValueRef ir_render_mul_add(CodeGen *g, Stage1Air *executable, Stage1A ZigType *operand_type = instruction->op1->value->type; operand_type = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.elem_type : operand_type; if ((operand_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) || - (operand_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) { + (operand_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) || + (operand_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) { return ir_render_soft_mul_add(g, executable, instruction, operand_type); } LLVMValueRef op1 = ir_llvm_value(g, instruction->op1); @@ -9740,7 +9742,12 @@ static void define_builtin_types(CodeGen *g) { } } - add_fp_entry(g, "f16", 16, LLVMHalfType(), &g->builtin_types.entry_f16); + if (target_is_arm(g->zig_target)) { + add_fp_entry(g, "f16", 16, LLVMHalfType(), &g->builtin_types.entry_f16); + } else { + ZigType *u16_ty = get_int_type(g, false, 16); + add_fp_entry(g, "f16", 16, get_llvm_type(g, u16_ty), &g->builtin_types.entry_f16); + } add_fp_entry(g, "f32", 32, LLVMFloatType(), &g->builtin_types.entry_f32); add_fp_entry(g, "f64", 64, LLVMDoubleType(), &g->builtin_types.entry_f64); add_fp_entry(g, "f128", 128, LLVMFP128Type(), &g->builtin_types.entry_f128); From f0d12dd82bfecf28ffe82ff706e98633d4c6b048 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Sat, 8 Oct 2022 11:31:59 -0700 Subject: [PATCH 06/14] stage2: Add softfloat support for `@reduce` --- src/codegen/llvm.zig | 118 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 5 deletions(-) diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index a39c3fee6c..2189bb3127 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -8726,12 +8726,78 @@ pub const FuncGen = struct { return self.builder.buildShuffleVector(a, b, llvm_mask_value, ""); } + /// Reduce a vector by repeatedly applying `llvm_fn` to produce an accumulated result. 
+    ///
+    /// Equivalent to:
+    /// reduce: {
+    ///     var i: usize = 0;
+    ///     var accum: T = init;
+    ///     while (i < vec.len) : (i += 1) {
+    ///         accum = llvm_fn(accum, vec[i]);
+    ///     }
+    ///     break :reduce accum;
+    /// }
+    ///
+    fn buildReducedCall(
+        self: *FuncGen,
+        llvm_fn: *llvm.Value,
+        operand_vector: *llvm.Value,
+        vector_len: usize,
+        accum_init: *llvm.Value,
+    ) !*llvm.Value {
+        const llvm_usize_ty = try self.dg.lowerType(Type.usize);
+        const llvm_vector_len = llvm_usize_ty.constInt(vector_len, .False);
+        const llvm_result_ty = accum_init.typeOf();
+
+        // Allocate and initialize our mutable variables
+        const i_ptr = self.buildAlloca(llvm_usize_ty);
+        _ = self.builder.buildStore(llvm_usize_ty.constInt(0, .False), i_ptr);
+        const accum_ptr = self.buildAlloca(llvm_result_ty);
+        _ = self.builder.buildStore(accum_init, accum_ptr);
+
+        // Setup the loop
+        const loop = self.context.appendBasicBlock(self.llvm_func, "ReduceLoop");
+        const loop_exit = self.context.appendBasicBlock(self.llvm_func, "AfterReduce");
+        _ = self.builder.buildBr(loop);
+        {
+            self.builder.positionBuilderAtEnd(loop);
+
+            // while (i < vec.len)
+            const i = self.builder.buildLoad(llvm_usize_ty, i_ptr, "");
+            const cond = self.builder.buildICmp(.ULT, i, llvm_vector_len, "");
+            const loop_then = self.context.appendBasicBlock(self.llvm_func, "ReduceLoopThen");
+
+            _ = self.builder.buildCondBr(cond, loop_then, loop_exit);
+
+            {
+                self.builder.positionBuilderAtEnd(loop_then);
+
+                // accum = f(accum, vec[i]);
+                const accum = self.builder.buildLoad(llvm_result_ty, accum_ptr, "");
+                const element = self.builder.buildExtractElement(operand_vector, i, "");
+                const params = [2]*llvm.Value{ accum, element };
+                const new_accum = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &params, params.len, .C, .Auto, "");
+                _ = self.builder.buildStore(new_accum, accum_ptr);
+
+                // i += 1
+                const new_i = self.builder.buildAdd(i, llvm_usize_ty.constInt(1, .False), "");
+                _ = self.builder.buildStore(new_i, i_ptr);
+                _ = self.builder.buildBr(loop);
+            }
+        }
+
+        self.builder.positionBuilderAtEnd(loop_exit);
+        return self.builder.buildLoad(llvm_result_ty, accum_ptr, "");
+    }
+
     fn airReduce(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
         self.builder.setFastMath(want_fast_math);

+        const target = self.dg.module.getTarget();
         const reduce = self.air.instructions.items(.data)[inst].reduce;
-        const operand = try self.resolveInst(reduce.operand);
+        var operand = try self.resolveInst(reduce.operand);
+        const operand_ty = self.air.typeOf(reduce.operand);
         const scalar_ty = self.air.typeOfIndex(inst);

         // TODO handle the fast math setting
@@ -8742,17 +8808,21 @@ pub const FuncGen = struct {
             .Xor => return self.builder.buildXorReduce(operand),
             .Min => switch (scalar_ty.zigTypeTag()) {
                 .Int => return self.builder.buildIntMinReduce(operand, scalar_ty.isSignedInt()),
-                .Float => return self.builder.buildFPMinReduce(operand),
+                .Float => if (intrinsicsAllowed(scalar_ty, target)) {
+                    return self.builder.buildFPMinReduce(operand);
+                },
                 else => unreachable,
             },
             .Max => switch (scalar_ty.zigTypeTag()) {
                 .Int => return self.builder.buildIntMaxReduce(operand, scalar_ty.isSignedInt()),
-                .Float => return self.builder.buildFPMaxReduce(operand),
+                .Float => if (intrinsicsAllowed(scalar_ty, target)) {
+                    return self.builder.buildFPMaxReduce(operand);
+                },
                 else => unreachable,
             },
             .Add => switch (scalar_ty.zigTypeTag()) {
                 .Int => return self.builder.buildAddReduce(operand),
-                .Float => {
+                .Float => if (intrinsicsAllowed(scalar_ty, target)) {
                     const scalar_llvm_ty = try self.dg.lowerType(scalar_ty);
                     const neutral_value = scalar_llvm_ty.constReal(-0.0);
                     return self.builder.buildFPAddReduce(neutral_value, operand);
                 },
                 else => unreachable,
             },
             .Mul => switch (scalar_ty.zigTypeTag()) {
                 .Int => return self.builder.buildMulReduce(operand),
-                .Float => {
+                .Float => if (intrinsicsAllowed(scalar_ty, target)) {
                     const scalar_llvm_ty = try self.dg.lowerType(scalar_ty);
                     const neutral_value = scalar_llvm_ty.constReal(1.0);
                     return self.builder.buildFPMulReduce(neutral_value, operand);
                 },
                 else => unreachable,
             },
         }
+
+        // Reduction could not be performed with intrinsics.
+        // Use a manual loop over a softfloat call instead.
+        var fn_name_buf: [64]u8 = undefined;
+        const float_bits = scalar_ty.floatBits(target);
+        const fn_name = switch (reduce.operation) {
+            .Min => std.fmt.bufPrintZ(&fn_name_buf, "{s}fmin{s}", .{
+                libcFloatPrefix(float_bits), libcFloatSuffix(float_bits),
+            }) catch unreachable,
+            .Max => std.fmt.bufPrintZ(&fn_name_buf, "{s}fmax{s}", .{
+                libcFloatPrefix(float_bits), libcFloatSuffix(float_bits),
+            }) catch unreachable,
+            .Add => std.fmt.bufPrintZ(&fn_name_buf, "__add{s}f3", .{
+                compilerRtFloatAbbrev(float_bits),
+            }) catch unreachable,
+            .Mul => std.fmt.bufPrintZ(&fn_name_buf, "__mul{s}f3", .{
+                compilerRtFloatAbbrev(float_bits),
+            }) catch unreachable,
+            else => unreachable,
+        };
+        var init_value_payload = Value.Payload.Float_32{
+            .data = switch (reduce.operation) {
+                .Min => std.math.nan(f32),
+                .Max => std.math.nan(f32),
+                .Add => -0.0,
+                .Mul => 1.0,
+                else => unreachable,
+            },
+        };
+
+        const param_llvm_ty = try self.dg.lowerType(scalar_ty);
+        const param_types = [2]*llvm.Type{ param_llvm_ty, param_llvm_ty };
+        const libc_fn = self.getLibcFunction(fn_name, &param_types, param_llvm_ty);
+        const init_value = try self.dg.lowerValue(.{
+            .ty = scalar_ty,
+            .val = Value.initPayload(&init_value_payload.base),
+        });
+        return self.buildReducedCall(libc_fn, operand, operand_ty.vectorLen(), init_value);
     }

     fn airAggregateInit(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {

From 6e6ae8886e6885a648918cdb006f899b40b378aa Mon Sep 17 00:00:00 2001
From: Cody Tapscott
Date: Tue, 11 Oct 2022 11:04:29 -0700
Subject: [PATCH 07/14] stage1: Add softfloat support for `@reduce`

---
 src/stage1/analyze.cpp | 6 +-
 src/stage1/codegen.cpp | 122 +++++++++++++++++++++++++++++++++------
 src/stage1/softfloat.hpp | 14 +++++
 3 files changed, 123 insertions(+), 19 deletions(-)

diff --git a/src/stage1/analyze.cpp b/src/stage1/analyze.cpp
index f0cad841be..2d0624a7c7 100644
--- a/src/stage1/analyze.cpp
+++ b/src/stage1/analyze.cpp
@@ -6358,9 +6358,11 @@ void init_const_float(ZigValue *const_val, ZigType *type, double value) {
             const_val->data.x_f64 = value;
             break;
         case 80:
+            zig_double_to_extF80M(value, &const_val->data.x_f80);
+            break;
         case 128:
-            // if we need this, we should add a function that accepts a float128_t param
-            zig_unreachable();
+            zig_double_to_f128M(value, &const_val->data.x_f128);
+            break;
         default:
             zig_unreachable();
     }
diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp
index 18e30d416f..5546605b88 100644
--- a/src/stage1/codegen.cpp
+++ b/src/stage1/codegen.cpp
@@ -6481,6 +6481,55 @@ static LLVMValueRef ir_render_cmpxchg(CodeGen *g, Stage1Air *executable, Stage1A
     return result_loc;
 }

+static LLVMValueRef ir_render_reduced_call(CodeGen *g, LLVMValueRef llvm_fn, LLVMValueRef operand_vector, size_t vector_len,
LLVMValueRef accum_init, ZigType *accum_ty) { + LLVMTypeRef llvm_usize_ty = g->builtin_types.entry_usize->llvm_type; + LLVMValueRef llvm_vector_len = LLVMConstInt(llvm_usize_ty, vector_len, false); + LLVMTypeRef llvm_result_ty = LLVMTypeOf(accum_init); + + // Allocate and initialize our mutable variables + LLVMValueRef i_ptr = build_alloca(g, g->builtin_types.entry_usize, "i", 0); + LLVMBuildStore(g->builder, LLVMConstInt(llvm_usize_ty, 0, false), i_ptr); + LLVMValueRef accum_ptr = build_alloca(g, accum_ty, "accum", 0); + LLVMBuildStore(g->builder, accum_init, accum_ptr); + + // Setup the loop + LLVMBasicBlockRef loop = LLVMAppendBasicBlock(g->cur_fn_val, "ReduceLoop"); + LLVMBasicBlockRef loop_exit = LLVMAppendBasicBlock(g->cur_fn_val, "AfterReduce"); + LLVMBuildBr(g->builder, loop); + { + LLVMPositionBuilderAtEnd(g->builder, loop); + + // while (i < vec.len) + LLVMValueRef i = LLVMBuildLoad2(g->builder, llvm_usize_ty, i_ptr, ""); + LLVMValueRef cond = LLVMBuildICmp(g->builder, LLVMIntULT, i, llvm_vector_len, ""); + LLVMBasicBlockRef loop_then = LLVMAppendBasicBlock(g->cur_fn_val, "ReduceLoopThen"); + + LLVMBuildCondBr(g->builder, cond, loop_then, loop_exit); + + { + LLVMPositionBuilderAtEnd(g->builder, loop_then); + + // accum = f(accum, vec[i]); + LLVMValueRef accum = LLVMBuildLoad2(g->builder, llvm_result_ty, accum_ptr, ""); + LLVMValueRef element = LLVMBuildExtractElement(g->builder, operand_vector, i, ""); + LLVMValueRef params[] { + accum, + element + }; + LLVMValueRef new_accum = LLVMBuildCall2(g->builder, LLVMGlobalGetValueType(llvm_fn), llvm_fn, params, 2, ""); + LLVMBuildStore(g->builder, new_accum, accum_ptr); + + // i += 1 + LLVMValueRef new_i = LLVMBuildAdd(g->builder, i, LLVMConstInt(llvm_usize_ty, 1, false), ""); + LLVMBuildStore(g->builder, new_i, i_ptr); + LLVMBuildBr(g->builder, loop); + } + } + + LLVMPositionBuilderAtEnd(g->builder, loop_exit); + return LLVMBuildLoad2(g->builder, llvm_result_ty, accum_ptr, ""); +} + static LLVMValueRef ir_render_reduce(CodeGen *g, Stage1Air *executable, Stage1AirInstReduce *instruction) { LLVMValueRef value = ir_llvm_value(g, instruction->value); @@ -6488,61 +6537,100 @@ static LLVMValueRef ir_render_reduce(CodeGen *g, Stage1Air *executable, Stage1Ai assert(value_type->id == ZigTypeIdVector); ZigType *scalar_type = value_type->data.vector.elem_type; + bool float_intrinsics_allowed = true; + const char *compiler_rt_type_abbrev = nullptr; + const char *math_float_prefix = nullptr; + const char *math_float_suffix = nullptr; + if ((scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) || + (scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) || + (scalar_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) { + float_intrinsics_allowed = false; + compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(scalar_type); + math_float_prefix = libc_float_prefix(g, scalar_type); + math_float_suffix = libc_float_suffix(g, scalar_type); + } + ZigLLVMSetFastMath(g->builder, ir_want_fast_math(g, &instruction->base)); - LLVMValueRef result_val; + char fn_name[64]; + ZigValue *init_value = nullptr; switch (instruction->op) { case ReduceOp_and: assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool); - result_val = ZigLLVMBuildAndReduce(g->builder, value); + return ZigLLVMBuildAndReduce(g->builder, value); break; case ReduceOp_or: assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool); - result_val = ZigLLVMBuildOrReduce(g->builder, value); 
+ return ZigLLVMBuildOrReduce(g->builder, value); break; case ReduceOp_xor: assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool); - result_val = ZigLLVMBuildXorReduce(g->builder, value); + return ZigLLVMBuildXorReduce(g->builder, value); break; case ReduceOp_min: { if (scalar_type->id == ZigTypeIdInt) { const bool is_signed = scalar_type->data.integral.is_signed; - result_val = ZigLLVMBuildIntMinReduce(g->builder, value, is_signed); + return ZigLLVMBuildIntMinReduce(g->builder, value, is_signed); } else if (scalar_type->id == ZigTypeIdFloat) { - result_val = ZigLLVMBuildFPMinReduce(g->builder, value); + if (float_intrinsics_allowed) { + return ZigLLVMBuildFPMinReduce(g->builder, value); + } else { + snprintf(fn_name, sizeof(fn_name), "%sfmin%s", math_float_prefix, math_float_suffix); + init_value = create_const_float(g, scalar_type, NAN); + } } else zig_unreachable(); } break; case ReduceOp_max: { if (scalar_type->id == ZigTypeIdInt) { const bool is_signed = scalar_type->data.integral.is_signed; - result_val = ZigLLVMBuildIntMaxReduce(g->builder, value, is_signed); + return ZigLLVMBuildIntMaxReduce(g->builder, value, is_signed); } else if (scalar_type->id == ZigTypeIdFloat) { - result_val = ZigLLVMBuildFPMaxReduce(g->builder, value); + if (float_intrinsics_allowed) { + return ZigLLVMBuildFPMaxReduce(g->builder, value); + } else { + snprintf(fn_name, sizeof(fn_name), "%sfmax%s", math_float_prefix, math_float_suffix); + init_value = create_const_float(g, scalar_type, NAN); + } } else zig_unreachable(); } break; case ReduceOp_add: { if (scalar_type->id == ZigTypeIdInt) { - result_val = ZigLLVMBuildAddReduce(g->builder, value); + return ZigLLVMBuildAddReduce(g->builder, value); } else if (scalar_type->id == ZigTypeIdFloat) { - LLVMValueRef neutral_value = LLVMConstReal( - get_llvm_type(g, scalar_type), -0.0); - result_val = ZigLLVMBuildFPAddReduce(g->builder, neutral_value, value); + if (float_intrinsics_allowed) { + LLVMValueRef neutral_value = LLVMConstReal( + get_llvm_type(g, scalar_type), -0.0); + return ZigLLVMBuildFPAddReduce(g->builder, neutral_value, value); + } else { + snprintf(fn_name, sizeof(fn_name), "__add%sf3", compiler_rt_type_abbrev); + init_value = create_const_float(g, scalar_type, 0.0); + } } else zig_unreachable(); } break; case ReduceOp_mul: { if (scalar_type->id == ZigTypeIdInt) { - result_val = ZigLLVMBuildMulReduce(g->builder, value); + return ZigLLVMBuildMulReduce(g->builder, value); } else if (scalar_type->id == ZigTypeIdFloat) { - LLVMValueRef neutral_value = LLVMConstReal( - get_llvm_type(g, scalar_type), 1.0); - result_val = ZigLLVMBuildFPMulReduce(g->builder, neutral_value, value); + if (float_intrinsics_allowed) { + LLVMValueRef neutral_value = LLVMConstReal( + get_llvm_type(g, scalar_type), 1.0); + return ZigLLVMBuildFPMulReduce(g->builder, neutral_value, value); + } else { + snprintf(fn_name, sizeof(fn_name), "__mul%sf3", compiler_rt_type_abbrev); + init_value = create_const_float(g, scalar_type, 1.0); + } } else zig_unreachable(); } break; default: zig_unreachable(); } - return result_val; + + LLVMValueRef llvm_init_value = gen_const_val(g, init_value, ""); + uint32_t vector_len = value_type->data.vector.len; + LLVMTypeRef llvm_scalar_type = get_llvm_type(g, scalar_type); + const LLVMValueRef llvm_fn = get_soft_float_fn(g, fn_name, 2, llvm_scalar_type, llvm_scalar_type); + return ir_render_reduced_call(g, llvm_fn, value, vector_len, llvm_init_value, scalar_type); } static LLVMValueRef ir_render_fence(CodeGen *g, Stage1Air *executable, 
Stage1AirInstFence *instruction) { diff --git a/src/stage1/softfloat.hpp b/src/stage1/softfloat.hpp index a0d270d55f..b9d886d311 100644 --- a/src/stage1/softfloat.hpp +++ b/src/stage1/softfloat.hpp @@ -21,6 +21,20 @@ static inline float16_t zig_double_to_f16(double x) { return f64_to_f16(y); } +static inline void zig_double_to_extF80M(double x, extFloat80_t *result) { + float64_t y; + static_assert(sizeof(x) == sizeof(y), ""); + memcpy(&y, &x, sizeof(x)); + f64_to_extF80M(y, result); +} + +static inline void zig_double_to_f128M(double x, float128_t *result) { + float64_t y; + static_assert(sizeof(x) == sizeof(y), ""); + memcpy(&y, &x, sizeof(x)); + f64_to_f128M(y, result); +} + // Return value is safe to coerce to float even when |x| is NaN or Infinity. static inline double zig_f16_to_double(float16_t x) { From db9058e41a7b11fcf0c1742fc7eac8a67fab3bcf Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Sat, 8 Oct 2022 11:32:32 -0700 Subject: [PATCH 08/14] Disable llvm.prefetch for PowerPC This instruction is not supported on this backend, so should just be a noop. --- src/codegen/llvm.zig | 8 +++++++- src/stage1/codegen.cpp | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 2189bb3127..40953182d9 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -9168,7 +9168,13 @@ pub const FuncGen = struct { const target = self.dg.module.getTarget(); switch (prefetch.cache) { .instruction => switch (target.cpu.arch) { - .x86_64, .i386 => return null, + .x86_64, + .i386, + .powerpc, + .powerpcle, + .powerpc64, + .powerpc64le, + => return null, .arm, .armeb, .thumb, .thumbeb => { switch (prefetch.rw) { .write => return null, diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp index 5546605b88..48f9d20ec4 100644 --- a/src/stage1/codegen.cpp +++ b/src/stage1/codegen.cpp @@ -6742,6 +6742,10 @@ static LLVMValueRef ir_render_prefetch(CodeGen *g, Stage1Air *executable, Stage1 switch (g->zig_target->arch) { case ZigLLVM_x86: case ZigLLVM_x86_64: + case ZigLLVM_ppc: + case ZigLLVM_ppcle: + case ZigLLVM_ppc64: + case ZigLLVM_ppc64le: return nullptr; default: break; From 46fb73071254658fae63539e450afed08a9f1e4b Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Sat, 8 Oct 2022 12:28:02 -0700 Subject: [PATCH 09/14] stage2: Use softfloat for `f16` on MIPS targets --- src/codegen/llvm.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 40953182d9..aca33529f2 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -10499,6 +10499,10 @@ fn backendSupportsF16(target: std.Target) bool { .powerpc64le, .wasm32, .wasm64, + .mips, + .mipsel, + .mips64, + .mips64el, => false, else => true, }; From b8c587eb4068d2fec5303641b7960d1ab885864b Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Tue, 11 Oct 2022 11:11:46 -0700 Subject: [PATCH 10/14] tests: Enable PPC64LE as a test target --- lib/std/atomic/Atomic.zig | 4 ++++ test/tests.zig | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/lib/std/atomic/Atomic.zig b/lib/std/atomic/Atomic.zig index 57866d21d6..6d5a0fe4fc 100644 --- a/lib/std/atomic/Atomic.zig +++ b/lib/std/atomic/Atomic.zig @@ -374,6 +374,10 @@ const atomic_rmw_orderings = [_]Ordering{ }; test "Atomic.swap" { + // TODO: Re-enable when LLVM is released with a bugfix for isel of + // atomic load (currently fixed on trunk, broken on 15.0.2) + if (builtin.cpu.arch == .powerpc64le) return error.SkipZigTest; + inline for (atomic_rmw_orderings) 
|ordering| { var x = Atomic(usize).init(5); try testing.expectEqual(x.swap(10, ordering), 5); diff --git a/test/tests.zig b/test/tests.zig index 53e58156a4..aef549d4f9 100644 --- a/test/tests.zig +++ b/test/tests.zig @@ -315,6 +315,30 @@ const test_targets = blk: { // .link_libc = true, //}, + .{ + .target = .{ + .cpu_arch = .powerpc64le, + .os_tag = .linux, + .abi = .none, + }, + }, + .{ + .target = .{ + .cpu_arch = .powerpc64le, + .os_tag = .linux, + .abi = .musl, + }, + .link_libc = true, + }, + .{ + .target = .{ + .cpu_arch = .powerpc64le, + .os_tag = .linux, + .abi = .gnu, + }, + .link_libc = true, + }, + .{ .target = .{ .cpu_arch = .riscv64, From f035437b5dcd054d136f5e823979afb583c40f59 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Thu, 13 Oct 2022 11:31:21 -0700 Subject: [PATCH 11/14] Re-enable Vector f16 tests on Windows Closes #4952 --- test/behavior/vector.zig | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 7205c53c46..80fa2021d8 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -506,18 +506,12 @@ test "vector division operators" { } fn doTheTest() !void { - // https://github.com/ziglang/zig/issues/4952 - if (builtin.target.os.tag != .windows) { - try doTheTestDiv(f16, [4]f16{ 4.0, -4.0, 4.0, -4.0 }, [4]f16{ 1.0, 2.0, -1.0, -2.0 }); - } + try doTheTestDiv(f16, [4]f16{ 4.0, -4.0, 4.0, -4.0 }, [4]f16{ 1.0, 2.0, -1.0, -2.0 }); try doTheTestDiv(f32, [4]f32{ 4.0, -4.0, 4.0, -4.0 }, [4]f32{ 1.0, 2.0, -1.0, -2.0 }); try doTheTestDiv(f64, [4]f64{ 4.0, -4.0, 4.0, -4.0 }, [4]f64{ 1.0, 2.0, -1.0, -2.0 }); - // https://github.com/ziglang/zig/issues/4952 - if (builtin.target.os.tag != .windows) { - try doTheTestMod(f16, [4]f16{ 4.0, -4.0, 4.0, -4.0 }, [4]f16{ 1.0, 2.0, 0.5, 3.0 }); - } + try doTheTestMod(f16, [4]f16{ 4.0, -4.0, 4.0, -4.0 }, [4]f16{ 1.0, 2.0, 0.5, 3.0 }); try doTheTestMod(f32, [4]f32{ 4.0, -4.0, 4.0, -4.0 }, [4]f32{ 1.0, 2.0, 0.5, 3.0 }); try doTheTestMod(f64, [4]f64{ 4.0, -4.0, 4.0, -4.0 }, [4]f64{ 1.0, 2.0, 0.5, 3.0 }); From 1e278131e39e214684566db2d4fc3c1e16c71475 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Thu, 13 Oct 2022 12:47:47 -0700 Subject: [PATCH 12/14] Re-enable `@mulAdd` f128 tests on aarch64-darwin Closes #9900 --- test/behavior/muladd.zig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test/behavior/muladd.zig b/test/behavior/muladd.zig index 861b786a56..f31b8d3c5c 100644 --- a/test/behavior/muladd.zig +++ b/test/behavior/muladd.zig @@ -71,11 +71,6 @@ test "@mulAdd f128" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO - if (builtin.os.tag == .macos and builtin.cpu.arch == .aarch64) { - // https://github.com/ziglang/zig/issues/9900 - return error.SkipZigTest; - } - if (builtin.zig_backend == .stage1 and builtin.cpu.arch == .i386 and builtin.os.tag == .linux) { From 34863224c388570892d3a0f110138de56a74d04c Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Thu, 13 Oct 2022 12:49:35 -0700 Subject: [PATCH 13/14] Re-enable `mulAdd` f128 tests on i386-linux --- test/behavior/muladd.zig | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/behavior/muladd.zig b/test/behavior/muladd.zig index f31b8d3c5c..1ce5ffb1e7 100644 --- a/test/behavior/muladd.zig +++ b/test/behavior/muladd.zig @@ -71,12 +71,6 @@ test "@mulAdd f128" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == 
.stage2_aarch64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage1 and - builtin.cpu.arch == .i386 and builtin.os.tag == .linux) - { - return error.SkipZigTest; - } - comptime try testMulAdd128(); try testMulAdd128(); } From a168893e0097093665154c7897b7f909cec855a1 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Fri, 14 Oct 2022 12:58:37 -0700 Subject: [PATCH 14/14] stage1: Fix ppcle `long double` size Resolves #13110 --- src/stage1/codegen.cpp | 1 + src/stage1/target.cpp | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp index 48f9d20ec4..e5c694967c 100644 --- a/src/stage1/codegen.cpp +++ b/src/stage1/codegen.cpp @@ -9936,6 +9936,7 @@ static void define_builtin_types(CodeGen *g) { add_fp_entry(g, "c_longdouble", 128, LLVMFP128Type(), &g->builtin_types.entry_c_longdouble); break; case ZigLLVM_ppc: + case ZigLLVM_ppcle: case ZigLLVM_ppc64: case ZigLLVM_ppc64le: add_fp_entry(g, "c_longdouble", 128, LLVMFP128Type(), &g->builtin_types.entry_c_longdouble); diff --git a/src/stage1/target.cpp b/src/stage1/target.cpp index 3031b7e588..dfd91bed8a 100644 --- a/src/stage1/target.cpp +++ b/src/stage1/target.cpp @@ -950,7 +950,6 @@ bool target_is_arm(const ZigTarget *target) { case ZigLLVM_msp430: case ZigLLVM_nvptx: case ZigLLVM_nvptx64: - case ZigLLVM_ppc64le: case ZigLLVM_r600: case ZigLLVM_renderscript32: case ZigLLVM_renderscript64: @@ -971,6 +970,7 @@ bool target_is_arm(const ZigTarget *target) { case ZigLLVM_ppc: case ZigLLVM_ppcle: case ZigLLVM_ppc64: + case ZigLLVM_ppc64le: case ZigLLVM_ve: case ZigLLVM_spirv32: case ZigLLVM_spirv64: @@ -1125,8 +1125,8 @@ bool target_is_mips(const ZigTarget *target) { } bool target_is_ppc(const ZigTarget *target) { - return target->arch == ZigLLVM_ppc || target->arch == ZigLLVM_ppc64 || - target->arch == ZigLLVM_ppc64le; + return target->arch == ZigLLVM_ppc || target->arch == ZigLLVM_ppcle || + target->arch == ZigLLVM_ppc64 || target->arch == ZigLLVM_ppc64le; } // Returns the minimum alignment for every function pointer on the given