Merge pull request #13100 from topolarity/powerpc64le

stage2: Fix softfloat support for PPC64(LE)
Andrew Kelley 2022-10-15 10:05:00 -04:00 committed by GitHub
commit b4e3424594
35 changed files with 601 additions and 201 deletions
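
This change teaches both the stage1 and self-hosted (stage2) compilers to lower f16 arithmetic, comparisons, and vector reductions through compiler_rt soft-float routines on targets such as powerpc64le that lack usable native half-precision support. As a rough user-level sketch of what this enables (assuming a soft-float target; __addhf3/__mulhf3 in the comment are routines added by this commit):

const std = @import("std");

// Plain f16 arithmetic. On powerpc64le and the other targets listed in
// backendSupportsF16 below, the compiler now emits calls into compiler_rt
// (__addhf3, __mulhf3, __cmphf2, ...) instead of relying on LLVM's native
// half-precision lowering, which can miscompile on those targets.
fn addF16(a: f16, b: f16) f16 {
    return a + b;
}

test "f16 arithmetic on soft-float targets" {
    try std.testing.expectEqual(@as(f16, 3.75), addF16(1.5, 2.25));
}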


@ -3,6 +3,10 @@
#if defined(__BIG_ENDIAN__)
#define BIGENDIAN 1
#elif defined(_BIG_ENDIAN) && (_BIG_ENDIAN == 1)
#define BIGENDIAN 1
#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define BIGENDIAN 1
#elif defined(__ARMEB__)
#define BIGENDIAN 1
#elif defined(__THUMBEB__)
@ -15,18 +19,12 @@
#define BIGENDIAN 1
#elif defined(__MIPSEB__)
#define BIGENDIAN 1
#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define BIGENDIAN 1
#elif defined(__sparc)
#define BIGENDIAN 1
#elif defined(__sparc__)
#define BIGENDIAN 1
#elif defined(_POWER)
#define BIGENDIAN 1
#elif defined(__powerpc__)
#define BIGENDIAN 1
#elif defined(__ppc__)
#define BIGENDIAN 1
#elif defined(__hpux)
#define BIGENDIAN 1
#elif defined(__hppa)
@ -39,6 +37,10 @@
#if defined(__LITTLE_ENDIAN__)
#define LITTLEENDIAN 1
#elif defined(_LITTLE_ENDIAN) && (_LITTLE_ENDIAN == 1)
#define LITTLEENDIAN 1
#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define LITTLEENDIAN 1
#elif defined(__ARMEL__)
#define LITTLEENDIAN 1
#elif defined(__THUMBEL__)
@ -51,8 +53,6 @@
#define LITTLEENDIAN 1
#elif defined(__MIPSEL__)
#define LITTLEENDIAN 1
#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define LITTLEENDIAN 1
#elif defined(__i386__)
#define LITTLEENDIAN 1
#elif defined(__alpha__)


@ -4,17 +4,20 @@ comptime {
_ = @import("compiler_rt/atomics.zig");
_ = @import("compiler_rt/addf3.zig");
_ = @import("compiler_rt/addhf3.zig");
_ = @import("compiler_rt/addsf3.zig");
_ = @import("compiler_rt/adddf3.zig");
_ = @import("compiler_rt/addtf3.zig");
_ = @import("compiler_rt/addxf3.zig");
_ = @import("compiler_rt/subhf3.zig");
_ = @import("compiler_rt/subsf3.zig");
_ = @import("compiler_rt/subdf3.zig");
_ = @import("compiler_rt/subtf3.zig");
_ = @import("compiler_rt/subxf3.zig");
_ = @import("compiler_rt/mulf3.zig");
_ = @import("compiler_rt/mulhf3.zig");
_ = @import("compiler_rt/mulsf3.zig");
_ = @import("compiler_rt/muldf3.zig");
_ = @import("compiler_rt/multf3.zig");
@ -34,51 +37,58 @@ comptime {
_ = @import("compiler_rt/divxc3.zig");
_ = @import("compiler_rt/divtc3.zig");
_ = @import("compiler_rt/neghf2.zig");
_ = @import("compiler_rt/negsf2.zig");
_ = @import("compiler_rt/negdf2.zig");
_ = @import("compiler_rt/negtf2.zig");
_ = @import("compiler_rt/negxf2.zig");
_ = @import("compiler_rt/comparef.zig");
_ = @import("compiler_rt/cmphf2.zig");
_ = @import("compiler_rt/cmpsf2.zig");
_ = @import("compiler_rt/cmpdf2.zig");
_ = @import("compiler_rt/cmptf2.zig");
_ = @import("compiler_rt/cmpxf2.zig");
_ = @import("compiler_rt/gehf2.zig");
_ = @import("compiler_rt/gesf2.zig");
_ = @import("compiler_rt/gedf2.zig");
_ = @import("compiler_rt/getf2.zig");
_ = @import("compiler_rt/gexf2.zig");
_ = @import("compiler_rt/getf2.zig");
_ = @import("compiler_rt/unordhf2.zig");
_ = @import("compiler_rt/unordsf2.zig");
_ = @import("compiler_rt/unorddf2.zig");
_ = @import("compiler_rt/unordxf2.zig");
_ = @import("compiler_rt/unordtf2.zig");
_ = @import("compiler_rt/extendf.zig");
_ = @import("compiler_rt/extenddftf2.zig");
_ = @import("compiler_rt/extenddfxf2.zig");
_ = @import("compiler_rt/extendhfsf2.zig");
_ = @import("compiler_rt/extendhfdf2.zig");
_ = @import("compiler_rt/extendhftf2.zig");
_ = @import("compiler_rt/extendhfxf2.zig");
_ = @import("compiler_rt/extendsfdf2.zig");
_ = @import("compiler_rt/extendsftf2.zig");
_ = @import("compiler_rt/extendsfxf2.zig");
_ = @import("compiler_rt/extenddftf2.zig");
_ = @import("compiler_rt/extenddfxf2.zig");
_ = @import("compiler_rt/extendxftf2.zig");
_ = @import("compiler_rt/truncf.zig");
_ = @import("compiler_rt/truncsfhf2.zig");
_ = @import("compiler_rt/truncdfhf2.zig");
_ = @import("compiler_rt/truncdfsf2.zig");
_ = @import("compiler_rt/truncxfhf2.zig");
_ = @import("compiler_rt/truncxfsf2.zig");
_ = @import("compiler_rt/truncxfdf2.zig");
_ = @import("compiler_rt/trunctfhf2.zig");
_ = @import("compiler_rt/trunctfsf2.zig");
_ = @import("compiler_rt/trunctfdf2.zig");
_ = @import("compiler_rt/trunctfxf2.zig");
_ = @import("compiler_rt/truncxfhf2.zig");
_ = @import("compiler_rt/truncxfsf2.zig");
_ = @import("compiler_rt/truncxfdf2.zig");
_ = @import("compiler_rt/divtf3.zig");
_ = @import("compiler_rt/divhf3.zig");
_ = @import("compiler_rt/divsf3.zig");
_ = @import("compiler_rt/divdf3.zig");
_ = @import("compiler_rt/divxf3.zig");
_ = @import("compiler_rt/divtf3.zig");
_ = @import("compiler_rt/sin.zig");
_ = @import("compiler_rt/cos.zig");
_ = @import("compiler_rt/sincos.zig");


@ -0,0 +1,12 @@
const common = @import("./common.zig");
const addf3 = @import("./addf3.zig").addf3;
pub const panic = common.panic;
comptime {
@export(__addhf3, .{ .name = "__addhf3", .linkage = common.linkage });
}
fn __addhf3(a: f16, b: f16) callconv(.C) f16 {
return addf3(f16, a, b);
}


@ -0,0 +1,50 @@
//! The quoted behavior definitions are from
//! https://gcc.gnu.org/onlinedocs/gcc-12.1.0/gccint/Soft-float-library-routines.html#Soft-float-library-routines
const common = @import("./common.zig");
const comparef = @import("./comparef.zig");
pub const panic = common.panic;
comptime {
@export(__eqhf2, .{ .name = "__eqhf2", .linkage = common.linkage });
@export(__nehf2, .{ .name = "__nehf2", .linkage = common.linkage });
@export(__lehf2, .{ .name = "__lehf2", .linkage = common.linkage });
@export(__cmphf2, .{ .name = "__cmphf2", .linkage = common.linkage });
@export(__lthf2, .{ .name = "__lthf2", .linkage = common.linkage });
}
/// "These functions calculate a <=> b. That is, if a is less than b, they return -1;
/// if a is greater than b, they return 1; and if a and b are equal they return 0.
/// If either argument is NaN they return 1..."
///
/// Note that this matches the definition of `__lehf2`, `__eqhf2`, `__nehf2`, `__cmphf2`,
/// and `__lthf2`.
fn __cmphf2(a: f16, b: f16) callconv(.C) i32 {
return @enumToInt(comparef.cmpf2(f16, comparef.LE, a, b));
}
/// "These functions return a value less than or equal to zero if neither argument is NaN,
/// and a is less than or equal to b."
pub fn __lehf2(a: f16, b: f16) callconv(.C) i32 {
return __cmphf2(a, b);
}
/// "These functions return zero if neither argument is NaN, and a and b are equal."
/// Note that due to some kind of historical accident, __eqhf2 and __nehf2 are defined
/// to have the same return value.
pub fn __eqhf2(a: f16, b: f16) callconv(.C) i32 {
return __cmphf2(a, b);
}
/// "These functions return a nonzero value if either argument is NaN, or if a and b are unequal."
/// Note that due to some kind of historical accident, __eqhf2 and __nehf2 are defined
/// to have the same return value.
pub fn __nehf2(a: f16, b: f16) callconv(.C) i32 {
return __cmphf2(a, b);
}
/// "These functions return a value less than zero if neither argument is NaN, and a
/// is strictly less than b."
pub fn __lthf2(a: f16, b: f16) callconv(.C) i32 {
return __cmphf2(a, b);
}
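
A brief caller-side sketch of the convention documented above (the extern declaration is only illustrative; Zig links these routines in automatically as part of compiler_rt): a negative, zero, or positive result maps to less-than, equal, and greater-than, and a NaN operand pushes the result to 1, so ordered predicates stay false.

// Illustrative use of the <=> result, per the GCC soft-float spec quoted above.
extern fn __cmphf2(a: f16, b: f16) i32;

fn f16LessOrEqual(a: f16, b: f16) bool {
    // <= 0 means "neither argument is NaN, and a <= b"; a NaN operand yields 1.
    return __cmphf2(a, b) <= 0;
}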


@ -0,0 +1,11 @@
const common = @import("common.zig");
const divsf3 = @import("./divsf3.zig");
comptime {
@export(__divhf3, .{ .name = "__divhf3", .linkage = common.linkage });
}
pub fn __divhf3(a: f16, b: f16) callconv(.C) f16 {
// TODO: more efficient implementation
return @floatCast(f16, divsf3.__divsf3(a, b));
}


@ -0,0 +1,12 @@
const common = @import("./common.zig");
const extendf = @import("./extendf.zig").extendf;
pub const panic = common.panic;
comptime {
@export(__extendhfdf2, .{ .name = "__extendhfdf2", .linkage = common.linkage });
}
pub fn __extendhfdf2(a: common.F16T) callconv(.C) f64 {
return extendf(f64, f16, @bitCast(u16, a));
}


@ -5,22 +5,17 @@ pub const panic = common.panic;
comptime {
if (common.gnu_f16_abi) {
@export(__gnu_h2f_ieee, .{ .name = "__gnu_h2f_ieee", .linkage = common.linkage });
@export(__extendhfsf2, .{ .name = "__gnu_h2f_ieee", .linkage = common.linkage });
} else if (common.want_aeabi) {
@export(__aeabi_h2f, .{ .name = "__aeabi_h2f", .linkage = common.linkage });
} else {
@export(__extendhfsf2, .{ .name = "__extendhfsf2", .linkage = common.linkage });
}
@export(__extendhfsf2, .{ .name = "__extendhfsf2", .linkage = common.linkage });
}
pub fn __extendhfsf2(a: common.F16T) callconv(.C) f32 {
return extendf(f32, f16, @bitCast(u16, a));
}
fn __gnu_h2f_ieee(a: common.F16T) callconv(.C) f32 {
return extendf(f32, f16, @bitCast(u16, a));
}
fn __aeabi_h2f(a: u16) callconv(.AAPCS) f32 {
return extendf(f32, f16, @bitCast(u16, a));
}

lib/compiler_rt/gehf2.zig (new file, 31 lines)

@ -0,0 +1,31 @@
//! The quoted behavior definitions are from
//! https://gcc.gnu.org/onlinedocs/gcc-12.1.0/gccint/Soft-float-library-routines.html#Soft-float-library-routines
const common = @import("./common.zig");
const comparef = @import("./comparef.zig");
pub const panic = common.panic;
comptime {
@export(__gehf2, .{ .name = "__gehf2", .linkage = common.linkage });
@export(__gthf2, .{ .name = "__gthf2", .linkage = common.linkage });
}
/// "These functions return a value greater than or equal to zero if neither
/// argument is NaN, and a is greater than or equal to b."
pub fn __gehf2(a: f16, b: f16) callconv(.C) i32 {
return @enumToInt(comparef.cmpf2(f16, comparef.GE, a, b));
}
/// "These functions return a value greater than zero if neither argument is NaN,
/// and a is strictly greater than b."
pub fn __gthf2(a: f16, b: f16) callconv(.C) i32 {
return __gehf2(a, b);
}
fn __aeabi_fcmpge(a: f16, b: f16) callconv(.AAPCS) i32 {
return @boolToInt(comparef.cmpf2(f16, comparef.GE, a, b) != .Less);
}
fn __aeabi_fcmpgt(a: f16, b: f16) callconv(.AAPCS) i32 {
return @boolToInt(comparef.cmpf2(f16, comparef.LE, a, b) == .Greater);
}


@ -32,8 +32,9 @@ pub inline fn mulf3(comptime T: type, a: T, b: T) T {
const infRep = @bitCast(Z, math.inf(T));
const minNormalRep = @bitCast(Z, math.floatMin(T));
const aExponent = @truncate(u32, (@bitCast(Z, a) >> significandBits) & maxExponent);
const bExponent = @truncate(u32, (@bitCast(Z, b) >> significandBits) & maxExponent);
const ZExp = if (typeWidth >= 32) u32 else Z;
const aExponent = @truncate(ZExp, (@bitCast(Z, a) >> significandBits) & maxExponent);
const bExponent = @truncate(ZExp, (@bitCast(Z, b) >> significandBits) & maxExponent);
const productSign: Z = (@bitCast(Z, a) ^ @bitCast(Z, b)) & signBit;
var aSignificand: ZSignificand = @intCast(ZSignificand, @bitCast(Z, a) & significandMask);


@ -0,0 +1,12 @@
const common = @import("./common.zig");
const mulf3 = @import("./mulf3.zig").mulf3;
pub const panic = common.panic;
comptime {
@export(__mulhf3, .{ .name = "__mulhf3", .linkage = common.linkage });
}
pub fn __mulhf3(a: f16, b: f16) callconv(.C) f16 {
return mulf3(f16, a, b);
}


@ -0,0 +1,11 @@
const common = @import("./common.zig");
pub const panic = common.panic;
comptime {
@export(__neghf2, .{ .name = "__neghf2", .linkage = common.linkage });
}
fn __neghf2(a: f16) callconv(.C) f16 {
return common.fneg(a);
}


@ -0,0 +1,12 @@
const common = @import("./common.zig");
pub const panic = common.panic;
comptime {
@export(__subhf3, .{ .name = "__subhf3", .linkage = common.linkage });
}
fn __subhf3(a: f16, b: f16) callconv(.C) f16 {
const neg_b = @bitCast(f16, @bitCast(u16, b) ^ (@as(u16, 1) << 15));
return a + neg_b;
}
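
The subtraction above works by flipping the sign bit of b and reusing f16 addition (which itself lowers to __addhf3 on soft-float targets). A small sanity-check sketch of that bit trick:

const std = @import("std");

// Flipping the top bit of an IEEE binary16 value negates it, so a - b == a + (-b).
test "f16 sign-bit flip negates" {
    const b: f16 = 2.5;
    const neg_b = @bitCast(f16, @bitCast(u16, b) ^ (@as(u16, 1) << 15));
    try std.testing.expectEqual(@as(f16, -2.5), neg_b);
}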


@ -24,8 +24,10 @@ comptime {
@export(tanf, .{ .name = "tanf", .linkage = common.linkage });
@export(tan, .{ .name = "tan", .linkage = common.linkage });
@export(__tanx, .{ .name = "__tanx", .linkage = common.linkage });
const tanq_sym_name = if (common.want_ppc_abi) "tanf128" else "tanq";
@export(tanq, .{ .name = tanq_sym_name, .linkage = common.linkage });
if (common.want_ppc_abi) {
@export(tanq, .{ .name = "tanf128", .linkage = common.linkage });
}
@export(tanq, .{ .name = "tanq", .linkage = common.linkage });
@export(tanl, .{ .name = "tanl", .linkage = common.linkage });
}


@ -5,22 +5,17 @@ pub const panic = common.panic;
comptime {
if (common.gnu_f16_abi) {
@export(__gnu_f2h_ieee, .{ .name = "__gnu_f2h_ieee", .linkage = common.linkage });
@export(__truncsfhf2, .{ .name = "__gnu_f2h_ieee", .linkage = common.linkage });
} else if (common.want_aeabi) {
@export(__aeabi_f2h, .{ .name = "__aeabi_f2h", .linkage = common.linkage });
} else {
@export(__truncsfhf2, .{ .name = "__truncsfhf2", .linkage = common.linkage });
}
@export(__truncsfhf2, .{ .name = "__truncsfhf2", .linkage = common.linkage });
}
pub fn __truncsfhf2(a: f32) callconv(.C) common.F16T {
return @bitCast(common.F16T, truncf(f16, f32, a));
}
fn __gnu_f2h_ieee(a: f32) callconv(.C) common.F16T {
return @bitCast(common.F16T, truncf(f16, f32, a));
}
fn __aeabi_f2h(a: f32) callconv(.AAPCS) u16 {
return @bitCast(common.F16T, truncf(f16, f32, a));
}


@ -0,0 +1,12 @@
const common = @import("./common.zig");
const comparef = @import("./comparef.zig");
pub const panic = common.panic;
comptime {
@export(__unordhf2, .{ .name = "__unordhf2", .linkage = common.linkage });
}
pub fn __unordhf2(a: f16, b: f16) callconv(.C) i32 {
return comparef.unordcmp(f16, a, b);
}


@ -0,0 +1,12 @@
const common = @import("./common.zig");
const comparef = @import("./comparef.zig");
pub const panic = common.panic;
comptime {
@export(__unordxf2, .{ .name = "__unordxf2", .linkage = common.linkage });
}
pub fn __unordxf2(a: f80, b: f80) callconv(.C) i32 {
return comparef.unordcmp(f80, a, b);
}


@ -374,6 +374,10 @@ const atomic_rmw_orderings = [_]Ordering{
};
test "Atomic.swap" {
// TODO: Re-enable when LLVM is released with a bugfix for isel of
// atomic load (currently fixed on trunk, broken on 15.0.2)
if (builtin.cpu.arch == .powerpc64le) return error.SkipZigTest;
inline for (atomic_rmw_orderings) |ordering| {
var x = Atomic(usize).init(5);
try testing.expectEqual(x.swap(10, ordering), 5);


@ -1789,6 +1789,8 @@ pub const Target = struct {
.powerpcle,
.powerpc64,
.powerpc64le,
.wasm32,
.wasm64,
=> true,
else => false,


@ -2738,7 +2738,7 @@ pub const DeclGen = struct {
return dg.context.intType(bit_count);
},
.Float => switch (t.floatBits(target)) {
16 => return dg.context.halfType(),
16 => return if (backendSupportsF16(target)) dg.context.halfType() else dg.context.intType(16),
32 => return dg.context.floatType(),
64 => return dg.context.doubleType(),
80 => return if (backendSupportsF80(target)) dg.context.x86FP80Type() else dg.context.intType(80),
@ -3253,7 +3253,15 @@ pub const DeclGen = struct {
.Float => {
const llvm_ty = try dg.lowerType(tv.ty);
switch (tv.ty.floatBits(target)) {
16, 32, 64 => return llvm_ty.constReal(tv.val.toFloat(f64)),
16 => if (intrinsicsAllowed(tv.ty, target)) {
return llvm_ty.constReal(tv.val.toFloat(f16));
} else {
const repr = @bitCast(u16, tv.val.toFloat(f16));
const llvm_i16 = dg.context.intType(16);
const int = llvm_i16.constInt(repr, .False);
return int.constBitCast(llvm_ty);
},
32, 64 => return llvm_ty.constReal(tv.val.toFloat(f64)),
80 => {
const float = tv.val.toFloat(f80);
const repr = std.math.break_f80(float);
@ -7611,11 +7619,25 @@ pub const FuncGen = struct {
const target = self.dg.module.getTarget();
const dest_bits = dest_ty.floatBits(target);
const src_bits = operand_ty.floatBits(target);
if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) {
return softF80TruncOrExt(self, operand, src_bits, dest_bits);
if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) {
const dest_llvm_ty = try self.dg.lowerType(dest_ty);
return self.builder.buildFPTrunc(operand, dest_llvm_ty, "");
} else {
const operand_llvm_ty = try self.dg.lowerType(operand_ty);
const dest_llvm_ty = try self.dg.lowerType(dest_ty);
var fn_name_buf: [64]u8 = undefined;
const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__trunc{s}f{s}f2", .{
compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits),
}) catch unreachable;
const params = [1]*llvm.Value{operand};
const param_types = [1]*llvm.Type{operand_llvm_ty};
const llvm_fn = self.getLibcFunction(fn_name, &param_types, dest_llvm_ty);
return self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &params, params.len, .C, .Auto, "");
}
const dest_llvm_ty = try self.dg.lowerType(dest_ty);
return self.builder.buildFPTrunc(operand, dest_llvm_ty, "");
}
fn airFpext(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
@ -7629,11 +7651,25 @@ pub const FuncGen = struct {
const target = self.dg.module.getTarget();
const dest_bits = dest_ty.floatBits(target);
const src_bits = operand_ty.floatBits(target);
if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) {
return softF80TruncOrExt(self, operand, src_bits, dest_bits);
if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) {
const dest_llvm_ty = try self.dg.lowerType(dest_ty);
return self.builder.buildFPExt(operand, dest_llvm_ty, "");
} else {
const operand_llvm_ty = try self.dg.lowerType(operand_ty);
const dest_llvm_ty = try self.dg.lowerType(dest_ty);
var fn_name_buf: [64]u8 = undefined;
const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__extend{s}f{s}f2", .{
compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits),
}) catch unreachable;
const params = [1]*llvm.Value{operand};
const param_types = [1]*llvm.Type{operand_llvm_ty};
const llvm_fn = self.getLibcFunction(fn_name, &param_types, dest_llvm_ty);
return self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &params, params.len, .C, .Auto, "");
}
const dest_llvm_ty = try self.dg.lowerType(self.air.typeOfIndex(inst));
return self.builder.buildFPExt(operand, dest_llvm_ty, "");
}
fn airPtrToInt(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
@ -8717,12 +8753,78 @@ pub const FuncGen = struct {
return self.builder.buildShuffleVector(a, b, llvm_mask_value, "");
}
/// Reduce a vector by repeatedly applying `llvm_fn` to produce an accumulated result.
///
/// Equivalent to:
/// reduce: {
/// var i: usize = 0;
/// var accum: T = init;
/// while (i < vec.len) : (i += 1) {
/// accum = llvm_fn(accum, vec[i]);
/// }
/// break :reduce accum;
/// }
///
fn buildReducedCall(
self: *FuncGen,
llvm_fn: *llvm.Value,
operand_vector: *llvm.Value,
vector_len: usize,
accum_init: *llvm.Value,
) !*llvm.Value {
const llvm_usize_ty = try self.dg.lowerType(Type.usize);
const llvm_vector_len = llvm_usize_ty.constInt(vector_len, .False);
const llvm_result_ty = accum_init.typeOf();
// Allocate and initialize our mutable variables
const i_ptr = self.buildAlloca(llvm_usize_ty);
_ = self.builder.buildStore(llvm_usize_ty.constInt(0, .False), i_ptr);
const accum_ptr = self.buildAlloca(llvm_result_ty);
_ = self.builder.buildStore(accum_init, accum_ptr);
// Set up the loop
const loop = self.context.appendBasicBlock(self.llvm_func, "ReduceLoop");
const loop_exit = self.context.appendBasicBlock(self.llvm_func, "AfterReduce");
_ = self.builder.buildBr(loop);
{
self.builder.positionBuilderAtEnd(loop);
// while (i < vec.len)
const i = self.builder.buildLoad(llvm_usize_ty, i_ptr, "");
const cond = self.builder.buildICmp(.ULT, i, llvm_vector_len, "");
const loop_then = self.context.appendBasicBlock(self.llvm_func, "ReduceLoopThen");
_ = self.builder.buildCondBr(cond, loop_then, loop_exit);
{
self.builder.positionBuilderAtEnd(loop_then);
// accum = f(accum, vec[i]);
const accum = self.builder.buildLoad(llvm_result_ty, accum_ptr, "");
const element = self.builder.buildExtractElement(operand_vector, i, "");
const params = [2]*llvm.Value{ accum, element };
const new_accum = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &params, params.len, .C, .Auto, "");
_ = self.builder.buildStore(new_accum, accum_ptr);
// i += 1
const new_i = self.builder.buildAdd(i, llvm_usize_ty.constInt(1, .False), "");
_ = self.builder.buildStore(new_i, i_ptr);
_ = self.builder.buildBr(loop);
}
}
self.builder.positionBuilderAtEnd(loop_exit);
return self.builder.buildLoad(llvm_result_ty, accum_ptr, "");
}
fn airReduce(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*llvm.Value {
if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const target = self.dg.module.getTarget();
const reduce = self.air.instructions.items(.data)[inst].reduce;
const operand = try self.resolveInst(reduce.operand);
var operand = try self.resolveInst(reduce.operand);
const operand_ty = self.air.typeOf(reduce.operand);
const scalar_ty = self.air.typeOfIndex(inst);
// TODO handle the fast math setting
@ -8733,17 +8835,21 @@ pub const FuncGen = struct {
.Xor => return self.builder.buildXorReduce(operand),
.Min => switch (scalar_ty.zigTypeTag()) {
.Int => return self.builder.buildIntMinReduce(operand, scalar_ty.isSignedInt()),
.Float => return self.builder.buildFPMinReduce(operand),
.Float => if (intrinsicsAllowed(scalar_ty, target)) {
return self.builder.buildFPMinReduce(operand);
},
else => unreachable,
},
.Max => switch (scalar_ty.zigTypeTag()) {
.Int => return self.builder.buildIntMaxReduce(operand, scalar_ty.isSignedInt()),
.Float => return self.builder.buildFPMaxReduce(operand),
.Float => if (intrinsicsAllowed(scalar_ty, target)) {
return self.builder.buildFPMaxReduce(operand);
},
else => unreachable,
},
.Add => switch (scalar_ty.zigTypeTag()) {
.Int => return self.builder.buildAddReduce(operand),
.Float => {
.Float => if (intrinsicsAllowed(scalar_ty, target)) {
const scalar_llvm_ty = try self.dg.lowerType(scalar_ty);
const neutral_value = scalar_llvm_ty.constReal(-0.0);
return self.builder.buildFPAddReduce(neutral_value, operand);
@ -8752,7 +8858,7 @@ pub const FuncGen = struct {
},
.Mul => switch (scalar_ty.zigTypeTag()) {
.Int => return self.builder.buildMulReduce(operand),
.Float => {
.Float => if (intrinsicsAllowed(scalar_ty, target)) {
const scalar_llvm_ty = try self.dg.lowerType(scalar_ty);
const neutral_value = scalar_llvm_ty.constReal(1.0);
return self.builder.buildFPMulReduce(neutral_value, operand);
@ -8760,6 +8866,44 @@ pub const FuncGen = struct {
else => unreachable,
},
}
// Reduction could not be performed with intrinsics.
// Use a manual loop over a softfloat call instead.
var fn_name_buf: [64]u8 = undefined;
const float_bits = scalar_ty.floatBits(target);
const fn_name = switch (reduce.operation) {
.Min => std.fmt.bufPrintZ(&fn_name_buf, "{s}fmin{s}", .{
libcFloatPrefix(float_bits), libcFloatSuffix(float_bits),
}) catch unreachable,
.Max => std.fmt.bufPrintZ(&fn_name_buf, "{s}fmax{s}", .{
libcFloatPrefix(float_bits), libcFloatSuffix(float_bits),
}) catch unreachable,
.Add => std.fmt.bufPrintZ(&fn_name_buf, "__add{s}f3", .{
compilerRtFloatAbbrev(float_bits),
}) catch unreachable,
.Mul => std.fmt.bufPrintZ(&fn_name_buf, "__mul{s}f3", .{
compilerRtFloatAbbrev(float_bits),
}) catch unreachable,
else => unreachable,
};
var init_value_payload = Value.Payload.Float_32{
.data = switch (reduce.operation) {
.Min => std.math.nan(f32),
.Max => std.math.nan(f32),
.Add => -0.0,
.Mul => 1.0,
else => unreachable,
},
};
const param_llvm_ty = try self.dg.lowerType(scalar_ty);
const param_types = [2]*llvm.Type{ param_llvm_ty, param_llvm_ty };
const libc_fn = self.getLibcFunction(fn_name, &param_types, param_llvm_ty);
const init_value = try self.dg.lowerValue(.{
.ty = scalar_ty,
.val = Value.initPayload(&init_value_payload.base),
});
return self.buildReducedCall(libc_fn, operand, operand_ty.vectorLen(), init_value);
}
fn airAggregateInit(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
@ -9051,7 +9195,13 @@ pub const FuncGen = struct {
const target = self.dg.module.getTarget();
switch (prefetch.cache) {
.instruction => switch (target.cpu.arch) {
.x86_64, .i386 => return null,
.x86_64,
.i386,
.powerpc,
.powerpcle,
.powerpc64,
.powerpc64le,
=> return null,
.arm, .armeb, .thumb, .thumbeb => {
switch (prefetch.rw) {
.write => return null,
@ -9091,87 +9241,6 @@ pub const FuncGen = struct {
return null;
}
fn softF80TruncOrExt(
self: *FuncGen,
operand: *llvm.Value,
src_bits: u16,
dest_bits: u16,
) !?*llvm.Value {
const target = self.dg.module.getTarget();
var param_llvm_ty: *llvm.Type = self.context.intType(80);
var ret_llvm_ty: *llvm.Type = param_llvm_ty;
var fn_name: [*:0]const u8 = undefined;
var arg = operand;
var final_cast: ?*llvm.Type = null;
assert(src_bits == 80 or dest_bits == 80);
if (src_bits == 80) switch (dest_bits) {
16 => {
// See corresponding condition at definition of
// __truncxfhf2 in compiler-rt.
if (target.cpu.arch.isAARCH64()) {
ret_llvm_ty = self.context.halfType();
} else {
ret_llvm_ty = self.context.intType(16);
final_cast = self.context.halfType();
}
fn_name = "__truncxfhf2";
},
32 => {
ret_llvm_ty = self.context.floatType();
fn_name = "__truncxfsf2";
},
64 => {
ret_llvm_ty = self.context.doubleType();
fn_name = "__truncxfdf2";
},
80 => return operand,
128 => {
ret_llvm_ty = self.context.fp128Type();
fn_name = "__extendxftf2";
},
else => unreachable,
} else switch (src_bits) {
16 => {
// See corresponding condition at definition of
// __extendhfxf2 in compiler-rt.
param_llvm_ty = if (target.cpu.arch.isAARCH64())
self.context.halfType()
else
self.context.intType(16);
arg = self.builder.buildBitCast(arg, param_llvm_ty, "");
fn_name = "__extendhfxf2";
},
32 => {
param_llvm_ty = self.context.floatType();
fn_name = "__extendsfxf2";
},
64 => {
param_llvm_ty = self.context.doubleType();
fn_name = "__extenddfxf2";
},
80 => return operand,
128 => {
param_llvm_ty = self.context.fp128Type();
fn_name = "__trunctfxf2";
},
else => unreachable,
}
const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse f: {
const param_types = [_]*llvm.Type{param_llvm_ty};
const fn_type = llvm.functionType(ret_llvm_ty, &param_types, param_types.len, .False);
break :f self.dg.object.llvm_module.addFunction(fn_name, fn_type);
};
var args: [1]*llvm.Value = .{arg};
const result = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &args, args.len, .C, .Auto, "");
const final_cast_llvm_ty = final_cast orelse return result;
return self.builder.buildBitCast(result, final_cast_llvm_ty, "");
}
fn getErrorNameTable(self: *FuncGen) !*llvm.Value {
if (self.dg.object.error_name_table) |table| {
return table;
@ -10451,6 +10520,17 @@ fn backendSupportsF80(target: std.Target) bool {
/// if it produces miscompilations.
fn backendSupportsF16(target: std.Target) bool {
return switch (target.cpu.arch) {
.powerpc,
.powerpcle,
.powerpc64,
.powerpc64le,
.wasm32,
.wasm64,
.mips,
.mipsel,
.mips64,
.mips64el,
=> false,
else => true,
};
}
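
For context, a user-level sketch of the operation the reduce fallback above handles: on the targets rejected by backendSupportsF16/intrinsicsAllowed, an @reduce over a float vector is now lowered to the buildReducedCall loop of compiler_rt calls (e.g. __addhf3 for an f16 .Add) instead of an llvm.vector.reduce.* intrinsic. Either lowering produces the same result:

const std = @import("std");

test "@reduce over an f16 vector" {
    const v = @Vector(4, f16){ 1.0, 2.0, 3.0, 4.0 };
    try std.testing.expectEqual(@as(f16, 10.0), @reduce(.Add, v));
    try std.testing.expectEqual(@as(f16, 4.0), @reduce(.Max, v));
}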


@ -6358,9 +6358,11 @@ void init_const_float(ZigValue *const_val, ZigType *type, double value) {
const_val->data.x_f64 = value;
break;
case 80:
zig_double_to_extF80M(value, &const_val->data.x_f80);
break;
case 128:
// if we need this, we should add a function that accepts a float128_t param
zig_unreachable();
zig_double_to_f128M(value, &const_val->data.x_f128);
break;
default:
zig_unreachable();
}


@ -80,6 +80,7 @@ void codegen_set_strip(CodeGen *g, bool strip) {
}
}
static LLVMValueRef get_soft_float_fn(CodeGen *g, const char *name, int param_count, LLVMTypeRef param_type, LLVMTypeRef return_type);
static void render_const_val(CodeGen *g, ZigValue *const_val, const char *name);
static void render_const_val_global(CodeGen *g, ZigValue *const_val, const char *name);
static LLVMValueRef gen_const_val(CodeGen *g, ZigValue *const_val, const char *name);
@ -1736,12 +1737,7 @@ static LLVMValueRef gen_soft_float_widen_or_shorten(CodeGen *g, ZigType *actual_
}
}
LLVMValueRef func_ref = LLVMGetNamedFunction(g->module, fn_name);
if (func_ref == nullptr) {
LLVMTypeRef fn_type = LLVMFunctionType(return_type, &param_type, 1, false);
func_ref = LLVMAddFunction(g->module, fn_name, fn_type);
}
LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, 1, param_type, return_type);
result = LLVMBuildCall2(g->builder, LLVMGlobalGetValueType(func_ref), func_ref, &expr_val, 1, "");
// On non-Arm platforms we need to bitcast __trunc<>fhf2 result back to f16
@ -1766,9 +1762,12 @@ static LLVMValueRef gen_widen_or_shorten(CodeGen *g, bool want_runtime_safety, Z
uint64_t wanted_bits;
if (scalar_actual_type->id == ZigTypeIdFloat) {
if ((scalar_actual_type == g->builtin_types.entry_f80
if (((scalar_actual_type == g->builtin_types.entry_f80
|| scalar_wanted_type == g->builtin_types.entry_f80)
&& !target_has_f80(g->zig_target))
&& !target_has_f80(g->zig_target)) ||
((scalar_actual_type == g->builtin_types.entry_f16
|| scalar_wanted_type == g->builtin_types.entry_f16)
&& !target_is_arm(g->zig_target)))
{
return gen_soft_float_widen_or_shorten(g, actual_type, wanted_type, expr_val);
}
@ -3100,6 +3099,7 @@ static LLVMValueRef gen_float_un_op(CodeGen *g, LLVMValueRef operand, ZigType *o
ZigType *elem_type = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.elem_type : operand_type;
if ((elem_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
(elem_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) ||
(elem_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target)) ||
op == BuiltinFnIdTan)
{
return gen_soft_float_un_op(g, operand, operand_type, op);
@ -3690,7 +3690,8 @@ static LLVMValueRef ir_render_bin_op(CodeGen *g, Stage1Air *executable,
ZigType *operand_type = op1->value->type;
ZigType *scalar_type = (operand_type->id == ZigTypeIdVector) ? operand_type->data.vector.elem_type : operand_type;
if ((scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
(scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
(scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) ||
(scalar_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) {
// LLVM incorrectly lowers the soft float calls for f128 as if they operated on `long double`.
// On some targets this will be incorrect, so we manually lower the call ourselves.
LLVMValueRef op1_value = ir_llvm_value(g, op1);
@ -4024,7 +4025,8 @@ static LLVMValueRef ir_render_cast(CodeGen *g, Stage1Air *executable,
assert(actual_type->id == ZigTypeIdInt);
{
if ((wanted_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
(wanted_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
(wanted_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) ||
(wanted_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) {
return gen_soft_int_to_float_op(g, expr_val, actual_type, wanted_type);
} else {
if (actual_type->data.integral.is_signed) {
@ -4042,7 +4044,8 @@ static LLVMValueRef ir_render_cast(CodeGen *g, Stage1Air *executable,
LLVMValueRef result;
if ((actual_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
(actual_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
(actual_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) ||
(actual_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) {
result = gen_soft_float_to_int_op(g, expr_val, actual_type, wanted_type);
} else {
if (wanted_type->data.integral.is_signed) {
@ -4396,7 +4399,8 @@ static LLVMValueRef gen_negation(CodeGen *g, Stage1AirInst *inst, Stage1AirInst
operand_type->data.vector.elem_type : operand_type;
if ((scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
(scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
(scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) ||
(scalar_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) {
return gen_soft_float_neg(g, operand_type, llvm_operand);
}
@ -6477,6 +6481,55 @@ static LLVMValueRef ir_render_cmpxchg(CodeGen *g, Stage1Air *executable, Stage1A
return result_loc;
}
static LLVMValueRef ir_render_reduced_call(CodeGen *g, LLVMValueRef llvm_fn, LLVMValueRef operand_vector, size_t vector_len, LLVMValueRef accum_init, ZigType *accum_ty) {
LLVMTypeRef llvm_usize_ty = g->builtin_types.entry_usize->llvm_type;
LLVMValueRef llvm_vector_len = LLVMConstInt(llvm_usize_ty, vector_len, false);
LLVMTypeRef llvm_result_ty = LLVMTypeOf(accum_init);
// Allocate and initialize our mutable variables
LLVMValueRef i_ptr = build_alloca(g, g->builtin_types.entry_usize, "i", 0);
LLVMBuildStore(g->builder, LLVMConstInt(llvm_usize_ty, 0, false), i_ptr);
LLVMValueRef accum_ptr = build_alloca(g, accum_ty, "accum", 0);
LLVMBuildStore(g->builder, accum_init, accum_ptr);
// Set up the loop
LLVMBasicBlockRef loop = LLVMAppendBasicBlock(g->cur_fn_val, "ReduceLoop");
LLVMBasicBlockRef loop_exit = LLVMAppendBasicBlock(g->cur_fn_val, "AfterReduce");
LLVMBuildBr(g->builder, loop);
{
LLVMPositionBuilderAtEnd(g->builder, loop);
// while (i < vec.len)
LLVMValueRef i = LLVMBuildLoad2(g->builder, llvm_usize_ty, i_ptr, "");
LLVMValueRef cond = LLVMBuildICmp(g->builder, LLVMIntULT, i, llvm_vector_len, "");
LLVMBasicBlockRef loop_then = LLVMAppendBasicBlock(g->cur_fn_val, "ReduceLoopThen");
LLVMBuildCondBr(g->builder, cond, loop_then, loop_exit);
{
LLVMPositionBuilderAtEnd(g->builder, loop_then);
// accum = f(accum, vec[i]);
LLVMValueRef accum = LLVMBuildLoad2(g->builder, llvm_result_ty, accum_ptr, "");
LLVMValueRef element = LLVMBuildExtractElement(g->builder, operand_vector, i, "");
LLVMValueRef params[] {
accum,
element
};
LLVMValueRef new_accum = LLVMBuildCall2(g->builder, LLVMGlobalGetValueType(llvm_fn), llvm_fn, params, 2, "");
LLVMBuildStore(g->builder, new_accum, accum_ptr);
// i += 1
LLVMValueRef new_i = LLVMBuildAdd(g->builder, i, LLVMConstInt(llvm_usize_ty, 1, false), "");
LLVMBuildStore(g->builder, new_i, i_ptr);
LLVMBuildBr(g->builder, loop);
}
}
LLVMPositionBuilderAtEnd(g->builder, loop_exit);
return LLVMBuildLoad2(g->builder, llvm_result_ty, accum_ptr, "");
}
static LLVMValueRef ir_render_reduce(CodeGen *g, Stage1Air *executable, Stage1AirInstReduce *instruction) {
LLVMValueRef value = ir_llvm_value(g, instruction->value);
@ -6484,61 +6537,100 @@ static LLVMValueRef ir_render_reduce(CodeGen *g, Stage1Air *executable, Stage1Ai
assert(value_type->id == ZigTypeIdVector);
ZigType *scalar_type = value_type->data.vector.elem_type;
bool float_intrinsics_allowed = true;
const char *compiler_rt_type_abbrev = nullptr;
const char *math_float_prefix = nullptr;
const char *math_float_suffix = nullptr;
if ((scalar_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
(scalar_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) ||
(scalar_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) {
float_intrinsics_allowed = false;
compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(scalar_type);
math_float_prefix = libc_float_prefix(g, scalar_type);
math_float_suffix = libc_float_suffix(g, scalar_type);
}
ZigLLVMSetFastMath(g->builder, ir_want_fast_math(g, &instruction->base));
LLVMValueRef result_val;
char fn_name[64];
ZigValue *init_value = nullptr;
switch (instruction->op) {
case ReduceOp_and:
assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool);
result_val = ZigLLVMBuildAndReduce(g->builder, value);
return ZigLLVMBuildAndReduce(g->builder, value);
break;
case ReduceOp_or:
assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool);
result_val = ZigLLVMBuildOrReduce(g->builder, value);
return ZigLLVMBuildOrReduce(g->builder, value);
break;
case ReduceOp_xor:
assert(scalar_type->id == ZigTypeIdInt || scalar_type->id == ZigTypeIdBool);
result_val = ZigLLVMBuildXorReduce(g->builder, value);
return ZigLLVMBuildXorReduce(g->builder, value);
break;
case ReduceOp_min: {
if (scalar_type->id == ZigTypeIdInt) {
const bool is_signed = scalar_type->data.integral.is_signed;
result_val = ZigLLVMBuildIntMinReduce(g->builder, value, is_signed);
return ZigLLVMBuildIntMinReduce(g->builder, value, is_signed);
} else if (scalar_type->id == ZigTypeIdFloat) {
result_val = ZigLLVMBuildFPMinReduce(g->builder, value);
if (float_intrinsics_allowed) {
return ZigLLVMBuildFPMinReduce(g->builder, value);
} else {
snprintf(fn_name, sizeof(fn_name), "%sfmin%s", math_float_prefix, math_float_suffix);
init_value = create_const_float(g, scalar_type, NAN);
}
} else zig_unreachable();
} break;
case ReduceOp_max: {
if (scalar_type->id == ZigTypeIdInt) {
const bool is_signed = scalar_type->data.integral.is_signed;
result_val = ZigLLVMBuildIntMaxReduce(g->builder, value, is_signed);
return ZigLLVMBuildIntMaxReduce(g->builder, value, is_signed);
} else if (scalar_type->id == ZigTypeIdFloat) {
result_val = ZigLLVMBuildFPMaxReduce(g->builder, value);
if (float_intrinsics_allowed) {
return ZigLLVMBuildFPMaxReduce(g->builder, value);
} else {
snprintf(fn_name, sizeof(fn_name), "%sfmax%s", math_float_prefix, math_float_suffix);
init_value = create_const_float(g, scalar_type, NAN);
}
} else zig_unreachable();
} break;
case ReduceOp_add: {
if (scalar_type->id == ZigTypeIdInt) {
result_val = ZigLLVMBuildAddReduce(g->builder, value);
return ZigLLVMBuildAddReduce(g->builder, value);
} else if (scalar_type->id == ZigTypeIdFloat) {
LLVMValueRef neutral_value = LLVMConstReal(
get_llvm_type(g, scalar_type), -0.0);
result_val = ZigLLVMBuildFPAddReduce(g->builder, neutral_value, value);
if (float_intrinsics_allowed) {
LLVMValueRef neutral_value = LLVMConstReal(
get_llvm_type(g, scalar_type), -0.0);
return ZigLLVMBuildFPAddReduce(g->builder, neutral_value, value);
} else {
snprintf(fn_name, sizeof(fn_name), "__add%sf3", compiler_rt_type_abbrev);
init_value = create_const_float(g, scalar_type, 0.0);
}
} else zig_unreachable();
} break;
case ReduceOp_mul: {
if (scalar_type->id == ZigTypeIdInt) {
result_val = ZigLLVMBuildMulReduce(g->builder, value);
return ZigLLVMBuildMulReduce(g->builder, value);
} else if (scalar_type->id == ZigTypeIdFloat) {
LLVMValueRef neutral_value = LLVMConstReal(
get_llvm_type(g, scalar_type), 1.0);
result_val = ZigLLVMBuildFPMulReduce(g->builder, neutral_value, value);
if (float_intrinsics_allowed) {
LLVMValueRef neutral_value = LLVMConstReal(
get_llvm_type(g, scalar_type), 1.0);
return ZigLLVMBuildFPMulReduce(g->builder, neutral_value, value);
} else {
snprintf(fn_name, sizeof(fn_name), "__mul%sf3", compiler_rt_type_abbrev);
init_value = create_const_float(g, scalar_type, 1.0);
}
} else zig_unreachable();
} break;
default:
zig_unreachable();
}
return result_val;
LLVMValueRef llvm_init_value = gen_const_val(g, init_value, "");
uint32_t vector_len = value_type->data.vector.len;
LLVMTypeRef llvm_scalar_type = get_llvm_type(g, scalar_type);
const LLVMValueRef llvm_fn = get_soft_float_fn(g, fn_name, 2, llvm_scalar_type, llvm_scalar_type);
return ir_render_reduced_call(g, llvm_fn, value, vector_len, llvm_init_value, scalar_type);
}
static LLVMValueRef ir_render_fence(CodeGen *g, Stage1Air *executable, Stage1AirInstFence *instruction) {
@ -6650,6 +6742,10 @@ static LLVMValueRef ir_render_prefetch(CodeGen *g, Stage1Air *executable, Stage1
switch (g->zig_target->arch) {
case ZigLLVM_x86:
case ZigLLVM_x86_64:
case ZigLLVM_ppc:
case ZigLLVM_ppcle:
case ZigLLVM_ppc64:
case ZigLLVM_ppc64le:
return nullptr;
default:
break;
@ -7374,7 +7470,9 @@ static LLVMValueRef ir_render_soft_mul_add(CodeGen *g, Stage1Air *executable, St
uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0;
const char *fn_name;
if (float_type == g->builtin_types.entry_f32)
if (float_type == g->builtin_types.entry_f16)
fn_name = "__fmah";
else if (float_type == g->builtin_types.entry_f32)
fn_name = "fmaf";
else if (float_type == g->builtin_types.entry_f64)
fn_name = "fma";
@ -7385,13 +7483,8 @@ static LLVMValueRef ir_render_soft_mul_add(CodeGen *g, Stage1Air *executable, St
else
zig_unreachable();
LLVMValueRef func_ref = LLVMGetNamedFunction(g->module, fn_name);
if (func_ref == nullptr) {
LLVMTypeRef float_type_ref = float_type->llvm_type;
LLVMTypeRef params[3] = { float_type_ref, float_type_ref, float_type_ref };
LLVMTypeRef fn_type = LLVMFunctionType(float_type_ref, params, 3, false);
func_ref = LLVMAddFunction(g->module, fn_name, fn_type);
}
LLVMTypeRef float_type_ref = float_type->llvm_type;
LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, 3, float_type_ref, float_type_ref);
LLVMValueRef op1 = ir_llvm_value(g, instruction->op1);
LLVMValueRef op2 = ir_llvm_value(g, instruction->op2);
@ -7421,7 +7514,8 @@ static LLVMValueRef ir_render_mul_add(CodeGen *g, Stage1Air *executable, Stage1A
ZigType *operand_type = instruction->op1->value->type;
operand_type = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.elem_type : operand_type;
if ((operand_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
(operand_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
(operand_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) ||
(operand_type == g->builtin_types.entry_f16 && !target_is_arm(g->zig_target))) {
return ir_render_soft_mul_add(g, executable, instruction, operand_type);
}
LLVMValueRef op1 = ir_llvm_value(g, instruction->op1);
@ -9740,7 +9834,12 @@ static void define_builtin_types(CodeGen *g) {
}
}
add_fp_entry(g, "f16", 16, LLVMHalfType(), &g->builtin_types.entry_f16);
if (target_is_arm(g->zig_target)) {
add_fp_entry(g, "f16", 16, LLVMHalfType(), &g->builtin_types.entry_f16);
} else {
ZigType *u16_ty = get_int_type(g, false, 16);
add_fp_entry(g, "f16", 16, get_llvm_type(g, u16_ty), &g->builtin_types.entry_f16);
}
add_fp_entry(g, "f32", 32, LLVMFloatType(), &g->builtin_types.entry_f32);
add_fp_entry(g, "f64", 64, LLVMDoubleType(), &g->builtin_types.entry_f64);
add_fp_entry(g, "f128", 128, LLVMFP128Type(), &g->builtin_types.entry_f128);
@ -9837,6 +9936,7 @@ static void define_builtin_types(CodeGen *g) {
add_fp_entry(g, "c_longdouble", 128, LLVMFP128Type(), &g->builtin_types.entry_c_longdouble);
break;
case ZigLLVM_ppc:
case ZigLLVM_ppcle:
case ZigLLVM_ppc64:
case ZigLLVM_ppc64le:
add_fp_entry(g, "c_longdouble", 128, LLVMFP128Type(), &g->builtin_types.entry_c_longdouble);


@ -21,6 +21,20 @@ static inline float16_t zig_double_to_f16(double x) {
return f64_to_f16(y);
}
static inline void zig_double_to_extF80M(double x, extFloat80_t *result) {
float64_t y;
static_assert(sizeof(x) == sizeof(y), "");
memcpy(&y, &x, sizeof(x));
f64_to_extF80M(y, result);
}
static inline void zig_double_to_f128M(double x, float128_t *result) {
float64_t y;
static_assert(sizeof(x) == sizeof(y), "");
memcpy(&y, &x, sizeof(x));
f64_to_f128M(y, result);
}
// Return value is safe to coerce to float even when |x| is NaN or Infinity.
static inline double zig_f16_to_double(float16_t x) {


@ -950,7 +950,6 @@ bool target_is_arm(const ZigTarget *target) {
case ZigLLVM_msp430:
case ZigLLVM_nvptx:
case ZigLLVM_nvptx64:
case ZigLLVM_ppc64le:
case ZigLLVM_r600:
case ZigLLVM_renderscript32:
case ZigLLVM_renderscript64:
@ -971,6 +970,7 @@ bool target_is_arm(const ZigTarget *target) {
case ZigLLVM_ppc:
case ZigLLVM_ppcle:
case ZigLLVM_ppc64:
case ZigLLVM_ppc64le:
case ZigLLVM_ve:
case ZigLLVM_spirv32:
case ZigLLVM_spirv64:
@ -1125,8 +1125,8 @@ bool target_is_mips(const ZigTarget *target) {
}
bool target_is_ppc(const ZigTarget *target) {
return target->arch == ZigLLVM_ppc || target->arch == ZigLLVM_ppc64 ||
target->arch == ZigLLVM_ppc64le;
return target->arch == ZigLLVM_ppc || target->arch == ZigLLVM_ppcle ||
target->arch == ZigLLVM_ppc64 || target->arch == ZigLLVM_ppc64le;
}
// Returns the minimum alignment for every function pointer on the given


@ -89,7 +89,6 @@ test {
_ = @import("behavior/bugs/12551.zig");
_ = @import("behavior/bugs/12644.zig");
_ = @import("behavior/bugs/12680.zig");
_ = @import("behavior/bugs/12776.zig");
_ = @import("behavior/bugs/12786.zig");
_ = @import("behavior/bugs/12794.zig");
_ = @import("behavior/bugs/12801-1.zig");
@ -187,6 +186,8 @@ test {
_ = @import("behavior/packed_struct_explicit_backing_int.zig");
_ = @import("behavior/empty_union.zig");
_ = @import("behavior/inline_switch.zig");
_ = @import("behavior/bugs/12723.zig");
_ = @import("behavior/bugs/12776.zig");
}
if (builtin.os.tag != .wasi) {


@ -566,6 +566,8 @@ test "@alignCast null" {
}
test "alignment of slice element" {
if (builtin.zig_backend == .stage1) return error.SkipZigTest;
const a: []align(1024) const u8 = undefined;
try expect(@TypeOf(&a[0]) == *align(1024) const u8);
}


@ -3,6 +3,7 @@ const builtin = @import("builtin");
test {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage1) return error.SkipZigTest;
var x: u32 = 3;
const val: usize = while (true) switch (x) {


@ -0,0 +1,11 @@
const expect = @import("std").testing.expect;
// This test causes a compile error on stage1 regardless of whether
// the body of the test is comptime-gated or not. To work around this,
// we gate the inclusion of the test file.
test "Non-exhaustive enum backed by comptime_int" {
const E = enum(comptime_int) { a, b, c, _ };
comptime var e: E = .a;
e = @intToEnum(E, 378089457309184723749);
try expect(@enumToInt(e) == 378089457309184723749);
}


@ -8,6 +8,7 @@ fn capacity_() u64 {
test {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage1) return error.SkipZigTest;
try std.testing.expect((@This(){}).capacity() == 64);
}


@ -14,6 +14,7 @@ const Auto = struct {
}
};
test {
if (builtin.zig_backend == .stage1) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO


@ -1169,10 +1169,3 @@ test "Non-exhaustive enum with nonstandard int size behaves correctly" {
const E = enum(u15) { _ };
try expect(@sizeOf(E) == @sizeOf(u15));
}
test "Non-exhaustive enum backed by comptime_int" {
const E = enum(comptime_int) { a, b, c, _ };
comptime var e: E = .a;
e = @intToEnum(E, 378089457309184723749);
try expect(@enumToInt(e) == 378089457309184723749);
}


@ -1339,6 +1339,8 @@ test "lazy value is resolved as slice operand" {
}
test "break from inline loop depends on runtime condition" {
if (builtin.zig_backend == .stage1) return error.SkipZigTest;
const S = struct {
fn foo(a: u8) bool {
return a == 4;


@ -71,17 +71,6 @@ test "@mulAdd f128" {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.os.tag == .macos and builtin.cpu.arch == .aarch64) {
// https://github.com/ziglang/zig/issues/9900
return error.SkipZigTest;
}
if (builtin.zig_backend == .stage1 and
builtin.cpu.arch == .i386 and builtin.os.tag == .linux)
{
return error.SkipZigTest;
}
comptime try testMulAdd128();
try testMulAdd128();
}


@ -585,6 +585,7 @@ test "runtime init of unnamed packed struct type" {
}
test "packed struct passed to callconv(.C) function" {
if (builtin.zig_backend == .stage1) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;


@ -506,18 +506,12 @@ test "vector division operators" {
}
fn doTheTest() !void {
// https://github.com/ziglang/zig/issues/4952
if (builtin.target.os.tag != .windows) {
try doTheTestDiv(f16, [4]f16{ 4.0, -4.0, 4.0, -4.0 }, [4]f16{ 1.0, 2.0, -1.0, -2.0 });
}
try doTheTestDiv(f16, [4]f16{ 4.0, -4.0, 4.0, -4.0 }, [4]f16{ 1.0, 2.0, -1.0, -2.0 });
try doTheTestDiv(f32, [4]f32{ 4.0, -4.0, 4.0, -4.0 }, [4]f32{ 1.0, 2.0, -1.0, -2.0 });
try doTheTestDiv(f64, [4]f64{ 4.0, -4.0, 4.0, -4.0 }, [4]f64{ 1.0, 2.0, -1.0, -2.0 });
// https://github.com/ziglang/zig/issues/4952
if (builtin.target.os.tag != .windows) {
try doTheTestMod(f16, [4]f16{ 4.0, -4.0, 4.0, -4.0 }, [4]f16{ 1.0, 2.0, 0.5, 3.0 });
}
try doTheTestMod(f16, [4]f16{ 4.0, -4.0, 4.0, -4.0 }, [4]f16{ 1.0, 2.0, 0.5, 3.0 });
try doTheTestMod(f32, [4]f32{ 4.0, -4.0, 4.0, -4.0 }, [4]f32{ 1.0, 2.0, 0.5, 3.0 });
try doTheTestMod(f64, [4]f64{ 4.0, -4.0, 4.0, -4.0 }, [4]f64{ 1.0, 2.0, 0.5, 3.0 });


@ -315,6 +315,30 @@ const test_targets = blk: {
// .link_libc = true,
//},
.{
.target = .{
.cpu_arch = .powerpc64le,
.os_tag = .linux,
.abi = .none,
},
},
.{
.target = .{
.cpu_arch = .powerpc64le,
.os_tag = .linux,
.abi = .musl,
},
.link_libc = true,
},
.{
.target = .{
.cpu_arch = .powerpc64le,
.os_tag = .linux,
.abi = .gnu,
},
.link_libc = true,
},
.{
.target = .{
.cpu_arch = .riscv64,