diff --git a/lib/std/math/fma.zig b/lib/std/math/fma.zig
index dd76481e10..7ef734cf4e 100644
--- a/lib/std/math/fma.zig
+++ b/lib/std/math/fma.zig
@@ -19,6 +19,8 @@ pub fn fma(comptime T: type, x: T, y: T, z: T) T {
         // TODO this is not correct for some targets
         c_longdouble => @floatCast(c_longdouble, fma128(x, y, z)),
 
+        f80 => @floatCast(f80, fma128(x, y, z)),
+
         else => @compileError("fma not implemented for " ++ @typeName(T)),
     };
 }
diff --git a/lib/std/special/c.zig b/lib/std/special/c.zig
index 9a246f75ed..db1c56c536 100644
--- a/lib/std/special/c.zig
+++ b/lib/std/special/c.zig
@@ -12,7 +12,7 @@ const maxInt = std.math.maxInt;
 const native_os = builtin.os.tag;
 const native_arch = builtin.cpu.arch;
 const native_abi = builtin.abi;
-const long_double_is_f128 = builtin.target.longDoubleIsF128();
+const long_double_is_f128 = builtin.target.longDoubleIs(f128);
 
 const is_wasm = switch (native_arch) {
     .wasm32, .wasm64 => true,
@@ -90,10 +90,6 @@ comptime {
     @export(fmod, .{ .name = "fmod", .linkage = .Strong });
     @export(fmodf, .{ .name = "fmodf", .linkage = .Strong });
 
-    @export(fma, .{ .name = "fma", .linkage = .Strong });
-    @export(fmaf, .{ .name = "fmaf", .linkage = .Strong });
-    @export(fmal, .{ .name = "fmal", .linkage = .Strong });
-
     @export(sincos, .{ .name = "sincos", .linkage = .Strong });
     @export(sincosf, .{ .name = "sincosf", .linkage = .Strong });
 
@@ -561,20 +557,6 @@ test "fmod, fmodf" {
     }
 }
 
-fn fmaf(a: f32, b: f32, c: f32) callconv(.C) f32 {
-    return math.fma(f32, a, b, c);
-}
-
-fn fma(a: f64, b: f64, c: f64) callconv(.C) f64 {
-    return math.fma(f64, a, b, c);
-}
-fn fmal(a: c_longdouble, b: c_longdouble, c: c_longdouble) callconv(.C) c_longdouble {
-    if (!long_double_is_f128) {
-        @panic("TODO implement this");
-    }
-    return math.fma(c_longdouble, a, b, c);
-}
-
 fn sincos(a: f64, r_sin: *f64, r_cos: *f64) callconv(.C) void {
     r_sin.* = math.sin(a);
     r_cos.* = math.cos(a);
diff --git a/lib/std/special/compiler_rt.zig b/lib/std/special/compiler_rt.zig
index 50091995da..6819fe784d 100644
--- a/lib/std/special/compiler_rt.zig
+++ b/lib/std/special/compiler_rt.zig
@@ -19,7 +19,8 @@ const strong_linkage = if (is_test)
 else
     std.builtin.GlobalLinkage.Strong;
 
-const long_double_is_f128 = builtin.target.longDoubleIsF128();
+const long_double_is_f80 = builtin.target.longDoubleIs(f80);
+const long_double_is_f128 = builtin.target.longDoubleIs(f128);
 
 comptime {
     // These files do their own comptime exporting logic.
@@ -673,6 +674,29 @@ comptime {
         @export(_aullrem, .{ .name = "\x01__aullrem", .linkage = strong_linkage });
     }
 
+    const fmodl = @import("compiler_rt/floatfmodl.zig").fmodl;
+    if (!is_test) {
+        @export(fmodl, .{ .name = "fmodl", .linkage = linkage });
+
+        @export(floorf, .{ .name = "floorf", .linkage = linkage });
+        @export(floor, .{ .name = "floor", .linkage = linkage });
+        @export(floorl, .{ .name = "floorl", .linkage = linkage });
+
+        @export(fma, .{ .name = "fma", .linkage = linkage });
+        @export(fmaf, .{ .name = "fmaf", .linkage = linkage });
+        @export(fmal, .{ .name = "fmal", .linkage = linkage });
+        if (long_double_is_f80) {
+            @export(fmal, .{ .name = "__fmax", .linkage = linkage });
+        } else {
+            @export(__fmax, .{ .name = "__fmax", .linkage = linkage });
+        }
+        if (long_double_is_f128) {
+            @export(fmal, .{ .name = "fmaq", .linkage = linkage });
+        } else {
+            @export(fmaq, .{ .name = "fmaq", .linkage = linkage });
+        }
+    }
+
     if (arch.isSPARC()) {
         // SPARC systems use a different naming scheme
         const _Qp_add = @import("compiler_rt/sparc.zig")._Qp_add;
@@ -725,7 +749,7 @@ comptime {
         @export(_Qp_qtod, .{ .name = "_Qp_qtod", .linkage = linkage });
     }
 
-    if ((arch == .powerpc or arch.isPPC64()) and !is_test) {
+    if ((arch.isPPC() or arch.isPPC64()) and !is_test) {
         @export(__addtf3, .{ .name = "__addkf3", .linkage = linkage });
         @export(__subtf3, .{ .name = "__subkf3", .linkage = linkage });
         @export(__multf3, .{ .name = "__mulkf3", .linkage = linkage });
@@ -750,22 +774,29 @@ comptime {
         @export(__letf2, .{ .name = "__lekf2", .linkage = linkage });
         @export(__getf2, .{ .name = "__gtkf2", .linkage = linkage });
         @export(__unordtf2, .{ .name = "__unordkf2", .linkage = linkage });
+
+        // LLVM PPC backend lowers f128 fma to `fmaf128`.
+        @export(fmal, .{ .name = "fmaf128", .linkage = linkage });
     }
-
-    const fmodl = @import("compiler_rt/floatfmodl.zig").fmodl;
-    @export(fmodl, .{ .name = "fmodl", .linkage = linkage });
-
-    @export(floorf, .{ .name = "floorf", .linkage = linkage });
-    @export(floor, .{ .name = "floor", .linkage = linkage });
-    @export(floorl, .{ .name = "floorl", .linkage = linkage });
-    @export(fmaq, .{ .name = "fmaq", .linkage = linkage });
 }
 
 const math = std.math;
 
+fn fmaf(a: f32, b: f32, c: f32) callconv(.C) f32 {
+    return math.fma(f32, a, b, c);
+}
+fn fma(a: f64, b: f64, c: f64) callconv(.C) f64 {
+    return math.fma(f64, a, b, c);
+}
+fn __fmax(a: f80, b: f80, c: f80) callconv(.C) f80 {
+    return math.fma(f80, a, b, c);
+}
 fn fmaq(a: f128, b: f128, c: f128) callconv(.C) f128 {
     return math.fma(f128, a, b, c);
 }
+fn fmal(a: c_longdouble, b: c_longdouble, c: c_longdouble) callconv(.C) c_longdouble {
+    return math.fma(c_longdouble, a, b, c);
+}
 
 // TODO add intrinsics for these (and probably the double version too)
 // and have the math stuff use the intrinsic. same as @mod and @rem
diff --git a/lib/std/target.zig b/lib/std/target.zig
index 9a2dcfcc66..0b2a4a4df6 100644
--- a/lib/std/target.zig
+++ b/lib/std/target.zig
@@ -1714,9 +1714,55 @@ pub const Target = struct {
         };
     }
 
-    pub inline fn longDoubleIsF128(target: Target) bool {
-        return switch (target.cpu.arch) {
-            .riscv64, .aarch64, .aarch64_be, .aarch64_32, .s390x, .mips64, .mips64el => true,
+    pub inline fn longDoubleIs(target: Target, comptime F: type) bool {
+        if (target.abi == .msvc) {
+            return F == f64;
+        }
+        return switch (F) {
+            f128 => switch (target.cpu.arch) {
+                .riscv64,
+                .aarch64,
+                .aarch64_be,
+                .aarch64_32,
+                .s390x,
+                .mips64,
+                .mips64el,
+                .sparc,
+                .sparcv9,
+                .sparcel,
+                .powerpc,
+                .powerpcle,
+                .powerpc64,
+                .powerpc64le,
+                => true,
+
+                else => false,
+            },
+            f80 => switch (target.cpu.arch) {
+                .x86_64, .i386 => true,
+                else => false,
+            },
+            f64 => switch (target.cpu.arch) {
+                .x86_64,
+                .i386,
+                .riscv64,
+                .aarch64,
+                .aarch64_be,
+                .aarch64_32,
+                .s390x,
+                .mips64,
+                .mips64el,
+                .sparc,
+                .sparcv9,
+                .sparcel,
+                .powerpc,
+                .powerpcle,
+                .powerpc64,
+                .powerpc64le,
+                => false,
+
+                else => true,
+            },
             else => false,
         };
     }
diff --git a/src/Air.zig b/src/Air.zig
index 268b6c8631..cf5bfad620 100644
--- a/src/Air.zig
+++ b/src/Air.zig
@@ -579,6 +579,11 @@ pub const Inst = struct {
         /// Uses the `prefetch` field.
         prefetch,
 
+        /// Computes `(a * b) + c`, but only rounds once.
+        /// Uses the `pl_op` field with payload `Bin`.
+        /// The operand is the addend. The mulends are lhs and rhs.
+        mul_add,
+
         /// Implements @fieldParentPtr builtin.
         /// Uses the `ty_pl` field.
         field_parent_ptr,
@@ -986,6 +991,8 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
             return ptr_ty.elemType();
         },
 
+        .mul_add => return air.typeOf(datas[inst].pl_op.operand),
+
         .add_with_overflow,
         .sub_with_overflow,
         .mul_with_overflow,
diff --git a/src/AstGen.zig b/src/AstGen.zig
index cb710af900..fdd58dd948 100644
--- a/src/AstGen.zig
+++ b/src/AstGen.zig
@@ -7309,8 +7309,8 @@ fn builtinCall(
         },
         .mul_add => {
             const float_type = try typeExpr(gz, scope, params[0]);
-            const mulend1 = try expr(gz, scope, .{ .ty = float_type }, params[1]);
-            const mulend2 = try expr(gz, scope, .{ .ty = float_type }, params[2]);
+            const mulend1 = try expr(gz, scope, .{ .coerced_ty = float_type }, params[1]);
+            const mulend2 = try expr(gz, scope, .{ .coerced_ty = float_type }, params[2]);
             const addend = try expr(gz, scope, .{ .ty = float_type }, params[3]);
             const result = try gz.addPlNode(.mul_add, node, Zir.Inst.MulAdd{
                 .mulend1 = mulend1,
diff --git a/src/Liveness.zig b/src/Liveness.zig
index 7f007b5718..f2d0b5ca94 100644
--- a/src/Liveness.zig
+++ b/src/Liveness.zig
@@ -464,6 +464,11 @@ fn analyzeInst(
             const extra = a.air.extraData(Air.Cmpxchg, inst_datas[inst].ty_pl.payload).data;
             return trackOperands(a, new_set, inst, main_tomb, .{ extra.ptr, extra.expected_value, extra.new_value });
         },
+        .mul_add => {
+            const pl_op = inst_datas[inst].pl_op;
+            const extra = a.air.extraData(Air.Bin, pl_op.payload).data;
+            return trackOperands(a, new_set, inst, main_tomb, .{ extra.lhs, extra.rhs, pl_op.operand });
+        },
         .atomic_load => {
             const ptr = inst_datas[inst].atomic_load.ptr;
             return trackOperands(a, new_set, inst, main_tomb, .{ ptr, .none, .none });
diff --git a/src/Sema.zig b/src/Sema.zig
index b0bf3c4a23..4da7fabac6 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -13518,8 +13518,83 @@ fn zirAtomicStore(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError
 
 fn zirMulAdd(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
     const inst_data = sema.code.instructions.items(.data)[inst].pl_node;
+    const extra = sema.code.extraData(Zir.Inst.MulAdd, inst_data.payload_index).data;
     const src = inst_data.src();
-    return sema.fail(block, src, "TODO: Sema.zirMulAdd", .{});
+
+    const mulend1_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node };
+    const mulend2_src: LazySrcLoc = .{ .node_offset_builtin_call_arg2 = inst_data.src_node };
+    const addend_src: LazySrcLoc = .{ .node_offset_builtin_call_arg3 = inst_data.src_node };
+
+    const addend = sema.resolveInst(extra.addend);
+    const ty = sema.typeOf(addend);
+    const mulend1 = try sema.coerce(block, ty, sema.resolveInst(extra.mulend1), mulend1_src);
+    const mulend2 = try sema.coerce(block, ty, sema.resolveInst(extra.mulend2), mulend2_src);
+
+    const target = sema.mod.getTarget();
+
+    switch (ty.zigTypeTag()) {
+        .ComptimeFloat, .Float => {
+            const maybe_mulend1 = try sema.resolveMaybeUndefVal(block, mulend1_src, mulend1);
+            const maybe_mulend2 = try sema.resolveMaybeUndefVal(block, mulend2_src, mulend2);
+            const maybe_addend = try sema.resolveMaybeUndefVal(block, addend_src, addend);
+
+            const runtime_src = if (maybe_mulend1) |mulend1_val| rs: {
+                if (maybe_mulend2) |mulend2_val| {
+                    if (mulend2_val.isUndef()) return sema.addConstUndef(ty);
+
+                    if (maybe_addend) |addend_val| {
+                        if (addend_val.isUndef()) return sema.addConstUndef(ty);
+
+                        const result_val = try Value.mulAdd(
+                            ty,
+                            mulend1_val,
+                            mulend2_val,
+                            addend_val,
+                            sema.arena,
+                            target,
+                        );
+                        return sema.addConstant(ty, result_val);
+                    } else {
+                        break :rs addend_src;
+                    }
+                } else {
+                    if (maybe_addend) |addend_val| {
+                        if (addend_val.isUndef()) return sema.addConstUndef(ty);
+                    }
+                    break :rs mulend2_src;
+                }
+            } else rs: {
+                if (maybe_mulend2) |mulend2_val| {
+                    if (mulend2_val.isUndef()) return sema.addConstUndef(ty);
+                }
+                if (maybe_addend) |addend_val| {
+                    if (addend_val.isUndef()) return sema.addConstUndef(ty);
+                }
+                break :rs mulend1_src;
+            };
+
+            try sema.requireRuntimeBlock(block, runtime_src);
+            return block.addInst(.{
+                .tag = .mul_add,
+                .data = .{ .pl_op = .{
+                    .operand = addend,
+                    .payload = try sema.addExtra(Air.Bin{
+                        .lhs = mulend1,
+                        .rhs = mulend2,
+                    }),
+                } },
+            });
+        },
+        .Vector => {
+            const scalar_ty = ty.scalarType();
+            switch (scalar_ty.zigTypeTag()) {
+                .ComptimeFloat, .Float => {},
+                else => return sema.fail(block, src, "expected vector of floats or float type, found '{}'", .{scalar_ty}),
+            }
+            return sema.fail(block, src, "TODO: implement @mulAdd for vectors", .{});
+        },
+        else => return sema.fail(block, src, "expected vector of floats or float type, found '{}'", .{ty}),
+    }
 }
 
 fn zirBuiltinCall(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
diff --git a/src/Zir.zig b/src/Zir.zig
index caf2002f0f..5ccdbda86d 100644
--- a/src/Zir.zig
+++ b/src/Zir.zig
@@ -891,6 +891,8 @@ pub const Inst = struct {
         atomic_store,
         /// Implements the `@mulAdd` builtin.
         /// Uses the `pl_node` union field with payload `MulAdd`.
+        /// The addend communicates the type of the builtin.
+        /// The mulends need to be coerced to the same type.
         mul_add,
         /// Implements the `@call` builtin.
         /// Uses the `pl_node` union field with payload `BuiltinCall`.
diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig
index b3447f43e7..8f8ef8a70c 100644
--- a/src/arch/aarch64/CodeGen.zig
+++ b/src/arch/aarch64/CodeGen.zig
@@ -632,6 +632,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .aggregate_init  => try self.airAggregateInit(inst),
             .union_init      => try self.airUnionInit(inst),
             .prefetch        => try self.airPrefetch(inst),
+            .mul_add         => try self.airMulAdd(inst),
 
             .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
             .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
@@ -3652,6 +3653,15 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, MCValue.dead, .{ prefetch.ptr, .none, .none });
 }
 
+fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
+    const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+    const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else {
+        return self.fail("TODO implement airMulAdd for aarch64", .{});
+    };
+    return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand });
+}
+
 fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue {
     // First section of indexes correspond to a set number of constant values.
     const ref_int = @enumToInt(inst);
diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig
index 80f0169ba5..5a45019224 100644
--- a/src/arch/arm/CodeGen.zig
+++ b/src/arch/arm/CodeGen.zig
@@ -628,6 +628,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .aggregate_init  => try self.airAggregateInit(inst),
             .union_init      => try self.airUnionInit(inst),
             .prefetch        => try self.airPrefetch(inst),
+            .mul_add         => try self.airMulAdd(inst),
 
             .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
             .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
@@ -4086,6 +4087,15 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, MCValue.dead, .{ prefetch.ptr, .none, .none });
 }
 
+fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
+    const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+    const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else {
+        return self.fail("TODO implement airMulAdd for arm", .{});
+    };
+    return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand });
+}
+
 fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue {
     // First section of indexes correspond to a set number of constant values.
     const ref_int = @enumToInt(inst);
diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig
index 15600c09dd..c14e54e9e4 100644
--- a/src/arch/riscv64/CodeGen.zig
+++ b/src/arch/riscv64/CodeGen.zig
@@ -600,6 +600,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .aggregate_init  => try self.airAggregateInit(inst),
             .union_init      => try self.airUnionInit(inst),
             .prefetch        => try self.airPrefetch(inst),
+            .mul_add         => try self.airMulAdd(inst),
 
             .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
             .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
@@ -2203,6 +2204,15 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, MCValue.dead, .{ prefetch.ptr, .none, .none });
 }
 
+fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
+    const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+    const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else {
+        return self.fail("TODO implement airMulAdd for riscv64", .{});
+    };
+    return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand });
+}
+
 fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue {
     // First section of indexes correspond to a set number of constant values.
     const ref_int = @enumToInt(inst);
diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index b293d20db9..c3e8bb7864 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -1333,6 +1333,7 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
         .error_name,
         .errunion_payload_ptr_set,
         .field_parent_ptr,
+        .mul_add,
 
         // For these 4, probably best to wait until https://github.com/ziglang/zig/issues/10248
         // is implemented in the frontend before implementing them here in the wasm backend.
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 62dc924124..4f8dd0fb0b 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -717,6 +717,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .aggregate_init  => try self.airAggregateInit(inst),
             .union_init      => try self.airUnionInit(inst),
             .prefetch        => try self.airPrefetch(inst),
+            .mul_add         => try self.airMulAdd(inst),
 
             .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
             .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
@@ -5559,6 +5560,15 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, MCValue.dead, .{ prefetch.ptr, .none, .none });
 }
 
+fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
+    const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+    const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+    const result: MCValue = if (self.liveness.isUnused(inst)) .dead else {
+        return self.fail("TODO implement airMulAdd for x86_64", .{});
+    };
+    return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand });
+}
+
 fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue {
     // First section of indexes correspond to a set number of constant values.
     const ref_int = @enumToInt(inst);
diff --git a/src/codegen/c.zig b/src/codegen/c.zig
index ba7bb6fa3a..2a10a8094a 100644
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -16,6 +16,7 @@ const trace = @import("../tracy.zig").trace;
 const LazySrcLoc = Module.LazySrcLoc;
 const Air = @import("../Air.zig");
 const Liveness = @import("../Liveness.zig");
+const CType = @import("../type.zig").CType;
 
 const Mutability = enum { Const, Mut };
 const BigIntConst = std.math.big.int.Const;
@@ -1635,6 +1636,8 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
             .trunc_float,
             => |tag| return f.fail("TODO: C backend: implement unary op for tag '{s}'", .{@tagName(tag)}),
 
+            .mul_add => try airMulAdd(f, inst),
+
             .add_with_overflow => try airAddWithOverflow(f, inst),
             .sub_with_overflow => try airSubWithOverflow(f, inst),
             .mul_with_overflow => try airMulWithOverflow(f, inst),
@@ -3621,6 +3624,35 @@ fn airWasmMemoryGrow(f: *Function, inst: Air.Inst.Index) !CValue {
     return local;
 }
 
+fn airMulAdd(f: *Function, inst: Air.Inst.Index) !CValue {
+    if (f.liveness.isUnused(inst)) return CValue.none;
+    const pl_op = f.air.instructions.items(.data)[inst].pl_op;
+    const extra = f.air.extraData(Air.Bin, pl_op.payload).data;
+    const inst_ty = f.air.typeOfIndex(inst);
+    const mulend1 = try f.resolveInst(extra.lhs);
+    const mulend2 = try f.resolveInst(extra.rhs);
+    const addend = try f.resolveInst(pl_op.operand);
+    const writer = f.object.writer();
+    const target = f.object.dg.module.getTarget();
+    const fn_name = switch (inst_ty.floatBits(target)) {
+        16, 32 => "fmaf",
+        64 => "fma",
+        80 => if (CType.longdouble.sizeInBits(target) == 80) "fmal" else "__fmax",
+        128 => if (CType.longdouble.sizeInBits(target) == 128) "fmal" else "fmaq",
+        else => unreachable,
+    };
+    const local = try f.allocLocal(inst_ty, .Const);
+    try writer.writeAll(" = ");
+    try writer.print("{s}(", .{fn_name});
+    try f.writeCValue(writer, mulend1);
+    try writer.writeAll(", ");
+    try f.writeCValue(writer, mulend2);
+    try writer.writeAll(", ");
+    try f.writeCValue(writer, addend);
+    try writer.writeAll(");\n");
+    return local;
+}
+
 fn toMemoryOrder(order: std.builtin.AtomicOrder) [:0]const u8 {
     return switch (order) {
         .Unordered => "memory_order_relaxed",
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 446876dfe5..c837d9a00d 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -18,6 +18,7 @@ const target_util = @import("../target.zig");
 const Value = @import("../value.zig").Value;
 const Type = @import("../type.zig").Type;
 const LazySrcLoc = Module.LazySrcLoc;
+const CType = @import("../type.zig").CType;
 
 const Error = error{ OutOfMemory, CodegenFail };
 
@@ -2189,6 +2190,7 @@ pub const FuncGen = struct {
                 .min       => try self.airMin(inst),
                 .max       => try self.airMax(inst),
                 .slice     => try self.airSlice(inst),
+                .mul_add   => try self.airMulAdd(inst),
 
                 .add_with_overflow => try self.airOverflow(inst, "llvm.sadd.with.overflow", "llvm.uadd.with.overflow"),
                 .sub_with_overflow => try self.airOverflow(inst, "llvm.ssub.with.overflow", "llvm.usub.with.overflow"),
@@ -3842,6 +3844,46 @@ pub const FuncGen = struct {
         return overflow_bit;
     }
 
+    fn airMulAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+        if (self.liveness.isUnused(inst)) return null;
+
+        const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+        const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
+
+        const mulend1 = try self.resolveInst(extra.lhs);
+        const mulend2 = try self.resolveInst(extra.rhs);
+        const addend = try self.resolveInst(pl_op.operand);
+
+        const ty = self.air.typeOfIndex(inst);
+        const llvm_ty = try self.dg.llvmType(ty);
+        const target = self.dg.module.getTarget();
+
+        const Strat = union(enum) {
+            intrinsic,
+            libc: [*:0]const u8,
+        };
+        const strat: Strat = switch (ty.floatBits(target)) {
+            16, 32, 64 => Strat.intrinsic,
+            80 => if (CType.longdouble.sizeInBits(target) == 80) Strat{ .intrinsic = {} } else Strat{ .libc = "__fmax" },
+            // LLVM always lowers the fma builtin for f128 to fmal, which is for `long double`.
+            // On some targets this will be correct; on others it will be incorrect.
+            128 => if (CType.longdouble.sizeInBits(target) == 128) Strat{ .intrinsic = {} } else Strat{ .libc = "fmaq" },
+            else => unreachable,
+        };
+
+        const llvm_fn = switch (strat) {
+            .intrinsic => self.getIntrinsic("llvm.fma", &.{llvm_ty}),
+            .libc => |fn_name| self.dg.object.llvm_module.getNamedFunction(fn_name) orelse b: {
+                const param_types = [_]*const llvm.Type{ llvm_ty, llvm_ty, llvm_ty };
+                const fn_type = llvm.functionType(llvm_ty, &param_types, param_types.len, .False);
+                break :b self.dg.object.llvm_module.addFunction(fn_name, fn_type);
+            },
+        };
+
+        const params = [_]*const llvm.Value{ mulend1, mulend2, addend };
+        return self.builder.buildCall(llvm_fn, &params, params.len, .C, .Auto, "");
+    }
+
     fn airShlWithOverflow(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
         if (self.liveness.isUnused(inst))
             return null;
@@ -4020,8 +4062,15 @@ pub const FuncGen = struct {
 
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
         const operand = try self.resolveInst(ty_op.operand);
-        const dest_llvm_ty = try self.dg.llvmType(self.air.typeOfIndex(inst));
-
+        const operand_ty = self.air.typeOf(ty_op.operand);
+        const dest_ty = self.air.typeOfIndex(inst);
+        const target = self.dg.module.getTarget();
+        const dest_bits = dest_ty.floatBits(target);
+        const src_bits = operand_ty.floatBits(target);
+        if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) {
+            return softF80TruncOrExt(self, operand, src_bits, dest_bits);
+        }
+        const dest_llvm_ty = try self.dg.llvmType(dest_ty);
         return self.builder.buildFPTrunc(operand, dest_llvm_ty, "");
     }
 
@@ -4031,8 +4080,15 @@ pub const FuncGen = struct {
 
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
         const operand = try self.resolveInst(ty_op.operand);
+        const operand_ty = self.air.typeOf(ty_op.operand);
+        const dest_ty = self.air.typeOfIndex(inst);
+        const target = self.dg.module.getTarget();
+        const dest_bits = dest_ty.floatBits(target);
+        const src_bits = operand_ty.floatBits(target);
+        if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) {
+            return softF80TruncOrExt(self, operand, src_bits, dest_bits);
+        }
         const dest_llvm_ty = try self.dg.llvmType(self.air.typeOfIndex(inst));
-
         return self.builder.buildFPExt(operand, dest_llvm_ty, "");
     }
 
@@ -5064,6 +5120,87 @@ pub const FuncGen = struct {
         return null;
     }
 
+    fn softF80TruncOrExt(
+        self: *FuncGen,
+        operand: *const llvm.Value,
+        src_bits: u16,
+        dest_bits: u16,
+    ) !?*const llvm.Value {
+        const target = self.dg.module.getTarget();
+
+        var param_llvm_ty: *const llvm.Type = self.context.intType(80);
+        var ret_llvm_ty: *const llvm.Type = param_llvm_ty;
+        var fn_name: [*:0]const u8 = undefined;
+        var arg = operand;
+        var final_cast: ?*const llvm.Type = null;
+
+        assert(src_bits == 80 or dest_bits == 80);
+
+        if (src_bits == 80) switch (dest_bits) {
+            16 => {
+                // See corresponding condition at definition of
+                // __truncxfhf2 in compiler-rt.
+                if (target.cpu.arch.isAARCH64()) {
+                    ret_llvm_ty = self.context.halfType();
+                } else {
+                    ret_llvm_ty = self.context.intType(16);
+                    final_cast = self.context.halfType();
+                }
+                fn_name = "__truncxfhf2";
+            },
+            32 => {
+                ret_llvm_ty = self.context.floatType();
+                fn_name = "__truncxfsf2";
+            },
+            64 => {
+                ret_llvm_ty = self.context.doubleType();
+                fn_name = "__truncxfdf2";
+            },
+            80 => return operand,
+            128 => {
+                ret_llvm_ty = self.context.fp128Type();
+                fn_name = "__extendxftf2";
+            },
+            else => unreachable,
+        } else switch (src_bits) {
+            16 => {
+                // See corresponding condition at definition of
+                // __extendhfxf2 in compiler-rt.
+                param_llvm_ty = if (target.cpu.arch.isAARCH64())
+                    self.context.halfType()
+                else
+                    self.context.intType(16);
+                arg = self.builder.buildBitCast(arg, param_llvm_ty, "");
+                fn_name = "__extendhfxf2";
+            },
+            32 => {
+                param_llvm_ty = self.context.floatType();
+                fn_name = "__extendsfxf2";
+            },
+            64 => {
+                param_llvm_ty = self.context.doubleType();
+                fn_name = "__extenddfxf2";
+            },
+            80 => return operand,
+            128 => {
+                param_llvm_ty = self.context.fp128Type();
+                fn_name = "__trunctfxf2";
+            },
+            else => unreachable,
+        }
+
+        const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse f: {
+            const param_types = [_]*const llvm.Type{param_llvm_ty};
+            const fn_type = llvm.functionType(ret_llvm_ty, &param_types, param_types.len, .False);
+            break :f self.dg.object.llvm_module.addFunction(fn_name, fn_type);
+        };
+
+        var args: [1]*const llvm.Value = .{arg};
+        const result = self.builder.buildCall(llvm_fn, &args, args.len, .C, .Auto, "");
+        const final_cast_llvm_ty = final_cast orelse return result;
+        return self.builder.buildBitCast(result, final_cast_llvm_ty, "");
+    }
+
     fn getErrorNameTable(self: *FuncGen) !*const llvm.Value {
         if (self.dg.object.error_name_table) |table| {
             return table;
diff --git a/src/print_air.zig b/src/print_air.zig
index 2149be764a..6eac7dee31 100644
--- a/src/print_air.zig
+++ b/src/print_air.zig
@@ -252,6 +252,7 @@ const Writer = struct {
             .field_parent_ptr => try w.writeFieldParentPtr(s, inst),
             .wasm_memory_size => try w.writeWasmMemorySize(s, inst),
             .wasm_memory_grow => try w.writeWasmMemoryGrow(s, inst),
+            .mul_add => try w.writeMulAdd(s, inst),
 
             .add_with_overflow,
             .sub_with_overflow,
@@ -358,6 +359,17 @@ const Writer = struct {
         });
     }
 
+    fn writeMulAdd(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
+        const pl_op = w.air.instructions.items(.data)[inst].pl_op;
+        const extra = w.air.extraData(Air.Bin, pl_op.payload).data;
+
+        try w.writeOperand(s, inst, 0, extra.lhs);
+        try s.writeAll(", ");
+        try w.writeOperand(s, inst, 1, extra.rhs);
+        try s.writeAll(", ");
+        try w.writeOperand(s, inst, 2, pl_op.operand);
+    }
+
     fn writeFence(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
         const atomic_order = w.air.instructions.items(.data)[inst].fence;
 
diff --git a/src/stage1/target.cpp b/src/stage1/target.cpp
index a505b4bd21..51aac6c072 100644
--- a/src/stage1/target.cpp
+++ b/src/stage1/target.cpp
@@ -1004,6 +1004,9 @@ bool target_has_debug_info(const ZigTarget *target) {
 }
 
 bool target_long_double_is_f128(const ZigTarget *target) {
+    if (target->abi == ZigLLVM_MSVC) {
+        return false;
+    }
     switch (target->arch) {
         case ZigLLVM_riscv64:
         case ZigLLVM_aarch64:
@@ -1012,6 +1015,13 @@ bool target_long_double_is_f128(const ZigTarget *target) {
         case ZigLLVM_systemz:
         case ZigLLVM_mips64:
         case ZigLLVM_mips64el:
+        case ZigLLVM_sparc:
+        case ZigLLVM_sparcv9:
+        case ZigLLVM_sparcel:
+        case ZigLLVM_ppc:
+        case ZigLLVM_ppcle:
+        case ZigLLVM_ppc64:
+        case ZigLLVM_ppc64le:
             return true;
 
         default:
diff --git a/src/type.zig b/src/type.zig
index 169574bbd4..a371577c7a 100644
--- a/src/type.zig
+++ b/src/type.zig
@@ -5436,33 +5436,36 @@ pub const CType = enum {
         switch (target.os.tag) {
             .freestanding, .other => switch (target.cpu.arch) {
                 .msp430 => switch (self) {
-                    .short,
-                    .ushort,
-                    .int,
-                    .uint,
-                    => return 16,
-                    .long,
-                    .ulong,
-                    => return 32,
-                    .longlong,
-                    .ulonglong,
-                    => return 64,
-                    .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"),
+                    .short, .ushort, .int, .uint => return 16,
+                    .long, .ulong => return 32,
+                    .longlong, .ulonglong, .longdouble => return 64,
                 },
                 else => switch (self) {
-                    .short,
-                    .ushort,
-                    => return 16,
-                    .int,
-                    .uint,
-                    => return 32,
-                    .long,
-                    .ulong,
-                    => return target.cpu.arch.ptrBitWidth(),
-                    .longlong,
-                    .ulonglong,
-                    => return 64,
-                    .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"),
+                    .short, .ushort => return 16,
+                    .int, .uint => return 32,
+                    .long, .ulong => return target.cpu.arch.ptrBitWidth(),
+                    .longlong, .ulonglong => return 64,
+                    .longdouble => switch (target.cpu.arch) {
+                        .i386, .x86_64 => return 80,
+
+                        .riscv64,
+                        .aarch64,
+                        .aarch64_be,
+                        .aarch64_32,
+                        .s390x,
+                        .mips64,
+                        .mips64el,
+                        .sparc,
+                        .sparcv9,
+                        .sparcel,
+                        .powerpc,
+                        .powerpcle,
+                        .powerpc64,
+                        .powerpc64le,
+                        => return 128,
+
+                        else => return 64,
+                    },
                 },
             },
 
@@ -5477,19 +5480,13 @@ pub const CType = enum {
             .plan9,
             .solaris,
             => switch (self) {
-                .short,
-                .ushort,
-                => return 16,
-                .int,
-                .uint,
-                => return 32,
-                .long,
-                .ulong,
-                => return target.cpu.arch.ptrBitWidth(),
-                .longlong,
-                .ulonglong,
-                => return 64,
+                .short, .ushort => return 16,
+                .int, .uint => return 32,
+                .long, .ulong => return target.cpu.arch.ptrBitWidth(),
+                .longlong, .ulonglong => return 64,
                 .longdouble => switch (target.cpu.arch) {
+                    .i386, .x86_64 => return 80,
+
                     .riscv64,
                     .aarch64,
                     .aarch64_be,
@@ -5497,40 +5494,33 @@ pub const CType = enum {
                     .s390x,
                     .mips64,
                     .mips64el,
+                    .sparc,
+                    .sparcv9,
+                    .sparcel,
+                    .powerpc,
+                    .powerpcle,
+                    .powerpc64,
+                    .powerpc64le,
                     => return 128,
 
-                    else => return 80,
+                    else => return 64,
                 },
             },
 
             .windows, .uefi => switch (self) {
-                .short,
-                .ushort,
-                => return 16,
-                .int,
-                .uint,
-                .long,
-                .ulong,
-                => return 32,
-                .longlong,
-                .ulonglong,
-                => return 64,
-                .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"),
+                .short, .ushort => return 16,
+                .int, .uint, .long, .ulong => return 32,
+                .longlong, .ulonglong, .longdouble => return 64,
             },
 
-            .ios => switch (self) {
-                .short,
-                .ushort,
-                => return 16,
-                .int,
-                .uint,
-                => return 32,
-                .long,
-                .ulong,
-                .longlong,
-                .ulonglong,
-                => return 64,
-                .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"),
+            .ios, .tvos, .watchos => switch (self) {
+                .short, .ushort => return 16,
+                .int, .uint => return 32,
+                .long, .ulong, .longlong, .ulonglong => return 64,
+                .longdouble => switch (target.cpu.arch) {
+                    .i386, .x86_64 => return 80,
+                    else => return 64,
+                },
             },
 
             .ananas,
@@ -5549,8 +5539,6 @@ pub const CType = enum {
             .amdhsa,
             .ps4,
             .elfiamcu,
-            .tvos,
-            .watchos,
             .mesa3d,
             .contiki,
             .amdpal,
diff --git a/src/value.zig b/src/value.zig
index e667c566b9..87fd4d81cb 100644
--- a/src/value.zig
+++ b/src/value.zig
@@ -2931,7 +2931,7 @@ pub const Value = extern union {
         return fromBigInt(arena, result_bigint.toConst());
     }
 
-    /// operands must be integers; handles undefined. 
+    /// operands must be integers; handles undefined.
     pub fn bitwiseAnd(lhs: Value, rhs: Value, arena: Allocator) !Value {
         if (lhs.isUndef() or rhs.isUndef()) return Value.initTag(.undef);
 
@@ -2951,7 +2951,7 @@ pub const Value = extern union {
         return fromBigInt(arena, result_bigint.toConst());
     }
 
-    /// operands must be integers; handles undefined. 
+    /// operands must be integers; handles undefined.
     pub fn bitwiseNand(lhs: Value, rhs: Value, ty: Type, arena: Allocator, target: Target) !Value {
         if (lhs.isUndef() or rhs.isUndef()) return Value.initTag(.undef);
 
@@ -2965,7 +2965,7 @@ pub const Value = extern union {
         return bitwiseXor(anded, all_ones, arena);
     }
 
-    /// operands must be integers; handles undefined. 
+    /// operands must be integers; handles undefined.
     pub fn bitwiseOr(lhs: Value, rhs: Value, arena: Allocator) !Value {
         if (lhs.isUndef() or rhs.isUndef()) return Value.initTag(.undef);
 
@@ -2984,7 +2984,7 @@ pub const Value = extern union {
         return fromBigInt(arena, result_bigint.toConst());
     }
 
-    /// operands must be integers; handles undefined. 
+    /// operands must be integers; handles undefined.
     pub fn bitwiseXor(lhs: Value, rhs: Value, arena: Allocator) !Value {
         if (lhs.isUndef() or rhs.isUndef()) return Value.initTag(.undef);
 
@@ -4020,6 +4020,49 @@ pub const Value = extern union {
         }
     }
 
+    pub fn mulAdd(
+        float_type: Type,
+        mulend1: Value,
+        mulend2: Value,
+        addend: Value,
+        arena: Allocator,
+        target: Target,
+    ) Allocator.Error!Value {
+        switch (float_type.floatBits(target)) {
+            16 => {
+                const m1 = mulend1.toFloat(f16);
+                const m2 = mulend2.toFloat(f16);
+                const a = addend.toFloat(f16);
+                return Value.Tag.float_16.create(arena, @mulAdd(f16, m1, m2, a));
+            },
+            32 => {
+                const m1 = mulend1.toFloat(f32);
+                const m2 = mulend2.toFloat(f32);
+                const a = addend.toFloat(f32);
+                return Value.Tag.float_32.create(arena, @mulAdd(f32, m1, m2, a));
+            },
+            64 => {
+                const m1 = mulend1.toFloat(f64);
+                const m2 = mulend2.toFloat(f64);
+                const a = addend.toFloat(f64);
+                return Value.Tag.float_64.create(arena, @mulAdd(f64, m1, m2, a));
+            },
+            80 => {
+                const m1 = mulend1.toFloat(f80);
+                const m2 = mulend2.toFloat(f80);
+                const a = addend.toFloat(f80);
+                return Value.Tag.float_80.create(arena, @mulAdd(f80, m1, m2, a));
+            },
+            128 => {
+                const m1 = mulend1.toFloat(f128);
+                const m2 = mulend2.toFloat(f128);
+                const a = addend.toFloat(f128);
+                return Value.Tag.float_128.create(arena, @mulAdd(f128, m1, m2, a));
+            },
+            else => unreachable,
+        }
+    }
+
     /// This type is not copyable since it may contain pointers to its inner data.
     pub const Payload = struct {
         tag: Tag,
diff --git a/test/behavior/muladd.zig b/test/behavior/muladd.zig
index bf50541b56..cc7ad15184 100644
--- a/test/behavior/muladd.zig
+++ b/test/behavior/muladd.zig
@@ -2,7 +2,11 @@ const builtin = @import("builtin");
 const expect = @import("std").testing.expect;
 
 test "@mulAdd" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
 
     comptime try testMulAdd();
     try testMulAdd();
@@ -47,18 +51,28 @@ fn testMulAdd80() !void {
 }
 
 test "@mulAdd f128" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
 
     if (builtin.os.tag == .macos and builtin.cpu.arch == .aarch64) {
         // https://github.com/ziglang/zig/issues/9900
         return error.SkipZigTest;
     }
 
-    comptime try testMullAdd128();
-    try testMullAdd128();
+    if (builtin.zig_backend == .stage1 and
+        builtin.cpu.arch == .i386 and builtin.os.tag == .linux)
+    {
+        return error.SkipZigTest;
+    }
+
+    comptime try testMulAdd128();
+    try testMulAdd128();
 }
 
-fn testMullAdd128() !void {
+fn testMulAdd128() !void {
     var a: f16 = 5.5;
     var b: f128 = 2.5;
     var c: f128 = 6.25;