diff --git a/lib/std/math/fma.zig b/lib/std/math/fma.zig index dd76481e10..7ef734cf4e 100644 --- a/lib/std/math/fma.zig +++ b/lib/std/math/fma.zig @@ -19,6 +19,8 @@ pub fn fma(comptime T: type, x: T, y: T, z: T) T { // TODO this is not correct for some targets c_longdouble => @floatCast(c_longdouble, fma128(x, y, z)), + f80 => @floatCast(f80, fma128(x, y, z)), + else => @compileError("fma not implemented for " ++ @typeName(T)), }; } diff --git a/lib/std/special/c.zig b/lib/std/special/c.zig index 9a246f75ed..db1c56c536 100644 --- a/lib/std/special/c.zig +++ b/lib/std/special/c.zig @@ -12,7 +12,7 @@ const maxInt = std.math.maxInt; const native_os = builtin.os.tag; const native_arch = builtin.cpu.arch; const native_abi = builtin.abi; -const long_double_is_f128 = builtin.target.longDoubleIsF128(); +const long_double_is_f128 = builtin.target.longDoubleIs(f128); const is_wasm = switch (native_arch) { .wasm32, .wasm64 => true, @@ -90,10 +90,6 @@ comptime { @export(fmod, .{ .name = "fmod", .linkage = .Strong }); @export(fmodf, .{ .name = "fmodf", .linkage = .Strong }); - @export(fma, .{ .name = "fma", .linkage = .Strong }); - @export(fmaf, .{ .name = "fmaf", .linkage = .Strong }); - @export(fmal, .{ .name = "fmal", .linkage = .Strong }); - @export(sincos, .{ .name = "sincos", .linkage = .Strong }); @export(sincosf, .{ .name = "sincosf", .linkage = .Strong }); @@ -561,20 +557,6 @@ test "fmod, fmodf" { } } -fn fmaf(a: f32, b: f32, c: f32) callconv(.C) f32 { - return math.fma(f32, a, b, c); -} - -fn fma(a: f64, b: f64, c: f64) callconv(.C) f64 { - return math.fma(f64, a, b, c); -} -fn fmal(a: c_longdouble, b: c_longdouble, c: c_longdouble) callconv(.C) c_longdouble { - if (!long_double_is_f128) { - @panic("TODO implement this"); - } - return math.fma(c_longdouble, a, b, c); -} - fn sincos(a: f64, r_sin: *f64, r_cos: *f64) callconv(.C) void { r_sin.* = math.sin(a); r_cos.* = math.cos(a); diff --git a/lib/std/special/compiler_rt.zig b/lib/std/special/compiler_rt.zig index 
50091995da..6819fe784d 100644 --- a/lib/std/special/compiler_rt.zig +++ b/lib/std/special/compiler_rt.zig @@ -19,7 +19,8 @@ const strong_linkage = if (is_test) else std.builtin.GlobalLinkage.Strong; -const long_double_is_f128 = builtin.target.longDoubleIsF128(); +const long_double_is_f80 = builtin.target.longDoubleIs(f80); +const long_double_is_f128 = builtin.target.longDoubleIs(f128); comptime { // These files do their own comptime exporting logic. @@ -673,6 +674,29 @@ comptime { @export(_aullrem, .{ .name = "\x01__aullrem", .linkage = strong_linkage }); } + const fmodl = @import("compiler_rt/floatfmodl.zig").fmodl; + if (!is_test) { + @export(fmodl, .{ .name = "fmodl", .linkage = linkage }); + + @export(floorf, .{ .name = "floorf", .linkage = linkage }); + @export(floor, .{ .name = "floor", .linkage = linkage }); + @export(floorl, .{ .name = "floorl", .linkage = linkage }); + + @export(fma, .{ .name = "fma", .linkage = linkage }); + @export(fmaf, .{ .name = "fmaf", .linkage = linkage }); + @export(fmal, .{ .name = "fmal", .linkage = linkage }); + if (long_double_is_f80) { + @export(fmal, .{ .name = "__fmax", .linkage = linkage }); + } else { + @export(__fmax, .{ .name = "__fmax", .linkage = linkage }); + } + if (long_double_is_f128) { + @export(fmal, .{ .name = "fmaq", .linkage = linkage }); + } else { + @export(fmaq, .{ .name = "fmaq", .linkage = linkage }); + } + } + if (arch.isSPARC()) { // SPARC systems use a different naming scheme const _Qp_add = @import("compiler_rt/sparc.zig")._Qp_add; @@ -725,7 +749,7 @@ comptime { @export(_Qp_qtod, .{ .name = "_Qp_qtod", .linkage = linkage }); } - if ((arch == .powerpc or arch.isPPC64()) and !is_test) { + if ((arch.isPPC() or arch.isPPC64()) and !is_test) { @export(__addtf3, .{ .name = "__addkf3", .linkage = linkage }); @export(__subtf3, .{ .name = "__subkf3", .linkage = linkage }); @export(__multf3, .{ .name = "__mulkf3", .linkage = linkage }); @@ -750,22 +774,29 @@ comptime { @export(__letf2, .{ .name = "__lekf2", 
.linkage = linkage }); @export(__getf2, .{ .name = "__gtkf2", .linkage = linkage }); @export(__unordtf2, .{ .name = "__unordkf2", .linkage = linkage }); + + // LLVM PPC backend lowers f128 fma to `fmaf128`. + @export(fmal, .{ .name = "fmaf128", .linkage = linkage }); } - - const fmodl = @import("compiler_rt/floatfmodl.zig").fmodl; - @export(fmodl, .{ .name = "fmodl", .linkage = linkage }); - - @export(floorf, .{ .name = "floorf", .linkage = linkage }); - @export(floor, .{ .name = "floor", .linkage = linkage }); - @export(floorl, .{ .name = "floorl", .linkage = linkage }); - @export(fmaq, .{ .name = "fmaq", .linkage = linkage }); } const math = std.math; +fn fmaf(a: f32, b: f32, c: f32) callconv(.C) f32 { + return math.fma(f32, a, b, c); +} +fn fma(a: f64, b: f64, c: f64) callconv(.C) f64 { + return math.fma(f64, a, b, c); +} +fn __fmax(a: f80, b: f80, c: f80) callconv(.C) f80 { + return math.fma(f80, a, b, c); +} fn fmaq(a: f128, b: f128, c: f128) callconv(.C) f128 { return math.fma(f128, a, b, c); } +fn fmal(a: c_longdouble, b: c_longdouble, c: c_longdouble) callconv(.C) c_longdouble { + return math.fma(c_longdouble, a, b, c); +} // TODO add intrinsics for these (and probably the double version too) // and have the math stuff use the intrinsic. 
same as @mod and @rem diff --git a/lib/std/target.zig b/lib/std/target.zig index 9a2dcfcc66..0b2a4a4df6 100644 --- a/lib/std/target.zig +++ b/lib/std/target.zig @@ -1714,9 +1714,55 @@ pub const Target = struct { }; } - pub inline fn longDoubleIsF128(target: Target) bool { - return switch (target.cpu.arch) { - .riscv64, .aarch64, .aarch64_be, .aarch64_32, .s390x, .mips64, .mips64el => true, + pub inline fn longDoubleIs(target: Target, comptime F: type) bool { + if (target.abi == .msvc) { + return F == f64; + } + return switch (F) { + f128 => switch (target.cpu.arch) { + .riscv64, + .aarch64, + .aarch64_be, + .aarch64_32, + .s390x, + .mips64, + .mips64el, + .sparc, + .sparcv9, + .sparcel, + .powerpc, + .powerpcle, + .powerpc64, + .powerpc64le, + => true, + + else => false, + }, + f80 => switch (target.cpu.arch) { + .x86_64, .i386 => true, + else => false, + }, + f64 => switch (target.cpu.arch) { + .x86_64, + .i386, + .riscv64, + .aarch64, + .aarch64_be, + .aarch64_32, + .s390x, + .mips64, + .mips64el, + .sparc, + .sparcv9, + .sparcel, + .powerpc, + .powerpcle, + .powerpc64, + .powerpc64le, + => false, + + else => true, + }, else => false, }; } diff --git a/src/Air.zig b/src/Air.zig index 268b6c8631..cf5bfad620 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -579,6 +579,11 @@ pub const Inst = struct { /// Uses the `prefetch` field. prefetch, + /// Computes `(a * b) + c`, but only rounds once. + /// Uses the `pl_op` field with payload `Bin`. + /// The operand is the addend. The mulends are lhs and rhs. + mul_add, + /// Implements @fieldParentPtr builtin. /// Uses the `ty_pl` field. 
field_parent_ptr, @@ -986,6 +991,8 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { return ptr_ty.elemType(); }, + .mul_add => return air.typeOf(datas[inst].pl_op.operand), + .add_with_overflow, .sub_with_overflow, .mul_with_overflow, diff --git a/src/AstGen.zig b/src/AstGen.zig index cb710af900..fdd58dd948 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -7309,8 +7309,8 @@ fn builtinCall( }, .mul_add => { const float_type = try typeExpr(gz, scope, params[0]); - const mulend1 = try expr(gz, scope, .{ .ty = float_type }, params[1]); - const mulend2 = try expr(gz, scope, .{ .ty = float_type }, params[2]); + const mulend1 = try expr(gz, scope, .{ .coerced_ty = float_type }, params[1]); + const mulend2 = try expr(gz, scope, .{ .coerced_ty = float_type }, params[2]); const addend = try expr(gz, scope, .{ .ty = float_type }, params[3]); const result = try gz.addPlNode(.mul_add, node, Zir.Inst.MulAdd{ .mulend1 = mulend1, diff --git a/src/Liveness.zig b/src/Liveness.zig index 7f007b5718..f2d0b5ca94 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -464,6 +464,11 @@ fn analyzeInst( const extra = a.air.extraData(Air.Cmpxchg, inst_datas[inst].ty_pl.payload).data; return trackOperands(a, new_set, inst, main_tomb, .{ extra.ptr, extra.expected_value, extra.new_value }); }, + .mul_add => { + const pl_op = inst_datas[inst].pl_op; + const extra = a.air.extraData(Air.Bin, pl_op.payload).data; + return trackOperands(a, new_set, inst, main_tomb, .{ extra.lhs, extra.rhs, pl_op.operand }); + }, .atomic_load => { const ptr = inst_datas[inst].atomic_load.ptr; return trackOperands(a, new_set, inst, main_tomb, .{ ptr, .none, .none }); diff --git a/src/Sema.zig b/src/Sema.zig index b0bf3c4a23..4da7fabac6 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -13518,8 +13518,83 @@ fn zirAtomicStore(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError fn zirMulAdd(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { const inst_data = 
sema.code.instructions.items(.data)[inst].pl_node; + const extra = sema.code.extraData(Zir.Inst.MulAdd, inst_data.payload_index).data; const src = inst_data.src(); - return sema.fail(block, src, "TODO: Sema.zirMulAdd", .{}); + + const mulend1_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node }; + const mulend2_src: LazySrcLoc = .{ .node_offset_builtin_call_arg2 = inst_data.src_node }; + const addend_src: LazySrcLoc = .{ .node_offset_builtin_call_arg3 = inst_data.src_node }; + + const addend = sema.resolveInst(extra.addend); + const ty = sema.typeOf(addend); + const mulend1 = try sema.coerce(block, ty, sema.resolveInst(extra.mulend1), mulend1_src); + const mulend2 = try sema.coerce(block, ty, sema.resolveInst(extra.mulend2), mulend2_src); + + const target = sema.mod.getTarget(); + + switch (ty.zigTypeTag()) { + .ComptimeFloat, .Float => { + const maybe_mulend1 = try sema.resolveMaybeUndefVal(block, mulend1_src, mulend1); + const maybe_mulend2 = try sema.resolveMaybeUndefVal(block, mulend2_src, mulend2); + const maybe_addend = try sema.resolveMaybeUndefVal(block, addend_src, addend); + + const runtime_src = if (maybe_mulend1) |mulend1_val| rs: { + if (maybe_mulend2) |mulend2_val| { + if (mulend2_val.isUndef()) return sema.addConstUndef(ty); + + if (maybe_addend) |addend_val| { + if (addend_val.isUndef()) return sema.addConstUndef(ty); + + const result_val = try Value.mulAdd( + ty, + mulend1_val, + mulend2_val, + addend_val, + sema.arena, + target, + ); + return sema.addConstant(ty, result_val); + } else { + break :rs addend_src; + } + } else { + if (maybe_addend) |addend_val| { + if (addend_val.isUndef()) return sema.addConstUndef(ty); + } + break :rs mulend2_src; + } + } else rs: { + if (maybe_mulend2) |mulend2_val| { + if (mulend2_val.isUndef()) return sema.addConstUndef(ty); + } + if (maybe_addend) |addend_val| { + if (addend_val.isUndef()) return sema.addConstUndef(ty); + } + break :rs mulend1_src; + }; + + try 
sema.requireRuntimeBlock(block, runtime_src); + return block.addInst(.{ + .tag = .mul_add, + .data = .{ .pl_op = .{ + .operand = addend, + .payload = try sema.addExtra(Air.Bin{ + .lhs = mulend1, + .rhs = mulend2, + }), + } }, + }); + }, + .Vector => { + const scalar_ty = ty.scalarType(); + switch (scalar_ty.zigTypeTag()) { + .ComptimeFloat, .Float => {}, + else => return sema.fail(block, src, "expected vector of floats or float type, found '{}'", .{scalar_ty}), + } + return sema.fail(block, src, "TODO: implement @mulAdd for vectors", .{}); + }, + else => return sema.fail(block, src, "expected vector of floats or float type, found '{}'", .{ty}), + } } fn zirBuiltinCall(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { diff --git a/src/Zir.zig b/src/Zir.zig index caf2002f0f..5ccdbda86d 100644 --- a/src/Zir.zig +++ b/src/Zir.zig @@ -891,6 +891,8 @@ pub const Inst = struct { atomic_store, /// Implements the `@mulAdd` builtin. /// Uses the `pl_node` union field with payload `MulAdd`. + /// The addend communicates the type of the builtin. + /// The mulends need to be coerced to the same type. mul_add, /// Implements the `@call` builtin. /// Uses the `pl_node` union field with payload `BuiltinCall`. 
diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index b3447f43e7..8f8ef8a70c 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -632,6 +632,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), + .mul_add => try self.airMulAdd(inst), .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered), .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic), @@ -3652,6 +3653,15 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, MCValue.dead, .{ prefetch.ptr, .none, .none }); } +fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const extra = self.air.extraData(Air.Bin, pl_op.payload).data; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else { + return self.fail("TODO implement airMulAdd for aarch64", .{}); + }; + return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand }); +} + fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue { // First section of indexes correspond to a set number of constant values. 
const ref_int = @enumToInt(inst); diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 80f0169ba5..5a45019224 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -628,6 +628,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), + .mul_add => try self.airMulAdd(inst), .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered), .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic), @@ -4086,6 +4087,15 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, MCValue.dead, .{ prefetch.ptr, .none, .none }); } +fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const extra = self.air.extraData(Air.Bin, pl_op.payload).data; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else { + return self.fail("TODO implement airMulAdd for arm", .{}); + }; + return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand }); +} + fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue { // First section of indexes correspond to a set number of constant values. 
const ref_int = @enumToInt(inst); diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 15600c09dd..c14e54e9e4 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -600,6 +600,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), + .mul_add => try self.airMulAdd(inst), .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered), .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic), @@ -2203,6 +2204,15 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, MCValue.dead, .{ prefetch.ptr, .none, .none }); } +fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const extra = self.air.extraData(Air.Bin, pl_op.payload).data; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else { + return self.fail("TODO implement airMulAdd for riscv64", .{}); + }; + return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand }); +} + fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue { // First section of indexes correspond to a set number of constant values. const ref_int = @enumToInt(inst); diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index b293d20db9..c3e8bb7864 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1333,6 +1333,7 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue { .error_name, .errunion_payload_ptr_set, .field_parent_ptr, + .mul_add, // For these 4, probably best to wait until https://github.com/ziglang/zig/issues/10248 // is implemented in the frontend before implementing them here in the wasm backend. 
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 62dc924124..4f8dd0fb0b 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -717,6 +717,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), + .mul_add => try self.airMulAdd(inst), .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered), .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic), @@ -5559,6 +5560,15 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, MCValue.dead, .{ prefetch.ptr, .none, .none }); } +fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const extra = self.air.extraData(Air.Bin, pl_op.payload).data; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else { + return self.fail("TODO implement airMulAdd for x86_64", .{}); + }; + return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand }); +} + fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue { // First section of indexes correspond to a set number of constant values. 
const ref_int = @enumToInt(inst); diff --git a/src/codegen/c.zig b/src/codegen/c.zig index ba7bb6fa3a..2a10a8094a 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -16,6 +16,7 @@ const trace = @import("../tracy.zig").trace; const LazySrcLoc = Module.LazySrcLoc; const Air = @import("../Air.zig"); const Liveness = @import("../Liveness.zig"); +const CType = @import("../type.zig").CType; const Mutability = enum { Const, Mut }; const BigIntConst = std.math.big.int.Const; @@ -1635,6 +1636,8 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO .trunc_float, => |tag| return f.fail("TODO: C backend: implement unary op for tag '{s}'", .{@tagName(tag)}), + .mul_add => try airMulAdd(f, inst), + .add_with_overflow => try airAddWithOverflow(f, inst), .sub_with_overflow => try airSubWithOverflow(f, inst), .mul_with_overflow => try airMulWithOverflow(f, inst), @@ -3621,6 +3624,35 @@ fn airWasmMemoryGrow(f: *Function, inst: Air.Inst.Index) !CValue { return local; } +fn airMulAdd(f: *Function, inst: Air.Inst.Index) !CValue { + if (f.liveness.isUnused(inst)) return CValue.none; + const pl_op = f.air.instructions.items(.data)[inst].pl_op; + const extra = f.air.extraData(Air.Bin, pl_op.payload).data; + const inst_ty = f.air.typeOfIndex(inst); + const mulend1 = try f.resolveInst(extra.lhs); + const mulend2 = try f.resolveInst(extra.rhs); + const addend = try f.resolveInst(pl_op.operand); + const writer = f.object.writer(); + const target = f.object.dg.module.getTarget(); + const fn_name = switch (inst_ty.floatBits(target)) { + 16, 32 => "fmaf", + 64 => "fma", + 80 => if (CType.longdouble.sizeInBits(target) == 80) "fmal" else "__fmax", + 128 => if (CType.longdouble.sizeInBits(target) == 128) "fmal" else "fmaq", + else => unreachable, + }; + const local = try f.allocLocal(inst_ty, .Const); + try writer.writeAll(" = "); + try writer.print("{s}(", .{fn_name}); + try f.writeCValue(writer, mulend1); + try writer.writeAll(", "); + try 
f.writeCValue(writer, mulend2); + try writer.writeAll(", "); + try f.writeCValue(writer, addend); + try writer.writeAll(");\n"); + return local; +} + fn toMemoryOrder(order: std.builtin.AtomicOrder) [:0]const u8 { return switch (order) { .Unordered => "memory_order_relaxed", diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 446876dfe5..c837d9a00d 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -18,6 +18,7 @@ const target_util = @import("../target.zig"); const Value = @import("../value.zig").Value; const Type = @import("../type.zig").Type; const LazySrcLoc = Module.LazySrcLoc; +const CType = @import("../type.zig").CType; const Error = error{ OutOfMemory, CodegenFail }; @@ -2189,6 +2190,7 @@ pub const FuncGen = struct { .min => try self.airMin(inst), .max => try self.airMax(inst), .slice => try self.airSlice(inst), + .mul_add => try self.airMulAdd(inst), .add_with_overflow => try self.airOverflow(inst, "llvm.sadd.with.overflow", "llvm.uadd.with.overflow"), .sub_with_overflow => try self.airOverflow(inst, "llvm.ssub.with.overflow", "llvm.usub.with.overflow"), @@ -3842,6 +3844,46 @@ pub const FuncGen = struct { return overflow_bit; } + fn airMulAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const extra = self.air.extraData(Air.Bin, pl_op.payload).data; + + const mulend1 = try self.resolveInst(extra.lhs); + const mulend2 = try self.resolveInst(extra.rhs); + const addend = try self.resolveInst(pl_op.operand); + + const ty = self.air.typeOfIndex(inst); + const llvm_ty = try self.dg.llvmType(ty); + const target = self.dg.module.getTarget(); + + const Strat = union(enum) { + intrinsic, + libc: [*:0]const u8, + }; + const strat: Strat = switch (ty.floatBits(target)) { + 16, 32, 64 => Strat.intrinsic, + 80 => if (CType.longdouble.sizeInBits(target) == 80) Strat{ .intrinsic = {} } else Strat{ .libc = "__fmax" }, + // 
LLVM always lowers the fma builtin for f128 to fmal, which is for `long double`. + // On some targets this will be correct; on others it will be incorrect. + 128 => if (CType.longdouble.sizeInBits(target) == 128) Strat{ .intrinsic = {} } else Strat{ .libc = "fmaq" }, + else => unreachable, + }; + + const llvm_fn = switch (strat) { + .intrinsic => self.getIntrinsic("llvm.fma", &.{llvm_ty}), + .libc => |fn_name| self.dg.object.llvm_module.getNamedFunction(fn_name) orelse b: { + const param_types = [_]*const llvm.Type{ llvm_ty, llvm_ty, llvm_ty }; + const fn_type = llvm.functionType(llvm_ty, &param_types, param_types.len, .False); + break :b self.dg.object.llvm_module.addFunction(fn_name, fn_type); + }, + }; + + const params = [_]*const llvm.Value{ mulend1, mulend2, addend }; + return self.builder.buildCall(llvm_fn, &params, params.len, .C, .Auto, ""); + } + fn airShlWithOverflow(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; @@ -4020,8 +4062,15 @@ pub const FuncGen = struct { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const operand = try self.resolveInst(ty_op.operand); - const dest_llvm_ty = try self.dg.llvmType(self.air.typeOfIndex(inst)); - + const operand_ty = self.air.typeOf(ty_op.operand); + const dest_ty = self.air.typeOfIndex(inst); + const target = self.dg.module.getTarget(); + const dest_bits = dest_ty.floatBits(target); + const src_bits = operand_ty.floatBits(target); + if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) { + return softF80TruncOrExt(self, operand, src_bits, dest_bits); + } + const dest_llvm_ty = try self.dg.llvmType(dest_ty); return self.builder.buildFPTrunc(operand, dest_llvm_ty, ""); } @@ -4031,8 +4080,15 @@ pub const FuncGen = struct { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const operand = try self.resolveInst(ty_op.operand); + const operand_ty = self.air.typeOf(ty_op.operand); + const dest_ty = self.air.typeOfIndex(inst); 
+ const target = self.dg.module.getTarget(); + const dest_bits = dest_ty.floatBits(target); + const src_bits = operand_ty.floatBits(target); + if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) { + return softF80TruncOrExt(self, operand, src_bits, dest_bits); + } const dest_llvm_ty = try self.dg.llvmType(self.air.typeOfIndex(inst)); - return self.builder.buildFPExt(operand, dest_llvm_ty, ""); } @@ -5064,6 +5120,87 @@ pub const FuncGen = struct { return null; } + fn softF80TruncOrExt( + self: *FuncGen, + operand: *const llvm.Value, + src_bits: u16, + dest_bits: u16, + ) !?*const llvm.Value { + const target = self.dg.module.getTarget(); + + var param_llvm_ty: *const llvm.Type = self.context.intType(80); + var ret_llvm_ty: *const llvm.Type = param_llvm_ty; + var fn_name: [*:0]const u8 = undefined; + var arg = operand; + var final_cast: ?*const llvm.Type = null; + + assert(src_bits == 80 or dest_bits == 80); + + if (src_bits == 80) switch (dest_bits) { + 16 => { + // See corresponding condition at definition of + // __truncxfhf2 in compiler-rt. + if (target.cpu.arch.isAARCH64()) { + ret_llvm_ty = self.context.halfType(); + } else { + ret_llvm_ty = self.context.intType(16); + final_cast = self.context.halfType(); + } + fn_name = "__truncxfhf2"; + }, + 32 => { + ret_llvm_ty = self.context.floatType(); + fn_name = "__truncxfsf2"; + }, + 64 => { + ret_llvm_ty = self.context.doubleType(); + fn_name = "__truncxfdf2"; + }, + 80 => return operand, + 128 => { + ret_llvm_ty = self.context.fp128Type(); + fn_name = "__extendxftf2"; + }, + else => unreachable, + } else switch (src_bits) { + 16 => { + // See corresponding condition at definition of + // __extendhfxf2 in compiler-rt. 
+ param_llvm_ty = if (target.cpu.arch.isAARCH64()) + self.context.halfType() + else + self.context.intType(16); + arg = self.builder.buildBitCast(arg, param_llvm_ty, ""); + fn_name = "__extendhfxf2"; + }, + 32 => { + param_llvm_ty = self.context.floatType(); + fn_name = "__extendsfxf2"; + }, + 64 => { + param_llvm_ty = self.context.doubleType(); + fn_name = "__extenddfxf2"; + }, + 80 => return operand, + 128 => { + param_llvm_ty = self.context.fp128Type(); + fn_name = "__trunctfxf2"; + }, + else => unreachable, + } + + const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse f: { + const param_types = [_]*const llvm.Type{param_llvm_ty}; + const fn_type = llvm.functionType(ret_llvm_ty, &param_types, param_types.len, .False); + break :f self.dg.object.llvm_module.addFunction(fn_name, fn_type); + }; + + var args: [1]*const llvm.Value = .{arg}; + const result = self.builder.buildCall(llvm_fn, &args, args.len, .C, .Auto, ""); + const final_cast_llvm_ty = final_cast orelse return result; + return self.builder.buildBitCast(result, final_cast_llvm_ty, ""); + } + fn getErrorNameTable(self: *FuncGen) !*const llvm.Value { if (self.dg.object.error_name_table) |table| { return table; diff --git a/src/print_air.zig b/src/print_air.zig index 2149be764a..6eac7dee31 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -252,6 +252,7 @@ const Writer = struct { .field_parent_ptr => try w.writeFieldParentPtr(s, inst), .wasm_memory_size => try w.writeWasmMemorySize(s, inst), .wasm_memory_grow => try w.writeWasmMemoryGrow(s, inst), + .mul_add => try w.writeMulAdd(s, inst), .add_with_overflow, .sub_with_overflow, @@ -358,6 +359,17 @@ const Writer = struct { }); } + fn writeMulAdd(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { + const pl_op = w.air.instructions.items(.data)[inst].pl_op; + const extra = w.air.extraData(Air.Bin, pl_op.payload).data; + + try w.writeOperand(s, inst, 0, extra.lhs); + try s.writeAll(", "); + try w.writeOperand(s, inst, 
1, extra.rhs); + try s.writeAll(", "); + try w.writeOperand(s, inst, 2, pl_op.operand); + } + fn writeFence(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { const atomic_order = w.air.instructions.items(.data)[inst].fence; diff --git a/src/stage1/target.cpp b/src/stage1/target.cpp index a505b4bd21..51aac6c072 100644 --- a/src/stage1/target.cpp +++ b/src/stage1/target.cpp @@ -1004,6 +1004,9 @@ bool target_has_debug_info(const ZigTarget *target) { } bool target_long_double_is_f128(const ZigTarget *target) { + if (target->abi == ZigLLVM_MSVC) { + return false; + } switch (target->arch) { case ZigLLVM_riscv64: case ZigLLVM_aarch64: @@ -1012,6 +1015,13 @@ bool target_long_double_is_f128(const ZigTarget *target) { case ZigLLVM_systemz: case ZigLLVM_mips64: case ZigLLVM_mips64el: + case ZigLLVM_sparc: + case ZigLLVM_sparcv9: + case ZigLLVM_sparcel: + case ZigLLVM_ppc: + case ZigLLVM_ppcle: + case ZigLLVM_ppc64: + case ZigLLVM_ppc64le: return true; default: diff --git a/src/type.zig b/src/type.zig index 169574bbd4..a371577c7a 100644 --- a/src/type.zig +++ b/src/type.zig @@ -5436,33 +5436,36 @@ pub const CType = enum { switch (target.os.tag) { .freestanding, .other => switch (target.cpu.arch) { .msp430 => switch (self) { - .short, - .ushort, - .int, - .uint, - => return 16, - .long, - .ulong, - => return 32, - .longlong, - .ulonglong, - => return 64, - .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"), + .short, .ushort, .int, .uint => return 16, + .long, .ulong => return 32, + .longlong, .ulonglong, .longdouble => return 64, }, else => switch (self) { - .short, - .ushort, - => return 16, - .int, - .uint, - => return 32, - .long, - .ulong, - => return target.cpu.arch.ptrBitWidth(), - .longlong, - .ulonglong, - => return 64, - .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"), + .short, .ushort => return 16, + .int, .uint => return 32, + .long, .ulong => return 
target.cpu.arch.ptrBitWidth(), + .longlong, .ulonglong => return 64, + .longdouble => switch (target.cpu.arch) { + .i386, .x86_64 => return 80, + + .riscv64, + .aarch64, + .aarch64_be, + .aarch64_32, + .s390x, + .mips64, + .mips64el, + .sparc, + .sparcv9, + .sparcel, + .powerpc, + .powerpcle, + .powerpc64, + .powerpc64le, + => return 128, + + else => return 64, + }, }, }, @@ -5477,19 +5480,13 @@ pub const CType = enum { .plan9, .solaris, => switch (self) { - .short, - .ushort, - => return 16, - .int, - .uint, - => return 32, - .long, - .ulong, - => return target.cpu.arch.ptrBitWidth(), - .longlong, - .ulonglong, - => return 64, + .short, .ushort => return 16, + .int, .uint => return 32, + .long, .ulong => return target.cpu.arch.ptrBitWidth(), + .longlong, .ulonglong => return 64, .longdouble => switch (target.cpu.arch) { + .i386, .x86_64 => return 80, + .riscv64, .aarch64, .aarch64_be, @@ -5497,40 +5494,33 @@ pub const CType = enum { .s390x, .mips64, .mips64el, + .sparc, + .sparcv9, + .sparcel, + .powerpc, + .powerpcle, + .powerpc64, + .powerpc64le, => return 128, - else => return 80, + else => return 64, }, }, .windows, .uefi => switch (self) { - .short, - .ushort, - => return 16, - .int, - .uint, - .long, - .ulong, - => return 32, - .longlong, - .ulonglong, - => return 64, - .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"), + .short, .ushort => return 16, + .int, .uint, .long, .ulong => return 32, + .longlong, .ulonglong, .longdouble => return 64, }, - .ios => switch (self) { - .short, - .ushort, - => return 16, - .int, - .uint, - => return 32, - .long, - .ulong, - .longlong, - .ulonglong, - => return 64, - .longdouble => @panic("TODO figure out what kind of float `long double` is on this target"), + .ios, .tvos, .watchos => switch (self) { + .short, .ushort => return 16, + .int, .uint => return 32, + .long, .ulong, .longlong, .ulonglong => return 64, + .longdouble => switch (target.cpu.arch) { + .i386, .x86_64 => return 
80, + else => return 64, + }, }, .ananas, @@ -5549,8 +5539,6 @@ pub const CType = enum { .amdhsa, .ps4, .elfiamcu, - .tvos, - .watchos, .mesa3d, .contiki, .amdpal, diff --git a/src/value.zig b/src/value.zig index e667c566b9..87fd4d81cb 100644 --- a/src/value.zig +++ b/src/value.zig @@ -2931,7 +2931,7 @@ pub const Value = extern union { return fromBigInt(arena, result_bigint.toConst()); } - /// operands must be integers; handles undefined. + /// operands must be integers; handles undefined. pub fn bitwiseAnd(lhs: Value, rhs: Value, arena: Allocator) !Value { if (lhs.isUndef() or rhs.isUndef()) return Value.initTag(.undef); @@ -2951,7 +2951,7 @@ pub const Value = extern union { return fromBigInt(arena, result_bigint.toConst()); } - /// operands must be integers; handles undefined. + /// operands must be integers; handles undefined. pub fn bitwiseNand(lhs: Value, rhs: Value, ty: Type, arena: Allocator, target: Target) !Value { if (lhs.isUndef() or rhs.isUndef()) return Value.initTag(.undef); @@ -2965,7 +2965,7 @@ pub const Value = extern union { return bitwiseXor(anded, all_ones, arena); } - /// operands must be integers; handles undefined. + /// operands must be integers; handles undefined. pub fn bitwiseOr(lhs: Value, rhs: Value, arena: Allocator) !Value { if (lhs.isUndef() or rhs.isUndef()) return Value.initTag(.undef); @@ -2984,7 +2984,7 @@ pub const Value = extern union { return fromBigInt(arena, result_bigint.toConst()); } - /// operands must be integers; handles undefined. + /// operands must be integers; handles undefined. 
pub fn bitwiseXor(lhs: Value, rhs: Value, arena: Allocator) !Value { if (lhs.isUndef() or rhs.isUndef()) return Value.initTag(.undef); @@ -4020,6 +4020,49 @@ pub const Value = extern union { } } + pub fn mulAdd( + float_type: Type, + mulend1: Value, + mulend2: Value, + addend: Value, + arena: Allocator, + target: Target, + ) Allocator.Error!Value { + switch (float_type.floatBits(target)) { + 16 => { + const m1 = mulend1.toFloat(f16); + const m2 = mulend2.toFloat(f16); + const a = addend.toFloat(f16); + return Value.Tag.float_16.create(arena, @mulAdd(f16, m1, m2, a)); + }, + 32 => { + const m1 = mulend1.toFloat(f32); + const m2 = mulend2.toFloat(f32); + const a = addend.toFloat(f32); + return Value.Tag.float_32.create(arena, @mulAdd(f32, m1, m2, a)); + }, + 64 => { + const m1 = mulend1.toFloat(f64); + const m2 = mulend2.toFloat(f64); + const a = addend.toFloat(f64); + return Value.Tag.float_64.create(arena, @mulAdd(f64, m1, m2, a)); + }, + 80 => { + const m1 = mulend1.toFloat(f80); + const m2 = mulend2.toFloat(f80); + const a = addend.toFloat(f80); + return Value.Tag.float_80.create(arena, @mulAdd(f80, m1, m2, a)); + }, + 128 => { + const m1 = mulend1.toFloat(f128); + const m2 = mulend2.toFloat(f128); + const a = addend.toFloat(f128); + return Value.Tag.float_128.create(arena, @mulAdd(f128, m1, m2, a)); + }, + else => unreachable, + } + } + /// This type is not copyable since it may contain pointers to its inner data. 
pub const Payload = struct { tag: Tag, diff --git a/test/behavior/muladd.zig b/test/behavior/muladd.zig index bf50541b56..cc7ad15184 100644 --- a/test/behavior/muladd.zig +++ b/test/behavior/muladd.zig @@ -2,7 +2,11 @@ const builtin = @import("builtin"); const expect = @import("std").testing.expect; test "@mulAdd" { - if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO comptime try testMulAdd(); try testMulAdd(); @@ -47,18 +51,28 @@ fn testMulAdd80() !void { } test "@mulAdd f128" { - if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.os.tag == .macos and builtin.cpu.arch == .aarch64) { // https://github.com/ziglang/zig/issues/9900 return error.SkipZigTest; } - comptime try testMullAdd128(); - try testMullAdd128(); + if (builtin.zig_backend == .stage1 and + builtin.cpu.arch == .i386 and builtin.os.tag == .linux) + { + return error.SkipZigTest; + } + + comptime try testMulAdd128(); + try testMulAdd128(); } -fn testMullAdd128() !void { +fn testMulAdd128() !void { var a: f16 = 5.5; var b: f128 = 2.5; var c: f128 = 6.25;