x86_64: implement float arithmetic builtins

This commit is contained in:
Jacob Young 2023-09-30 22:11:51 -04:00
parent 1eb023908d
commit fbe5bf469e
3 changed files with 301 additions and 187 deletions

View File

@@ -6570,6 +6570,34 @@ fn genBinOp(
const lhs_ty = self.typeOf(lhs_air);
const rhs_ty = self.typeOf(rhs_air);
const abi_size: u32 = @intCast(lhs_ty.abiSize(mod));
if (lhs_ty.isRuntimeFloat() and switch (lhs_ty.floatBits(self.target.*)) {
16 => !self.hasFeature(.f16c),
32, 64 => false,
80, 128 => true,
else => unreachable,
}) {
var callee: ["__add?f3".len]u8 = undefined;
return self.genCall(.{ .lib = .{
.return_type = lhs_ty.toIntern(),
.param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() },
.callee = switch (air_tag) {
.add, .sub, .mul, .div_float => std.fmt.bufPrint(&callee, "__{s}{c}f3", .{
@tagName(air_tag)[0..3],
floatCompilerRtAbiName(lhs_ty.floatBits(self.target.*)),
}),
.min, .max => std.fmt.bufPrint(&callee, "{s}f{s}{s}", .{
floatLibcAbiPrefix(lhs_ty),
@tagName(air_tag),
floatLibcAbiSuffix(lhs_ty),
}),
else => return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
} catch unreachable,
} }, &.{ lhs_ty, rhs_ty }, &.{ .{ .air_ref = lhs_air }, .{ .air_ref = rhs_air } });
}
if ((lhs_ty.scalarType(mod).isRuntimeFloat() and
lhs_ty.scalarType(mod).floatBits(self.target.*) == 80) or
lhs_ty.abiSize(mod) > @as(u6, if (self.hasFeature(.avx)) 32 else 16))
@@ -6830,7 +6858,8 @@ fn genBinOp(
const mir_tag = @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
else => unreachable,
.Float => switch (lhs_ty.floatBits(self.target.*)) {
16 => if (self.hasFeature(.f16c)) {
16 => {
assert(self.hasFeature(.f16c));
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
@@ -6873,7 +6902,7 @@ fn genBinOp(
Immediate.u(0b1_00),
);
return dst_mcv;
} else null,
},
32 => switch (air_tag) {
.add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
.sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
@@ -7134,181 +7163,184 @@ fn genBinOp(
else => null,
},
.Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
16 => if (self.hasFeature(.f16c)) switch (lhs_ty.vectorLen(mod)) {
1 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
16 => tag: {
assert(self.hasFeature(.f16c));
switch (lhs_ty.vectorLen(mod)) {
1 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
.{ .vp_w, .insr },
dst_reg,
dst_reg,
src_mcv.mem(.word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ .vp_, .unpcklwd },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ .v_ss, .add },
.sub => .{ .v_ss, .sub },
.mul => .{ .v_ss, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
.max => .{ .v_ss, .max },
.min => .{ .v_ss, .max },
else => unreachable,
},
dst_reg,
dst_reg,
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
);
return dst_mcv;
},
2 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
.{ .vp_w, .insr },
dst_reg,
dst_reg,
src_mcv.mem(.word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ .vp_, .unpcklwd },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ .v_ss, .add },
.sub => .{ .v_ss, .sub },
.mul => .{ .v_ss, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
.max => .{ .v_ss, .max },
.min => .{ .v_ss, .max },
else => unreachable,
},
dst_reg,
dst_reg,
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
);
return dst_mcv;
},
2 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
.{ .vp_d, .insr },
dst_reg,
src_mcv.mem(.dword),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ .v_ps, .unpckl },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
try self.asmRegisterRegisterRegister(
.{ .v_ps, .movhl },
tmp_reg,
dst_reg,
dst_reg,
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
.mul => .{ .v_ps, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
.min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg,
dst_reg,
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
);
return dst_mcv;
},
3...4 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
.{ .vp_d, .insr },
dst_reg,
src_mcv.mem(.dword),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ .v_ps, .unpckl },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
try self.asmRegisterRegisterRegister(
.{ .v_ps, .movhl },
tmp_reg,
dst_reg,
dst_reg,
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
.mul => .{ .v_ps, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
.min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg,
dst_reg,
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
);
return dst_mcv;
},
3...4 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ .v_ps, .cvtph2 },
tmp_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegister(
.{ .v_ps, .cvtph2 },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
.mul => .{ .v_ps, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
.min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg,
dst_reg,
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
);
return dst_mcv;
},
5...8 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to256();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ .v_ps, .cvtph2 },
tmp_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegister(
.{ .v_ps, .cvtph2 },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
.mul => .{ .v_ps, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
.min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg,
dst_reg,
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
);
return dst_mcv;
},
5...8 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to256();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ .v_ps, .cvtph2 },
tmp_reg,
src_mcv.mem(.xword),
) else try self.asmRegisterRegister(
.{ .v_ps, .cvtph2 },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
.mul => .{ .v_ps, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
.min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg.to256(),
dst_reg.to256(),
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg.to256(),
Immediate.u(0b1_00),
);
return dst_mcv;
},
else => null,
} else null,
try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ .v_ps, .cvtph2 },
tmp_reg,
src_mcv.mem(.xword),
) else try self.asmRegisterRegister(
.{ .v_ps, .cvtph2 },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
.mul => .{ .v_ps, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
.min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg.to256(),
dst_reg.to256(),
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg.to256(),
Immediate.u(0b1_00),
);
return dst_mcv;
},
else => break :tag null,
}
},
32 => switch (lhs_ty.vectorLen(mod)) {
1 => switch (air_tag) {
.add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },

View File

@@ -19,6 +19,99 @@ fn epsForType(comptime T: type) T {
};
}
test "add f16" {
    // f16 arithmetic on the x86_64 self-hosted backend currently needs the
    // ELF linker path; other object formats are skipped.
    const skip_x86_64_non_elf =
        builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf;
    if (skip_x86_64_non_elf) return error.SkipZigTest;

    try comptime testAdd(f16);
    try testAdd(f16);
}
test "add f32/f64" {
    // Exercise both hardware float widths at runtime and at comptime.
    inline for (.{ f32, f64 }) |T| {
        try testAdd(T);
        try comptime testAdd(T);
    }
}
test "add f80/f128/c_longdouble" {
    // Soft-float widths are not implemented on the x86_64 self-hosted backend yet.
    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;

    inline for (.{ f80, f128, c_longdouble }) |T| {
        try testAdd(T);
        try comptime testAdd(T);
    }
}
/// Checks that `1.25 + 2.75 == 4` for the given float type.
fn testAdd(comptime T: type) !void {
    // `var` keeps the operands runtime-known so the addition is not
    // constant-folded away when this runs as a runtime test.
    var lhs: T = 1.25;
    var rhs: T = 2.75;
    try expect(lhs + rhs == 4);
}
test "sub f16" {
    // f16 arithmetic on the x86_64 self-hosted backend currently needs the
    // ELF linker path; other object formats are skipped.
    const skip_x86_64_non_elf =
        builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf;
    if (skip_x86_64_non_elf) return error.SkipZigTest;

    try comptime testSub(f16);
    try testSub(f16);
}
test "sub f32/f64" {
    // Exercise both hardware float widths at runtime and at comptime.
    inline for (.{ f32, f64 }) |T| {
        try testSub(T);
        try comptime testSub(T);
    }
}
test "sub f80/f128/c_longdouble" {
    // Soft-float widths are not implemented on the x86_64 self-hosted backend yet.
    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;

    inline for (.{ f80, f128, c_longdouble }) |T| {
        try testSub(T);
        try comptime testSub(T);
    }
}
/// Checks that `1.25 - 2.75 == -1.5` for the given float type.
fn testSub(comptime T: type) !void {
    // `var` keeps the operands runtime-known so the subtraction is not
    // constant-folded away when this runs as a runtime test.
    var lhs: T = 1.25;
    var rhs: T = 2.75;
    try expect(lhs - rhs == -1.5);
}
test "mul f16" {
    // f16 arithmetic on the x86_64 self-hosted backend currently needs the
    // ELF linker path; other object formats are skipped.
    const skip_x86_64_non_elf =
        builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf;
    if (skip_x86_64_non_elf) return error.SkipZigTest;

    try comptime testMul(f16);
    try testMul(f16);
}
test "mul f32/f64" {
    // Exercise both hardware float widths at runtime and at comptime.
    inline for (.{ f32, f64 }) |T| {
        try testMul(T);
        try comptime testMul(T);
    }
}
test "mul f80/f128/c_longdouble" {
    // Soft-float widths are not implemented on the x86_64 self-hosted backend yet.
    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;

    inline for (.{ f80, f128, c_longdouble }) |T| {
        try testMul(T);
        try comptime testMul(T);
    }
}
/// Checks that `1.25 * 2.75 == 3.4375` (exactly representable) for the given float type.
fn testMul(comptime T: type) !void {
    // `var` keeps the operands runtime-known so the multiplication is not
    // constant-folded away when this runs as a runtime test.
    var lhs: T = 1.25;
    var rhs: T = 2.75;
    try expect(lhs * rhs == 3.4375);
}
test "cmp f16" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
@@ -216,7 +309,7 @@ test "more @sqrt f16 tests" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (no_x86_64_hardware_f16_support) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
// TODO these are not all passing at comptime
try expect(@sqrt(@as(f16, 0.0)) == 0.0);
@@ -269,7 +362,6 @@ test "@sin f16" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
if (no_x86_64_hardware_f16_support) return error.SkipZigTest;
try testSin(f16);
try comptime testSin(f16);
@@ -339,7 +431,6 @@ test "@cos f16" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
if (no_x86_64_hardware_f16_support) return error.SkipZigTest;
try testCos(f16);
try comptime testCos(f16);
@@ -409,7 +500,6 @@ test "@tan f16" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
if (no_x86_64_hardware_f16_support) return error.SkipZigTest;
try testTan(f16);
try comptime testTan(f16);
@@ -479,7 +569,6 @@ test "@exp f16" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
if (no_x86_64_hardware_f16_support) return error.SkipZigTest;
try testExp(f16);
try comptime testExp(f16);
@@ -549,7 +638,6 @@ test "@exp2 f16" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
if (no_x86_64_hardware_f16_support) return error.SkipZigTest;
try testExp2(f16);
try comptime testExp2(f16);
@@ -619,7 +707,6 @@ test "@log f16" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
if (no_x86_64_hardware_f16_support) return error.SkipZigTest;
try testLog(f16);
try comptime testLog(f16);
@@ -687,7 +774,6 @@ test "@log2 f16" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
if (no_x86_64_hardware_f16_support) return error.SkipZigTest;
try testLog2(f16);
try comptime testLog2(f16);
@@ -761,7 +847,6 @@ test "@log10 f16" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
if (no_x86_64_hardware_f16_support) return error.SkipZigTest;
try testLog10(f16);
try comptime testLog10(f16);
@@ -829,7 +914,7 @@ test "@abs f16" {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_f16_support) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
try testFabs(f16);
try comptime testFabs(f16);
@@ -1186,7 +1271,7 @@ test "neg f16" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
if (no_x86_64_hardware_f16_support) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
if (builtin.os.tag == .freebsd) {
// TODO file issue to track this failure

View File

@@ -7,8 +7,6 @@ const maxInt = std.math.maxInt;
const minInt = std.math.minInt;
const mem = std.mem;
const math = std.math;
const no_x86_64_hardware_f16_support = builtin.zig_backend == .stage2_x86_64 and
!std.Target.x86.featureSetHas(builtin.cpu.features, .f16c);
test "assignment operators" {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
@@ -1444,7 +1442,6 @@ test "@round f16" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO
try testRound(f16, 12.0);
try comptime testRound(f16, 12.0);