x86_64: implement float division intrinsics

This commit is contained in:
Jacob Young 2023-03-18 01:37:13 -04:00 committed by Jakub Konka
parent b6eebb709f
commit c865c8fb2a
5 changed files with 614 additions and 576 deletions

View File

@ -3538,12 +3538,37 @@ fn genBinOp(
else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) }),
}, lhs_ty, dst_mcv, src_mcv),
.div_float => try self.genBinOpMir(switch (lhs_ty.tag()) {
.div_float,
.div_exact,
=> try self.genBinOpMir(switch (lhs_ty.tag()) {
.f32 => .divss,
.f64 => .divsd,
else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) }),
}, lhs_ty, dst_mcv, src_mcv),
.div_trunc,
.div_floor,
=> {
try self.genBinOpMir(switch (lhs_ty.tag()) {
.f32 => .divss,
.f64 => .divsd,
else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) }),
}, lhs_ty, dst_mcv, src_mcv);
if (Target.x86.featureSetHas(self.target.cpu.features, .sse4_1)) {
const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
const dst_alias = registerAlias(dst_mcv.register, abi_size);
try self.asmRegisterRegisterImmediate(switch (lhs_ty.tag()) {
.f32 => .roundss,
.f64 => .roundsd,
else => unreachable,
}, dst_alias, dst_alias, Immediate.u(switch (tag) {
.div_trunc => 0b1_0_11,
.div_floor => 0b1_0_01,
else => unreachable,
}));
} else return self.fail("TODO implement round without sse4_1", .{});
},
.ptr_add,
.ptr_sub,
=> {

View File

@ -115,6 +115,7 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
.minss,
.movss,
.mulss,
.roundss,
.subss,
.ucomiss,
.addsd,
@ -124,6 +125,7 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
.minsd,
.movsd,
.mulsd,
.roundsd,
.subsd,
.ucomisd,
=> try emit.mirEncodeGeneric(tag, inst),

View File

@ -19,8 +19,8 @@ op1: Op,
op2: Op,
op3: Op,
op4: Op,
opc_len: u2,
opc: [3]u8,
opc_len: u3,
opc: [7]u8,
modrm_ext: u3,
mode: Mode,
@ -69,18 +69,19 @@ pub fn findByMnemonic(mnemonic: Mnemonic, args: struct {
var candidates: [10]Encoding = undefined;
var count: usize = 0;
for (table) |entry| {
const enc = Encoding{
var enc = Encoding{
.mnemonic = entry[0],
.op_en = entry[1],
.op1 = entry[2],
.op2 = entry[3],
.op3 = entry[4],
.op4 = entry[5],
.opc_len = entry[6],
.opc = .{ entry[7], entry[8], entry[9] },
.modrm_ext = entry[10],
.mode = entry[11],
.opc_len = @intCast(u3, entry[6].len),
.opc = undefined,
.modrm_ext = entry[7],
.mode = entry[8],
};
std.mem.copy(u8, &enc.opc, entry[6]);
if (enc.mnemonic == mnemonic and
input_op1.isSubset(enc.op1, enc.mode) and
input_op2.isSubset(enc.op2, enc.mode) and
@ -184,7 +185,7 @@ pub fn findByOpcode(opc: []const u8, prefixes: struct {
if (match) {
if (prefixes.rex.w) {
switch (enc.mode) {
.fpu, .sse, .sse2, .none => {},
.fpu, .sse, .sse2, .sse4_1, .none => {},
.long, .rex => return enc,
}
} else if (prefixes.rex.present and !prefixes.rex.isSet()) {
@ -357,6 +358,9 @@ pub const Mnemonic = enum {
mulsd,
subsd,
ucomisd,
// SSE4.1
roundss,
roundsd,
// zig fmt: on
};
@ -550,7 +554,7 @@ pub const Op = enum {
else => {
if (op.isRegister() and target.isRegister()) {
switch (mode) {
.sse, .sse2 => return op.isFloatingPointRegister() and target.isFloatingPointRegister(),
.sse, .sse2, .sse4_1 => return op.isFloatingPointRegister() and target.isFloatingPointRegister(),
else => switch (target) {
.cl, .al, .ax, .eax, .rax => return op == target,
else => return op.bitSize() == target.bitSize(),
@ -592,4 +596,5 @@ pub const Mode = enum {
long,
sse,
sse2,
sse4_1,
};

View File

@ -123,6 +123,8 @@ pub const Inst = struct {
movss,
/// Multiply scalar single-precision floating-point values
mulss,
/// Round scalar single-precision floating-point values
roundss,
/// Subtract scalar single-precision floating-point values
subss,
/// Unordered compare scalar single-precision floating-point values
@ -141,6 +143,8 @@ pub const Inst = struct {
movsd,
/// Multiply scalar double-precision floating-point values
mulsd,
/// Round scalar double-precision floating-point values
roundsd,
/// Subtract scalar double-precision floating-point values
subsd,
/// Unordered compare scalar double-precision floating-point values

File diff suppressed because it is too large Load Diff