x64: check for floating-point intrinsics in codegen

This commit is contained in:
Jakub Konka 2022-05-19 17:36:04 +02:00
parent 283f40e4e9
commit 5cbfd5819e
4 changed files with 179 additions and 134 deletions

View File

@ -39,7 +39,7 @@ const RegisterLock = RegisterManager.RegisterLock;
const Register = bits.Register;
const gp = abi.RegisterClass.gp;
const avx = abi.RegisterClass.avx;
const sse = abi.RegisterClass.sse;
const InnerError = error{
OutOfMemory,
@ -881,15 +881,18 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
switch (elem_ty.zigTypeTag()) {
.Vector => return self.fail("TODO allocRegOrMem for Vector type", .{}),
.Float => {
// TODO check if AVX available
const ptr_bytes: u64 = 32;
if (abi_size <= ptr_bytes) {
if (self.register_manager.tryAllocReg(inst, .{
.selector_mask = avx,
})) |reg| {
return MCValue{ .register = registerAlias(reg, abi_size) };
if (self.intrinsicsAllowed(elem_ty)) {
const ptr_bytes: u64 = 32;
if (abi_size <= ptr_bytes) {
if (self.register_manager.tryAllocReg(inst, .{
.selector_mask = sse,
})) |reg| {
return MCValue{ .register = registerAlias(reg, abi_size) };
}
}
}
return self.fail("TODO allocRegOrMem for Float type without SSE/AVX support", .{});
},
else => {
// Make sure the type can fit in a register before we try to allocate one.
@ -969,8 +972,11 @@ pub fn spillRegisters(self: *Self, comptime count: comptime_int, registers: [cou
/// allocated. A second call to `copyToTmpRegister` may return the same register.
/// This can have a side effect of spilling instructions to the stack to free up a register.
fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
const mask = switch (ty.zigTypeTag()) {
.Float => avx,
const mask: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
.Float => blk: {
if (self.intrinsicsAllowed(ty)) break :blk sse;
return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
},
else => gp,
};
const reg: Register = try self.register_manager.allocReg(null, .{
@ -985,8 +991,11 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
/// This can have a side effect of spilling instructions to the stack to free up a register.
/// WARNING make sure that the allocated register matches the returned MCValue from an instruction!
fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue {
const mask = switch (ty.zigTypeTag()) {
.Float => avx,
const mask: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
.Float => blk: {
if (self.intrinsicsAllowed(ty)) break :blk sse;
return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
},
else => gp,
};
const reg: Register = try self.register_manager.allocReg(reg_owner, .{
@ -3469,27 +3478,32 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu
},
.register => |src_reg| switch (dst_ty.zigTypeTag()) {
.Float => {
const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) {
.f32 => switch (mir_tag) {
.add => Mir.Inst.Tag.add_f32,
.cmp => Mir.Inst.Tag.cmp_f32,
else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}),
},
.f64 => switch (mir_tag) {
.add => Mir.Inst.Tag.add_f64,
.cmp => Mir.Inst.Tag.cmp_f64,
else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}),
},
else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}),
};
_ = try self.addInst(.{
.tag = actual_tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = dst_reg.to128(),
.reg2 = src_reg.to128(),
}),
.data = undefined,
});
if (self.intrinsicsAllowed(dst_ty)) {
const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) {
.f32 => switch (mir_tag) {
.add => Mir.Inst.Tag.add_f32_avx,
.cmp => Mir.Inst.Tag.cmp_f32_avx,
else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}),
},
.f64 => switch (mir_tag) {
.add => Mir.Inst.Tag.add_f64_avx,
.cmp => Mir.Inst.Tag.cmp_f64_avx,
else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}),
},
else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}),
};
_ = try self.addInst(.{
.tag = actual_tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = dst_reg.to128(),
.reg2 = src_reg.to128(),
}),
.data = undefined,
});
return;
}
return self.fail("TODO genBinOpMir for float register-register and no intrinsics", .{});
},
else => {
_ = try self.addInst(.{
@ -5326,24 +5340,29 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
.register => |reg| {
switch (ty.zigTypeTag()) {
.Float => {
const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32,
.f64 => .mov_f64,
else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}),
};
_ = try self.addInst(.{
.tag = tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = switch (ty.tag()) {
.f32 => .esp,
.f64 => .rsp,
else => unreachable,
},
.reg2 = reg.to128(),
.flags = 0b01,
}),
.data = .{ .imm = @bitCast(u32, -stack_offset) },
});
if (self.intrinsicsAllowed(ty)) {
const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32_avx,
.f64 => .mov_f64_avx,
else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}),
};
_ = try self.addInst(.{
.tag = tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = switch (ty.tag()) {
.f32 => .esp,
.f64 => .rsp,
else => unreachable,
},
.reg2 = reg.to128(),
.flags = 0b01,
}),
.data = .{ .imm = @bitCast(u32, -stack_offset) },
});
return;
}
return self.fail("TODO genSetStackArg for register with no intrinsics", .{});
},
else => {
_ = try self.addInst(.{
@ -5505,24 +5524,29 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
switch (ty.zigTypeTag()) {
.Float => {
const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32,
.f64 => .mov_f64,
else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}),
};
_ = try self.addInst(.{
.tag = tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = switch (ty.tag()) {
.f32 => base_reg.to32(),
.f64 => base_reg.to64(),
else => unreachable,
},
.reg2 = reg.to128(),
.flags = 0b01,
}),
.data = .{ .imm = @bitCast(u32, -stack_offset) },
});
if (self.intrinsicsAllowed(ty)) {
const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32_avx,
.f64 => .mov_f64_avx,
else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}),
};
_ = try self.addInst(.{
.tag = tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = switch (ty.tag()) {
.f32 => base_reg.to32(),
.f64 => base_reg.to64(),
else => unreachable,
},
.reg2 = reg.to128(),
.flags = 0b01,
}),
.data = .{ .imm = @bitCast(u32, -stack_offset) },
});
return;
}
return self.fail("TODO genSetStack for register for type float with no intrinsics", .{});
},
else => {
if (!math.isPowerOfTwo(abi_size)) {
@ -6026,21 +6050,25 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
},
},
.Float => {
const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32,
.f64 => .mov_f64,
else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}),
};
_ = try self.addInst(.{
.tag = tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = reg.to128(),
.reg2 = src_reg.to128(),
.flags = 0b10,
}),
.data = undefined,
});
return;
if (self.intrinsicsAllowed(ty)) {
const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32_avx,
.f64 => .mov_f64_avx,
else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}),
};
_ = try self.addInst(.{
.tag = tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = reg.to128(),
.reg2 = src_reg.to128(),
.flags = 0b10,
}),
.data = undefined,
});
return;
}
return self.fail("TODO genSetReg from register for float with no intrinsics", .{});
},
else => {},
}
@ -6073,24 +6101,29 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
const base_reg = try self.register_manager.allocReg(null, .{ .selector_mask = gp });
try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv);
const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32,
.f64 => .mov_f64,
else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}),
};
if (self.intrinsicsAllowed(ty)) {
const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32_avx,
.f64 => .mov_f64_avx,
else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}),
};
_ = try self.addInst(.{
.tag = tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = reg.to128(),
.reg2 = switch (ty.tag()) {
.f32 => base_reg.to32(),
.f64 => base_reg.to64(),
else => unreachable,
},
}),
.data = .{ .imm = 0 },
});
_ = try self.addInst(.{
.tag = tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = reg.to128(),
.reg2 = switch (ty.tag()) {
.f32 => base_reg.to32(),
.f64 => base_reg.to64(),
else => unreachable,
},
}),
.data = .{ .imm = 0 },
});
return;
}
return self.fail("TODO genSetReg from memory for float with no intrinsics", .{});
},
else => {
if (x <= math.maxInt(i32)) {
@ -6183,24 +6216,27 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
},
},
.Float => {
const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32,
.f64 => .mov_f64,
else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}),
};
_ = try self.addInst(.{
.tag = tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = reg.to128(),
.reg2 = switch (ty.tag()) {
.f32 => .ebp,
.f64 => .rbp,
else => unreachable,
},
}),
.data = .{ .imm = @bitCast(u32, -off) },
});
return;
if (self.intrinsicsAllowed(ty)) {
const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32_avx,
.f64 => .mov_f64_avx,
else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}),
};
_ = try self.addInst(.{
.tag = tag,
.ops = Mir.Inst.Ops.encode(.{
.reg1 = reg.to128(),
.reg2 = switch (ty.tag()) {
.f32 => .ebp,
.f64 => .rbp,
else => unreachable,
},
}),
.data = .{ .imm = @bitCast(u32, -off) },
});
return;
}
return self.fail("TODO genSetReg from stack offset for float with no intrinsics", .{});
},
else => {},
}
@ -6995,3 +7031,12 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
},
}
}
/// Returns whether hardware float intrinsics (the vector-register MIR paths)
/// may be emitted for values of type `ty` on the current target.
/// Only scalar `f32`/`f64` are handled so far; all other float widths
/// (f16, f80, f128, comptime_float) currently trip the `unreachable`.
fn intrinsicsAllowed(self: *Self, ty: Type) bool {
return switch (ty.tag()) {
.f32,
.f64,
// NOTE(review): this tests only AVX/AVX2, not plain SSE/SSE2 — so despite
// the `sse` register-class naming used at the call sites, SSE-only CPUs
// get `false` here and fall through to the "no intrinsics" TODO paths.
// Confirm whether SSE feature bits should be included — TODO verify.
=> Target.x86.featureSetHasAny(self.target.cpu.features, .{ .avx, .avx2 }),
else => unreachable, // TODO finish this off
};
}

View File

@ -183,14 +183,14 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
.nop => try emit.mirNop(),
// AVX instructions
.mov_f64 => try emit.mirMovFloatAvx(.vmovsd, inst),
.mov_f32 => try emit.mirMovFloatAvx(.vmovss, inst),
.mov_f64_avx => try emit.mirMovFloatAvx(.vmovsd, inst),
.mov_f32_avx => try emit.mirMovFloatAvx(.vmovss, inst),
.add_f64 => try emit.mirAddFloatAvx(.vaddsd, inst),
.add_f32 => try emit.mirAddFloatAvx(.vaddss, inst),
.add_f64_avx => try emit.mirAddFloatAvx(.vaddsd, inst),
.add_f32_avx => try emit.mirAddFloatAvx(.vaddss, inst),
.cmp_f64 => try emit.mirCmpFloatAvx(.vucomisd, inst),
.cmp_f32 => try emit.mirCmpFloatAvx(.vucomiss, inst),
.cmp_f64_avx => try emit.mirCmpFloatAvx(.vucomisd, inst),
.cmp_f32_avx => try emit.mirCmpFloatAvx(.vucomiss, inst),
// Pseudo-instructions
.call_extern => try emit.mirCallExtern(inst),

View File

@ -350,18 +350,18 @@ pub const Inst = struct {
/// 0b00 reg1, qword ptr [reg2 + imm32]
/// 0b01 qword ptr [reg1 + imm32], reg2
/// 0b10 reg1, reg2
mov_f64,
mov_f32,
mov_f64_avx,
mov_f32_avx,
/// ops flags: form:
/// 0b00 reg1, reg1, reg2
add_f64,
add_f32,
add_f64_avx,
add_f32_avx,
/// ops flags: form:
///
cmp_f64,
cmp_f32,
cmp_f64_avx,
cmp_f32_avx,
/// Pseudo-instructions
/// call extern function

View File

@ -383,11 +383,11 @@ pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8
pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 };
pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx };
const avx_regs = [_]Register{
const sse_avx_regs = [_]Register{
.ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7,
.ymm8, .ymm9, .ymm10, .ymm11, .ymm12, .ymm13, .ymm14, .ymm15,
};
const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ avx_regs;
const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ sse_avx_regs;
pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers);
// Register classes
@ -401,7 +401,7 @@ pub const RegisterClass = struct {
}, true);
break :blk set;
};
pub const avx: RegisterBitSet = blk: {
pub const sse: RegisterBitSet = blk: {
var set = RegisterBitSet.initEmpty();
set.setRangeValue(.{
.start = caller_preserved_regs.len + callee_preserved_regs.len,