mirror of
https://github.com/ziglang/zig.git
synced 2026-02-21 16:54:52 +00:00
x64: check for floating-point intrinsics in codegen
This commit is contained in:
parent
283f40e4e9
commit
5cbfd5819e
@ -39,7 +39,7 @@ const RegisterLock = RegisterManager.RegisterLock;
|
||||
const Register = bits.Register;
|
||||
|
||||
const gp = abi.RegisterClass.gp;
|
||||
const avx = abi.RegisterClass.avx;
|
||||
const sse = abi.RegisterClass.sse;
|
||||
|
||||
const InnerError = error{
|
||||
OutOfMemory,
|
||||
@ -881,15 +881,18 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
|
||||
switch (elem_ty.zigTypeTag()) {
|
||||
.Vector => return self.fail("TODO allocRegOrMem for Vector type", .{}),
|
||||
.Float => {
|
||||
// TODO check if AVX available
|
||||
const ptr_bytes: u64 = 32;
|
||||
if (abi_size <= ptr_bytes) {
|
||||
if (self.register_manager.tryAllocReg(inst, .{
|
||||
.selector_mask = avx,
|
||||
})) |reg| {
|
||||
return MCValue{ .register = registerAlias(reg, abi_size) };
|
||||
if (self.intrinsicsAllowed(elem_ty)) {
|
||||
const ptr_bytes: u64 = 32;
|
||||
if (abi_size <= ptr_bytes) {
|
||||
if (self.register_manager.tryAllocReg(inst, .{
|
||||
.selector_mask = sse,
|
||||
})) |reg| {
|
||||
return MCValue{ .register = registerAlias(reg, abi_size) };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return self.fail("TODO allocRegOrMem for Float type without SSE/AVX support", .{});
|
||||
},
|
||||
else => {
|
||||
// Make sure the type can fit in a register before we try to allocate one.
|
||||
@ -969,8 +972,11 @@ pub fn spillRegisters(self: *Self, comptime count: comptime_int, registers: [cou
|
||||
/// allocated. A second call to `copyToTmpRegister` may return the same register.
|
||||
/// This can have a side effect of spilling instructions to the stack to free up a register.
|
||||
fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
|
||||
const mask = switch (ty.zigTypeTag()) {
|
||||
.Float => avx,
|
||||
const mask: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
|
||||
.Float => blk: {
|
||||
if (self.intrinsicsAllowed(ty)) break :blk sse;
|
||||
return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
|
||||
},
|
||||
else => gp,
|
||||
};
|
||||
const reg: Register = try self.register_manager.allocReg(null, .{
|
||||
@ -985,8 +991,11 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
|
||||
/// This can have a side effect of spilling instructions to the stack to free up a register.
|
||||
/// WARNING make sure that the allocated register matches the returned MCValue from an instruction!
|
||||
fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue {
|
||||
const mask = switch (ty.zigTypeTag()) {
|
||||
.Float => avx,
|
||||
const mask: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
|
||||
.Float => blk: {
|
||||
if (self.intrinsicsAllowed(ty)) break :blk sse;
|
||||
return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
|
||||
},
|
||||
else => gp,
|
||||
};
|
||||
const reg: Register = try self.register_manager.allocReg(reg_owner, .{
|
||||
@ -3469,27 +3478,32 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu
|
||||
},
|
||||
.register => |src_reg| switch (dst_ty.zigTypeTag()) {
|
||||
.Float => {
|
||||
const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) {
|
||||
.f32 => switch (mir_tag) {
|
||||
.add => Mir.Inst.Tag.add_f32,
|
||||
.cmp => Mir.Inst.Tag.cmp_f32,
|
||||
else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}),
|
||||
},
|
||||
.f64 => switch (mir_tag) {
|
||||
.add => Mir.Inst.Tag.add_f64,
|
||||
.cmp => Mir.Inst.Tag.cmp_f64,
|
||||
else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}),
|
||||
},
|
||||
else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}),
|
||||
};
|
||||
_ = try self.addInst(.{
|
||||
.tag = actual_tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = dst_reg.to128(),
|
||||
.reg2 = src_reg.to128(),
|
||||
}),
|
||||
.data = undefined,
|
||||
});
|
||||
if (self.intrinsicsAllowed(dst_ty)) {
|
||||
const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) {
|
||||
.f32 => switch (mir_tag) {
|
||||
.add => Mir.Inst.Tag.add_f32_avx,
|
||||
.cmp => Mir.Inst.Tag.cmp_f32_avx,
|
||||
else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}),
|
||||
},
|
||||
.f64 => switch (mir_tag) {
|
||||
.add => Mir.Inst.Tag.add_f64_avx,
|
||||
.cmp => Mir.Inst.Tag.cmp_f64_avx,
|
||||
else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}),
|
||||
},
|
||||
else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}),
|
||||
};
|
||||
_ = try self.addInst(.{
|
||||
.tag = actual_tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = dst_reg.to128(),
|
||||
.reg2 = src_reg.to128(),
|
||||
}),
|
||||
.data = undefined,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
return self.fail("TODO genBinOpMir for float register-register and no intrinsics", .{});
|
||||
},
|
||||
else => {
|
||||
_ = try self.addInst(.{
|
||||
@ -5326,24 +5340,29 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
|
||||
.register => |reg| {
|
||||
switch (ty.zigTypeTag()) {
|
||||
.Float => {
|
||||
const tag: Mir.Inst.Tag = switch (ty.tag()) {
|
||||
.f32 => .mov_f32,
|
||||
.f64 => .mov_f64,
|
||||
else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}),
|
||||
};
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = switch (ty.tag()) {
|
||||
.f32 => .esp,
|
||||
.f64 => .rsp,
|
||||
else => unreachable,
|
||||
},
|
||||
.reg2 = reg.to128(),
|
||||
.flags = 0b01,
|
||||
}),
|
||||
.data = .{ .imm = @bitCast(u32, -stack_offset) },
|
||||
});
|
||||
if (self.intrinsicsAllowed(ty)) {
|
||||
const tag: Mir.Inst.Tag = switch (ty.tag()) {
|
||||
.f32 => .mov_f32_avx,
|
||||
.f64 => .mov_f64_avx,
|
||||
else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}),
|
||||
};
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = switch (ty.tag()) {
|
||||
.f32 => .esp,
|
||||
.f64 => .rsp,
|
||||
else => unreachable,
|
||||
},
|
||||
.reg2 = reg.to128(),
|
||||
.flags = 0b01,
|
||||
}),
|
||||
.data = .{ .imm = @bitCast(u32, -stack_offset) },
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
return self.fail("TODO genSetStackArg for register with no intrinsics", .{});
|
||||
},
|
||||
else => {
|
||||
_ = try self.addInst(.{
|
||||
@ -5505,24 +5524,29 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
|
||||
|
||||
switch (ty.zigTypeTag()) {
|
||||
.Float => {
|
||||
const tag: Mir.Inst.Tag = switch (ty.tag()) {
|
||||
.f32 => .mov_f32,
|
||||
.f64 => .mov_f64,
|
||||
else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}),
|
||||
};
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = switch (ty.tag()) {
|
||||
.f32 => base_reg.to32(),
|
||||
.f64 => base_reg.to64(),
|
||||
else => unreachable,
|
||||
},
|
||||
.reg2 = reg.to128(),
|
||||
.flags = 0b01,
|
||||
}),
|
||||
.data = .{ .imm = @bitCast(u32, -stack_offset) },
|
||||
});
|
||||
if (self.intrinsicsAllowed(ty)) {
|
||||
const tag: Mir.Inst.Tag = switch (ty.tag()) {
|
||||
.f32 => .mov_f32_avx,
|
||||
.f64 => .mov_f64_avx,
|
||||
else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}),
|
||||
};
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = switch (ty.tag()) {
|
||||
.f32 => base_reg.to32(),
|
||||
.f64 => base_reg.to64(),
|
||||
else => unreachable,
|
||||
},
|
||||
.reg2 = reg.to128(),
|
||||
.flags = 0b01,
|
||||
}),
|
||||
.data = .{ .imm = @bitCast(u32, -stack_offset) },
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
return self.fail("TODO genSetStack for register for type float with no intrinsics", .{});
|
||||
},
|
||||
else => {
|
||||
if (!math.isPowerOfTwo(abi_size)) {
|
||||
@ -6026,21 +6050,25 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
|
||||
},
|
||||
},
|
||||
.Float => {
|
||||
const tag: Mir.Inst.Tag = switch (ty.tag()) {
|
||||
.f32 => .mov_f32,
|
||||
.f64 => .mov_f64,
|
||||
else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}),
|
||||
};
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = reg.to128(),
|
||||
.reg2 = src_reg.to128(),
|
||||
.flags = 0b10,
|
||||
}),
|
||||
.data = undefined,
|
||||
});
|
||||
return;
|
||||
if (self.intrinsicsAllowed(ty)) {
|
||||
const tag: Mir.Inst.Tag = switch (ty.tag()) {
|
||||
.f32 => .mov_f32_avx,
|
||||
.f64 => .mov_f64_avx,
|
||||
else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}),
|
||||
};
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = reg.to128(),
|
||||
.reg2 = src_reg.to128(),
|
||||
.flags = 0b10,
|
||||
}),
|
||||
.data = undefined,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
return self.fail("TODO genSetReg from register for float with no intrinsics", .{});
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
@ -6073,24 +6101,29 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
|
||||
const base_reg = try self.register_manager.allocReg(null, .{ .selector_mask = gp });
|
||||
try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv);
|
||||
|
||||
const tag: Mir.Inst.Tag = switch (ty.tag()) {
|
||||
.f32 => .mov_f32,
|
||||
.f64 => .mov_f64,
|
||||
else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}),
|
||||
};
|
||||
if (self.intrinsicsAllowed(ty)) {
|
||||
const tag: Mir.Inst.Tag = switch (ty.tag()) {
|
||||
.f32 => .mov_f32_avx,
|
||||
.f64 => .mov_f64_avx,
|
||||
else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}),
|
||||
};
|
||||
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = reg.to128(),
|
||||
.reg2 = switch (ty.tag()) {
|
||||
.f32 => base_reg.to32(),
|
||||
.f64 => base_reg.to64(),
|
||||
else => unreachable,
|
||||
},
|
||||
}),
|
||||
.data = .{ .imm = 0 },
|
||||
});
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = reg.to128(),
|
||||
.reg2 = switch (ty.tag()) {
|
||||
.f32 => base_reg.to32(),
|
||||
.f64 => base_reg.to64(),
|
||||
else => unreachable,
|
||||
},
|
||||
}),
|
||||
.data = .{ .imm = 0 },
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
return self.fail("TODO genSetReg from memory for float with no intrinsics", .{});
|
||||
},
|
||||
else => {
|
||||
if (x <= math.maxInt(i32)) {
|
||||
@ -6183,24 +6216,27 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
|
||||
},
|
||||
},
|
||||
.Float => {
|
||||
const tag: Mir.Inst.Tag = switch (ty.tag()) {
|
||||
.f32 => .mov_f32,
|
||||
.f64 => .mov_f64,
|
||||
else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}),
|
||||
};
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = reg.to128(),
|
||||
.reg2 = switch (ty.tag()) {
|
||||
.f32 => .ebp,
|
||||
.f64 => .rbp,
|
||||
else => unreachable,
|
||||
},
|
||||
}),
|
||||
.data = .{ .imm = @bitCast(u32, -off) },
|
||||
});
|
||||
return;
|
||||
if (self.intrinsicsAllowed(ty)) {
|
||||
const tag: Mir.Inst.Tag = switch (ty.tag()) {
|
||||
.f32 => .mov_f32_avx,
|
||||
.f64 => .mov_f64_avx,
|
||||
else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}),
|
||||
};
|
||||
_ = try self.addInst(.{
|
||||
.tag = tag,
|
||||
.ops = Mir.Inst.Ops.encode(.{
|
||||
.reg1 = reg.to128(),
|
||||
.reg2 = switch (ty.tag()) {
|
||||
.f32 => .ebp,
|
||||
.f64 => .rbp,
|
||||
else => unreachable,
|
||||
},
|
||||
}),
|
||||
.data = .{ .imm = @bitCast(u32, -off) },
|
||||
});
|
||||
return;
|
||||
}
|
||||
return self.fail("TODO genSetReg from stack offset for float with no intrinsics", .{});
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
@ -6995,3 +7031,12 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether the AVX-based floating-point lowering may be used for a
/// value of type `ty` on the current target.
///
/// Callers use the result to choose between emitting the `*_avx` MIR
/// instructions and bailing out with a "TODO ... no intrinsics" failure.
///
/// Returns `true` only for `f32`/`f64` when the target CPU feature set
/// contains `avx` or `avx2`. Every other type yields `false`, so callers fall
/// through to their own TODO error instead of hitting `unreachable` (a panic
/// in safe builds, undefined behaviour in ReleaseFast) for float widths that
/// are not implemented yet.
fn intrinsicsAllowed(self: *Self, ty: Type) bool {
    return switch (ty.tag()) {
        .f32,
        .f64,
        => Target.x86.featureSetHasAny(self.target.cpu.features, .{ .avx, .avx2 }),
        // TODO finish this off: other float widths (and any non-float type)
        // have no intrinsic-based lowering yet, so report them as unsupported.
        else => false,
    };
}
|
||||
|
||||
@ -183,14 +183,14 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
|
||||
.nop => try emit.mirNop(),
|
||||
|
||||
// AVX instructions
|
||||
.mov_f64 => try emit.mirMovFloatAvx(.vmovsd, inst),
|
||||
.mov_f32 => try emit.mirMovFloatAvx(.vmovss, inst),
|
||||
.mov_f64_avx => try emit.mirMovFloatAvx(.vmovsd, inst),
|
||||
.mov_f32_avx => try emit.mirMovFloatAvx(.vmovss, inst),
|
||||
|
||||
.add_f64 => try emit.mirAddFloatAvx(.vaddsd, inst),
|
||||
.add_f32 => try emit.mirAddFloatAvx(.vaddss, inst),
|
||||
.add_f64_avx => try emit.mirAddFloatAvx(.vaddsd, inst),
|
||||
.add_f32_avx => try emit.mirAddFloatAvx(.vaddss, inst),
|
||||
|
||||
.cmp_f64 => try emit.mirCmpFloatAvx(.vucomisd, inst),
|
||||
.cmp_f32 => try emit.mirCmpFloatAvx(.vucomiss, inst),
|
||||
.cmp_f64_avx => try emit.mirCmpFloatAvx(.vucomisd, inst),
|
||||
.cmp_f32_avx => try emit.mirCmpFloatAvx(.vucomiss, inst),
|
||||
|
||||
// Pseudo-instructions
|
||||
.call_extern => try emit.mirCallExtern(inst),
|
||||
|
||||
@ -350,18 +350,18 @@ pub const Inst = struct {
|
||||
/// 0b00 reg1, qword ptr [reg2 + imm32]
|
||||
/// 0b01 qword ptr [reg1 + imm32], reg2
|
||||
/// 0b10 reg1, reg2
|
||||
mov_f64,
|
||||
mov_f32,
|
||||
mov_f64_avx,
|
||||
mov_f32_avx,
|
||||
|
||||
/// ops flags: form:
|
||||
/// 0b00 reg1, reg1, reg2
|
||||
add_f64,
|
||||
add_f32,
|
||||
add_f64_avx,
|
||||
add_f32_avx,
|
||||
|
||||
/// ops flags: form:
|
||||
///
|
||||
cmp_f64,
|
||||
cmp_f32,
|
||||
cmp_f64_avx,
|
||||
cmp_f32_avx,
|
||||
|
||||
/// Pseudo-instructions
|
||||
/// call extern function
|
||||
|
||||
@ -383,11 +383,11 @@ pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8
|
||||
pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 };
|
||||
pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx };
|
||||
|
||||
const avx_regs = [_]Register{
|
||||
const sse_avx_regs = [_]Register{
|
||||
.ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7,
|
||||
.ymm8, .ymm9, .ymm10, .ymm11, .ymm12, .ymm13, .ymm14, .ymm15,
|
||||
};
|
||||
const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ avx_regs;
|
||||
const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ sse_avx_regs;
|
||||
pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers);
|
||||
|
||||
// Register classes
|
||||
@ -401,7 +401,7 @@ pub const RegisterClass = struct {
|
||||
}, true);
|
||||
break :blk set;
|
||||
};
|
||||
pub const avx: RegisterBitSet = blk: {
|
||||
pub const sse: RegisterBitSet = blk: {
|
||||
var set = RegisterBitSet.initEmpty();
|
||||
set.setRangeValue(.{
|
||||
.start = caller_preserved_regs.len + callee_preserved_regs.len,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user