x86_64: implement a bunch of floating point stuff

This commit is contained in:
Jacob Young 2023-04-28 20:39:38 -04:00
parent 7c9891d7b7
commit 4ec49da5f6
7 changed files with 486 additions and 116 deletions

View File

@ -1297,9 +1297,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.ceil,
.round,
.trunc_float,
.neg,
=> try self.airUnaryMath(inst),
.neg => try self.airNeg(inst),
.add_with_overflow => try self.airAddSubWithOverflow(inst),
.sub_with_overflow => try self.airAddSubWithOverflow(inst),
.mul_with_overflow => try self.airMulWithOverflow(inst),
@ -1881,7 +1882,7 @@ pub fn spillRegisters(self: *Self, registers: []const Register) !void {
/// allocated. A second call to `copyToTmpRegister` may return the same register.
/// This can have a side effect of spilling instructions to the stack to free up a register.
fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
    // Pick a register from the class appropriate for `ty` (general-purpose vs
    // floating-point); `null` means the register is not tracked for any inst.
    const reg = try self.register_manager.allocReg(null, try self.regClassForType(ty));
    try self.genSetReg(reg, ty, mcv);
    return reg;
}
@ -1924,16 +1925,48 @@ fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void {
/// Lower a float truncation (`@floatCast` narrowing). Currently only the
/// f64 -> f32 case is implemented, via the SSE2 `cvtsd2ss` instruction.
fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
    const dst_ty = self.air.typeOfIndex(inst);
    const src_ty = self.air.typeOf(ty_op.operand);
    // Anything other than f64 -> f32 with SSE2 available is not yet supported.
    if (dst_ty.floatBits(self.target.*) != 32 or src_ty.floatBits(self.target.*) != 64 or
        !Target.x86.featureSetHas(self.target.cpu.features, .sse2))
        return self.fail("TODO implement airFptrunc from {} to {}", .{
            src_ty.fmt(self.bin_file.options.module.?),
            dst_ty.fmt(self.bin_file.options.module.?),
        });
    const src_mcv = try self.resolveInst(ty_op.operand);
    // Reuse the operand's register in place when this is its last use;
    // otherwise materialize the value in a fresh tracked register.
    const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
        src_mcv
    else
        try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
    // Hold the destination register so genBinOpMir cannot spill it.
    const dst_lock = self.register_manager.lockReg(dst_mcv.register);
    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
    try self.genBinOpMir(.cvtsd2ss, src_ty, dst_mcv, src_mcv);
    return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}
/// Lower a float extension (`@floatCast` widening). Currently only the
/// f32 -> f64 case is implemented, via the SSE2 `cvtss2sd` instruction.
fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
    const dst_ty = self.air.typeOfIndex(inst);
    const src_ty = self.air.typeOf(ty_op.operand);
    // Anything other than f32 -> f64 with SSE2 available is not yet supported.
    if (dst_ty.floatBits(self.target.*) != 64 or src_ty.floatBits(self.target.*) != 32 or
        !Target.x86.featureSetHas(self.target.cpu.features, .sse2))
        return self.fail("TODO implement airFpext from {} to {}", .{
            src_ty.fmt(self.bin_file.options.module.?),
            dst_ty.fmt(self.bin_file.options.module.?),
        });
    const src_mcv = try self.resolveInst(ty_op.operand);
    // Reuse the operand's register in place when this is its last use;
    // otherwise materialize the value in a fresh tracked register.
    const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
        src_mcv
    else
        try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
    // Hold the destination register so genBinOpMir cannot spill it.
    const dst_lock = self.register_manager.lockReg(dst_mcv.register);
    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
    try self.genBinOpMir(.cvtss2sd, src_ty, dst_mcv, src_mcv);
    return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}
fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
@ -3953,10 +3986,65 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}
/// Lower float negation by XOR-ing the operand with a 128-bit vector whose
/// lanes are all -0.0, flipping only the sign bit (xorps for f32, xorpd for f64).
fn airNeg(self: *Self, inst: Air.Inst.Index) !void {
    const un_op = self.air.instructions.items(.data)[inst].un_op;
    const ty = self.air.typeOf(un_op);
    const ty_bits = ty.floatBits(self.target.*);
    // Arena only backs the stack-fallback allocator below; freed wholesale on exit.
    var arena = std.heap.ArenaAllocator.init(self.gpa);
    defer arena.deinit();
    // Sized/aligned so the -0.0 payload for any float width fits in the
    // stack-fallback buffer without touching the arena in the common case.
    const ExpectedContents = union {
        f16: Value.Payload.Float_16,
        f32: Value.Payload.Float_32,
        f64: Value.Payload.Float_64,
        f80: Value.Payload.Float_80,
        f128: Value.Payload.Float_128,
    };
    var stack align(@alignOf(ExpectedContents)) =
        std.heap.stackFallback(@sizeOf(ExpectedContents), arena.allocator());
    // Build the type `@Vector(128 / ty_bits, ty)` — one full XMM register.
    var vec_pl = Type.Payload.Array{
        .base = .{ .tag = .vector },
        .data = .{
            .len = @divExact(128, ty_bits),
            .elem_type = ty,
        },
    };
    const vec_ty = Type.initPayload(&vec_pl.base);
    // A `repeated` value of -0.0: every lane carries only the sign bit set.
    var sign_pl = Value.Payload.SubValue{
        .base = .{ .tag = .repeated },
        .data = try Value.floatToValue(-0.0, stack.get(), ty, self.target.*),
    };
    const sign_val = Value.initPayload(&sign_pl.base);
    // Materialize the sign mask as a constant the xor instruction can reference.
    const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val });
    const src_mcv = try self.resolveInst(un_op);
    // Reuse the operand's register when this is its last use; otherwise copy.
    const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
        src_mcv
    else
        try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
    // Hold the destination register so genBinOpMir cannot spill it.
    const dst_lock = self.register_manager.lockReg(dst_mcv.register);
    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
    try self.genBinOpMir(switch (ty_bits) {
        32 => .xorps,
        64 => .xorpd,
        else => return self.fail("TODO implement airNeg for {}", .{
            ty.fmt(self.bin_file.options.module.?),
        }),
    }, vec_ty, dst_mcv, sign_mcv);
    return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
}
/// Unary math ops (sqrt, sin, cos, ..., ceil, round, trunc_float) are not yet
/// implemented on x86_64; report which AIR tag was requested.
fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void {
    const un_op = self.air.instructions.items(.data)[inst].un_op;
    _ = un_op;
    return self.fail("TODO implement airUnaryMath for {}", .{
        self.air.instructions.items(.tag)[inst],
    });
    //return self.finishAir(inst, result, .{ un_op, .none, .none });
}
@ -4109,7 +4197,6 @@ fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerErro
fn airLoad(self: *Self, inst: Air.Inst.Index) !void {
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
const elem_ty = self.air.typeOfIndex(inst);
const elem_size = elem_ty.abiSize(self.target.*);
const result: MCValue = result: {
if (!elem_ty.hasRuntimeBitsIgnoreComptime()) break :result .none;
@ -4117,14 +4204,20 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void {
const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
const ptr_ty = self.air.typeOf(ty_op.operand);
const elem_size = elem_ty.abiSize(self.target.*);
const elem_rc = try self.regClassForType(elem_ty);
const ptr_rc = try self.regClassForType(ptr_ty);
const ptr_mcv = try self.resolveInst(ty_op.operand);
const dst_mcv = if (elem_size <= 8 and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
const dst_mcv = if (elem_size <= 8 and elem_rc.supersetOf(ptr_rc) and
self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
// The MCValue that holds the pointer can be re-used as the value.
ptr_mcv
else
try self.allocRegOrMem(inst, true);
const ptr_ty = self.air.typeOf(ty_op.operand);
if (ptr_ty.ptrInfo().data.host_size > 0) {
try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv);
} else {
@ -4346,17 +4439,9 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
switch (src_mcv) {
.load_frame => |frame_addr| {
const field_abi_size = @intCast(u32, field_ty.abiSize(self.target.*));
const limb_abi_size = @min(field_abi_size, 8);
const limb_abi_bits = limb_abi_size * 8;
const field_byte_off = @intCast(i32, field_off / limb_abi_bits * limb_abi_size);
const field_bit_off = field_off % limb_abi_bits;
if (field_bit_off == 0) {
const off_mcv = MCValue{ .load_frame = .{
.index = frame_addr.index,
.off = frame_addr.off + field_byte_off,
} };
if (field_off % 8 == 0) {
const off_mcv =
src_mcv.address().offset(@intCast(i32, @divExact(field_off, 8))).deref();
if (self.reuseOperand(inst, operand, 0, src_mcv)) break :result off_mcv;
const dst_mcv = try self.allocRegOrMem(inst, true);
@ -4364,6 +4449,12 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
break :result dst_mcv;
}
const field_abi_size = @intCast(u32, field_ty.abiSize(self.target.*));
const limb_abi_size = @min(field_abi_size, 8);
const limb_abi_bits = limb_abi_size * 8;
const field_byte_off = @intCast(i32, field_off / limb_abi_bits * limb_abi_size);
const field_bit_off = field_off % limb_abi_bits;
if (field_abi_size > 8) {
return self.fail("TODO implement struct_field_val with large packed field", .{});
}
@ -5181,24 +5272,69 @@ fn genBinOp(
switch (tag) {
.add,
.addwrap,
=> try self.genBinOpMir(switch (lhs_ty.tag()) {
=> try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
else => .add,
.f32 => .addss,
.f64 => .addsd,
.Float => switch (lhs_ty.floatBits(self.target.*)) {
32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
.addss
else
return self.fail("TODO implement genBinOp for {s} {} without sse", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
.addsd
else
return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
else => return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
},
}, lhs_ty, dst_mcv, src_mcv),
.sub,
.subwrap,
=> try self.genBinOpMir(switch (lhs_ty.tag()) {
=> try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
else => .sub,
.f32 => .subss,
.f64 => .subsd,
.Float => switch (lhs_ty.floatBits(self.target.*)) {
32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
.subss
else
return self.fail("TODO implement genBinOp for {s} {} without sse", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
.subsd
else
return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
else => return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
},
}, lhs_ty, dst_mcv, src_mcv),
.mul => try self.genBinOpMir(switch (lhs_ty.tag()) {
.f32 => .mulss,
.f64 => .mulsd,
.mul => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) }),
.Float => switch (lhs_ty.floatBits(self.target.*)) {
32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
.mulss
else
return self.fail("TODO implement genBinOp for {s} {} without sse", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
.mulsd
else
return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
else => return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
},
}, lhs_ty, dst_mcv, src_mcv),
.div_float,
@ -5206,12 +5342,27 @@ fn genBinOp(
.div_trunc,
.div_floor,
=> {
try self.genBinOpMir(switch (lhs_ty.tag()) {
.f32 => .divss,
.f64 => .divsd,
try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
else => return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
.Float => switch (lhs_ty.floatBits(self.target.*)) {
32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
.divss
else
return self.fail("TODO implement genBinOp for {s} {} without sse", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
.divsd
else
return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
else => return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
},
}, lhs_ty, dst_mcv, src_mcv);
switch (tag) {
.div_float,
@ -5222,16 +5373,18 @@ fn genBinOp(
=> if (Target.x86.featureSetHas(self.target.cpu.features, .sse4_1)) {
const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
const dst_alias = registerAlias(dst_mcv.register, abi_size);
try self.asmRegisterRegisterImmediate(switch (lhs_ty.tag()) {
.f32 => .roundss,
.f64 => .roundsd,
try self.asmRegisterRegisterImmediate(switch (lhs_ty.floatBits(self.target.*)) {
32 => .roundss,
64 => .roundsd,
else => unreachable,
}, dst_alias, dst_alias, Immediate.u(switch (tag) {
.div_trunc => 0b1_0_11,
.div_floor => 0b1_0_01,
else => unreachable,
}));
} else return self.fail("TODO implement round without sse4_1", .{}),
} else return self.fail("TODO implement genBinOp for {s} {} without sse4_1", .{
@tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
else => unreachable,
}
},
@ -5453,39 +5606,68 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s
)),
else => unreachable,
},
.register_offset,
.eflags,
.register_offset,
.memory,
.indirect,
.load_direct,
.lea_direct,
.load_got,
.lea_got,
.load_tlv,
.lea_tlv,
.load_frame,
.lea_frame,
=> {
assert(abi_size <= 8);
blk: {
return self.asmRegisterMemory(
mir_tag,
registerAlias(dst_reg, abi_size),
Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
.memory => |addr| .{
.base = .{ .reg = .ds },
.disp = math.cast(i32, addr) orelse break :blk,
},
.indirect => |reg_off| .{
.base = .{ .reg = reg_off.reg },
.disp = reg_off.off,
},
.load_frame => |frame_addr| .{
.base = .{ .frame = frame_addr.index },
.disp = frame_addr.off,
},
else => break :blk,
}),
);
}
const dst_reg_lock = self.register_manager.lockReg(dst_reg);
defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock);
const reg = try self.copyToTmpRegister(ty, src_mcv);
return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg });
},
.indirect, .load_frame => try self.asmRegisterMemory(
mir_tag,
registerAlias(dst_reg, abi_size),
Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
.indirect => |reg_off| .{
.base = .{ .reg = reg_off.reg },
.disp = reg_off.off,
switch (src_mcv) {
.eflags,
.register_offset,
.lea_direct,
.lea_got,
.lea_tlv,
.lea_frame,
=> {
const reg = try self.copyToTmpRegister(ty, src_mcv);
return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg });
},
.load_frame => |frame_addr| .{
.base = .{ .frame = frame_addr.index },
.disp = frame_addr.off,
.memory,
.load_direct,
.load_got,
.load_tlv,
=> {
const addr_reg = try self.copyToTmpRegister(ty, src_mcv.address());
return self.genBinOpMir(mir_tag, ty, dst_mcv, .{
.indirect = .{ .reg = addr_reg },
});
},
else => unreachable,
}),
),
}
},
}
},
.memory, .indirect, .load_got, .load_direct, .load_tlv, .load_frame => {
@ -6175,10 +6357,25 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
try self.genBinOpMir(switch (ty.tag()) {
try self.genBinOpMir(switch (ty.zigTypeTag()) {
else => .cmp,
.f32 => .ucomiss,
.f64 => .ucomisd,
.Float => switch (ty.floatBits(self.target.*)) {
32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
.ucomiss
else
return self.fail("TODO implement airCmp for {} without sse", .{
ty.fmt(self.bin_file.options.module.?),
}),
64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
.ucomisd
else
return self.fail("TODO implement airCmp for {} without sse2", .{
ty.fmt(self.bin_file.options.module.?),
}),
else => return self.fail("TODO implement airCmp for {}", .{
ty.fmt(self.bin_file.options.module.?),
}),
},
}, ty, dst_mcv, src_mcv);
const signedness = if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned;
@ -7608,7 +7805,8 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void {
const dst_rc = try self.regClassForType(dst_ty);
const src_rc = try self.regClassForType(src_ty);
const operand = try self.resolveInst(ty_op.operand);
if (dst_rc.eql(src_rc) and self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand;
if (dst_rc.supersetOf(src_rc) and self.reuseOperand(inst, ty_op.operand, 0, operand))
break :result operand;
const operand_lock = switch (operand) {
.register => |reg| self.register_manager.lockReg(reg),
@ -7648,9 +7846,59 @@ fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void {
/// Lower an int-to-float conversion via `cvtsi2ss`/`cvtsi2sd`.
/// Only integer sources that fit a 32- or 64-bit signed operand are handled.
fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void {
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
    const src_ty = self.air.typeOf(ty_op.operand);
    const src_bits = @intCast(u32, src_ty.bitSize(self.target.*));
    const src_signedness =
        if (src_ty.isAbiInt()) src_ty.intInfo(self.target.*).signedness else .unsigned;
    const dst_ty = self.air.typeOfIndex(inst);
    // cvtsi2ss/sd treat the source as signed, so an unsigned source needs one
    // extra bit; the instruction's smallest operand form is 32 bits.
    const src_size = std.math.divCeil(u32, @max(switch (src_signedness) {
        .signed => src_bits,
        .unsigned => src_bits + 1,
    }, 32), 8) catch unreachable;
    // Sources wider than the 64-bit operand form are not yet supported.
    if (src_size > 8) return self.fail("TODO implement airIntToFloat from {} to {}", .{
        src_ty.fmt(self.bin_file.options.module.?),
        dst_ty.fmt(self.bin_file.options.module.?),
    });
    const src_mcv = try self.resolveInst(ty_op.operand);
    const src_reg = switch (src_mcv) {
        .register => |reg| reg,
        else => try self.copyToTmpRegister(src_ty, src_mcv),
    };
    const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
    defer self.register_manager.unlockReg(src_lock);
    // Clear the bits above src_bits so the widened operand reads correctly.
    if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg);
    const dst_reg = try self.register_manager.allocReg(inst, try self.regClassForType(dst_ty));
    const dst_mcv = MCValue{ .register = dst_reg };
    const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
    defer self.register_manager.unlockReg(dst_lock);
    try self.asmRegisterRegister(switch (dst_ty.floatBits(self.target.*)) {
        32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
            .cvtsi2ss
        else
            return self.fail("TODO implement airIntToFloat from {} to {} without sse", .{
                src_ty.fmt(self.bin_file.options.module.?),
                dst_ty.fmt(self.bin_file.options.module.?),
            }),
        64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
            .cvtsi2sd
        else
            return self.fail("TODO implement airIntToFloat from {} to {} without sse2", .{
                src_ty.fmt(self.bin_file.options.module.?),
                dst_ty.fmt(self.bin_file.options.module.?),
            }),
        else => return self.fail("TODO implement airIntToFloat from {} to {}", .{
            src_ty.fmt(self.bin_file.options.module.?),
            dst_ty.fmt(self.bin_file.options.module.?),
        }),
    }, dst_reg.to128(), registerAlias(src_reg, src_size));
    return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}
fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void {
@ -8717,6 +8965,7 @@ fn resolveCallingConventionValues(
},
.C => {
var param_reg_i: usize = 0;
var param_sse_reg_i: usize = 0;
result.stack_align = 16;
switch (self.target.os.tag) {
@ -8734,26 +8983,39 @@ fn resolveCallingConventionValues(
// TODO: is this even possible for C calling convention?
result.return_value = InstTracking.init(.none);
} else {
const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0];
const ret_ty_size = @intCast(u31, ret_ty.abiSize(self.target.*));
if (ret_ty_size <= 8) {
const aliased_reg = registerAlias(ret_reg, ret_ty_size);
result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none };
} else {
const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i];
param_reg_i += 1;
result.return_value = .{
.short = .{ .indirect = .{ .reg = ret_reg } },
.long = .{ .indirect = .{ .reg = ret_indirect_reg } },
};
const classes = switch (self.target.os.tag) {
.windows => &[1]abi.Class{abi.classifyWindows(ret_ty, self.target.*)},
else => mem.sliceTo(&abi.classifySystemV(ret_ty, self.target.*, .ret), .none),
};
if (classes.len > 1) {
return self.fail("TODO handle multiple classes per type", .{});
}
const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0];
result.return_value = switch (classes[0]) {
.integer => InstTracking.init(.{ .register = registerAlias(
ret_reg,
@intCast(u32, ret_ty.abiSize(self.target.*)),
) }),
.float, .sse => InstTracking.init(.{ .register = .xmm0 }),
.memory => ret: {
const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i];
param_reg_i += 1;
break :ret .{
.short = .{ .indirect = .{ .reg = ret_reg } },
.long = .{ .indirect = .{ .reg = ret_indirect_reg } },
};
},
else => |class| return self.fail("TODO handle calling convention class {s}", .{
@tagName(class),
}),
};
}
// Input params
for (param_types, result.args) |ty, *arg| {
assert(ty.hasRuntimeBitsIgnoreComptime());
const classes: []const abi.Class = switch (self.target.os.tag) {
const classes = switch (self.target.os.tag) {
.windows => &[1]abi.Class{abi.classifyWindows(ty, self.target.*)},
else => mem.sliceTo(&abi.classifySystemV(ty, self.target.*, .arg), .none),
};
@ -8761,13 +9023,29 @@ fn resolveCallingConventionValues(
return self.fail("TODO handle multiple classes per type", .{});
}
switch (classes[0]) {
.integer => blk: {
if (param_reg_i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk;
const param_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i];
.integer => if (param_reg_i < abi.getCAbiIntParamRegs(self.target.*).len) {
arg.* = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i] };
param_reg_i += 1;
arg.* = .{ .register = param_reg };
continue;
},
.float, .sse => switch (self.target.os.tag) {
.windows => if (param_reg_i < 4) {
arg.* = .{ .register = @intToEnum(
Register,
@enumToInt(Register.xmm0) + param_reg_i,
) };
param_reg_i += 1;
continue;
},
else => if (param_sse_reg_i < 8) {
arg.* = .{ .register = @intToEnum(
Register,
@enumToInt(Register.xmm0) + param_sse_reg_i,
) };
param_sse_reg_i += 1;
continue;
},
},
.memory => {}, // fallthrough
else => |class| return self.fail("TODO handle calling convention class {s}", .{
@tagName(class),

View File

@ -58,7 +58,7 @@ pub fn findByMnemonic(
next: for (mnemonic_to_encodings_map[@enumToInt(mnemonic)]) |data| {
switch (data.mode) {
.rex => if (!rex_required) continue,
.long, .sse2_long => {},
.long, .sse_long, .sse2_long => {},
else => if (rex_required) continue,
}
for (input_ops, data.ops) |input_op, data_op|
@ -90,7 +90,7 @@ pub fn findByOpcode(opc: []const u8, prefixes: struct {
if (prefixes.rex.w) {
switch (data.mode) {
.short, .fpu, .sse, .sse2, .sse4_1, .none => continue,
.long, .sse2_long, .rex => {},
.long, .sse_long, .sse2_long, .rex => {},
}
} else if (prefixes.rex.present and !prefixes.rex.isSet()) {
switch (data.mode) {
@ -138,7 +138,7 @@ pub fn modRmExt(encoding: Encoding) u3 {
pub fn operandBitSize(encoding: Encoding) u64 {
switch (encoding.data.mode) {
.short => return 16,
.long, .sse2_long => return 64,
.long, .sse_long, .sse2_long => return 64,
else => {},
}
const bit_size: u64 = switch (encoding.data.op_en) {
@ -163,7 +163,7 @@ pub fn format(
_ = options;
_ = fmt;
switch (encoding.data.mode) {
.long, .sse2_long => try writer.writeAll("REX.W + "),
.long, .sse_long, .sse2_long => try writer.writeAll("REX.W + "),
else => {},
}
@ -269,21 +269,25 @@ pub const Mnemonic = enum {
// SSE
addss,
cmpss,
cvtsi2ss,
divss,
maxss, minss,
movss,
mulss,
subss,
ucomiss,
xorps,
// SSE2
addsd,
//cmpsd,
cvtsd2ss, cvtsi2sd, cvtss2sd,
divsd,
maxsd, minsd,
movq, //movd, movsd,
mulsd,
subsd,
ucomisd,
xorpd,
// SSE4.1
roundss,
roundsd,
@ -318,7 +322,7 @@ pub const Op = enum {
m,
moffs,
sreg,
xmm, xmm_m32, xmm_m64,
xmm, xmm_m32, xmm_m64, xmm_m128,
// zig fmt: on
pub fn fromOperand(operand: Instruction.Operand) Op {
@ -400,7 +404,7 @@ pub const Op = enum {
.imm32, .imm32s, .eax, .r32, .m32, .rm32, .rel32, .xmm_m32 => 32,
.imm64, .rax, .r64, .m64, .rm64, .xmm_m64 => 64,
.m80 => 80,
.m128, .xmm => 128,
.m128, .xmm, .xmm_m128 => 128,
};
}
@ -423,8 +427,8 @@ pub const Op = enum {
.al, .ax, .eax, .rax,
.r8, .r16, .r32, .r64,
.rm8, .rm16, .rm32, .rm64,
.xmm, .xmm_m32, .xmm_m64,
=> true,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128,
=> true,
else => false,
};
// zig fmt: on
@ -449,7 +453,7 @@ pub const Op = enum {
.rm8, .rm16, .rm32, .rm64,
.m8, .m16, .m32, .m64, .m80, .m128,
.m,
.xmm_m32, .xmm_m64,
.xmm_m32, .xmm_m64, .xmm_m128,
=> true,
else => false,
};
@ -470,13 +474,13 @@ pub const Op = enum {
.r8, .r16, .r32, .r64 => .general_purpose,
.rm8, .rm16, .rm32, .rm64 => .general_purpose,
.sreg => .segment,
.xmm, .xmm_m32, .xmm_m64 => .floating_point,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point,
};
}
pub fn isFloatingPointRegister(op: Op) bool {
return switch (op) {
.xmm, .xmm_m32, .xmm_m64 => true,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => true,
else => false,
};
}
@ -535,6 +539,7 @@ pub const Mode = enum {
rex,
long,
sse,
sse_long,
sse2,
sse2_long,
sse4_1,

View File

@ -95,6 +95,7 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
.addss,
.cmpss,
.cvtsi2ss,
.divss,
.maxss,
.minss,
@ -103,8 +104,12 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
.roundss,
.subss,
.ucomiss,
.xorps,
.addsd,
.cmpsd,
.cvtsd2ss,
.cvtsi2sd,
.cvtss2sd,
.divsd,
.maxsd,
.minsd,
@ -113,6 +118,7 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
.roundsd,
.subsd,
.ucomisd,
.xorpd,
=> try lower.mirGeneric(inst),
.cmps,

View File

@ -170,6 +170,8 @@ pub const Inst = struct {
addss,
/// Compare scalar single-precision floating-point values
cmpss,
/// Convert doubleword integer to scalar single-precision floating-point value
cvtsi2ss,
/// Divide scalar single-precision floating-point values
divss,
/// Return maximum single-precision floating-point value
@ -186,10 +188,18 @@ pub const Inst = struct {
subss,
/// Unordered compare scalar single-precision floating-point values
ucomiss,
/// Bitwise logical xor of packed single precision floating-point values
xorps,
/// Add double precision floating point values
addsd,
/// Compare scalar double-precision floating-point values
cmpsd,
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
cvtsd2ss,
/// Convert doubleword integer to scalar double-precision floating-point value
cvtsi2sd,
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
cvtss2sd,
/// Divide scalar double-precision floating-point values
divsd,
/// Return maximum double-precision floating-point value
@ -206,6 +216,8 @@ pub const Inst = struct {
subsd,
/// Unordered compare scalar double-precision floating-point values
ucomisd,
/// Bitwise logical xor of packed double precision floating-point values
xorpd,
/// Compare string operands
cmps,

View File

@ -323,7 +323,7 @@ pub const Instruction = struct {
var rex = Rex{};
rex.present = inst.encoding.data.mode == .rex;
switch (inst.encoding.data.mode) {
.long, .sse2_long => rex.w = true,
.long, .sse_long, .sse2_long => rex.w = true,
else => {},
}

View File

@ -834,6 +834,9 @@ pub const table = [_]Entry{
.{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .sse },
.{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse },
.{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse_long },
.{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .sse },
.{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .sse },
@ -849,11 +852,20 @@ pub const table = [_]Entry{
.{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .sse },
.{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .sse },
// SSE2
.{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .sse2 },
.{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .sse2 },
.{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .sse2 },
.{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .sse2 },
.{ .cvtsi2sd, .rm, &.{ .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .sse2_long },
.{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .sse2 },
.{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .sse2 },
.{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .sse2 },
@ -878,6 +890,8 @@ pub const table = [_]Entry{
.{ .ucomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2e }, 0, .sse2 },
.{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .sse2 },
// SSE4.1
.{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .sse4_1 },
.{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .sse4_1 },

View File

@ -291,6 +291,20 @@ pub fn generateSymbol(
},
},
.Pointer => switch (typed_value.val.tag()) {
.null_value => {
switch (target.cpu.arch.ptrBitWidth()) {
32 => {
mem.writeInt(u32, try code.addManyAsArray(4), 0, endian);
if (typed_value.ty.isSlice()) try code.appendNTimes(0xaa, 4);
},
64 => {
mem.writeInt(u64, try code.addManyAsArray(8), 0, endian);
if (typed_value.ty.isSlice()) try code.appendNTimes(0xaa, 8);
},
else => unreachable,
}
return Result.ok;
},
.zero, .one, .int_u64, .int_big_positive => {
switch (target.cpu.arch.ptrBitWidth()) {
32 => {
@ -397,30 +411,15 @@ pub fn generateSymbol(
},
}
},
.elem_ptr => {
const elem_ptr = typed_value.val.castTag(.elem_ptr).?.data;
const elem_size = typed_value.ty.childType().abiSize(target);
const addend = @intCast(u32, elem_ptr.index * elem_size);
const array_ptr = elem_ptr.array_ptr;
switch (array_ptr.tag()) {
.decl_ref => {
const decl_index = array_ptr.castTag(.decl_ref).?.data;
return lowerDeclRef(bin_file, src_loc, typed_value, decl_index, code, debug_output, .{
.parent_atom_index = reloc_info.parent_atom_index,
.addend = (reloc_info.addend orelse 0) + addend,
});
},
else => return Result{
.fail = try ErrorMsg.create(
bin_file.allocator,
src_loc,
"TODO implement generateSymbol for pointer type value: '{s}'",
.{@tagName(typed_value.val.tag())},
),
},
}
},
.elem_ptr => return lowerParentPtr(
bin_file,
src_loc,
typed_value,
typed_value.val,
code,
debug_output,
reloc_info,
),
else => return Result{
.fail = try ErrorMsg.create(
bin_file.allocator,
@ -838,9 +837,62 @@ pub fn generateSymbol(
}
}
/// Recursively resolve a pointer `Value` down to its base `decl_ref`,
/// accumulating element byte offsets into `reloc_info.addend` along the way,
/// then emit the relocation through `lowerDeclRef`. Pointer forms other than
/// `elem_ptr` and `decl_ref` are not yet handled.
fn lowerParentPtr(
    bin_file: *link.File,
    src_loc: Module.SrcLoc,
    typed_value: TypedValue,
    parent_ptr: Value,
    code: *std.ArrayList(u8),
    debug_output: DebugInfoOutput,
    reloc_info: RelocInfo,
) CodeGenError!Result {
    const target = bin_file.options.target;
    switch (parent_ptr.tag()) {
        .elem_ptr => {
            const elem_ptr = parent_ptr.castTag(.elem_ptr).?.data;
            // Recurse on the array pointer, adding index * element-size bytes
            // to the relocation addend.
            return lowerParentPtr(
                bin_file,
                src_loc,
                typed_value,
                elem_ptr.array_ptr,
                code,
                debug_output,
                reloc_info.offset(@intCast(u32, elem_ptr.index * elem_ptr.elem_ty.abiSize(target))),
            );
        },
        .decl_ref => {
            // Base case: the pointer bottoms out at a declaration.
            const decl_index = parent_ptr.castTag(.decl_ref).?.data;
            return lowerDeclRef(
                bin_file,
                src_loc,
                typed_value,
                decl_index,
                code,
                debug_output,
                reloc_info,
            );
        },
        else => |t| {
            return Result{
                .fail = try ErrorMsg.create(
                    bin_file.allocator,
                    src_loc,
                    "TODO implement lowerParentPtr for type '{s}'",
                    .{@tagName(t)},
                ),
            };
        },
    }
}
/// Relocation bookkeeping for symbol lowering: which atom the relocation
/// belongs to, plus an optional byte addend accumulated while walking
/// nested pointer values.
const RelocInfo = struct {
    parent_atom_index: u32,
    addend: ?u32 = null,

    /// Return a copy of `ri` with `addend` grown by the given amount
    /// (a null addend counts as zero).
    fn offset(ri: RelocInfo, addend: u32) RelocInfo {
        var adjusted = ri;
        adjusted.addend = addend + (ri.addend orelse 0);
        return adjusted;
    }
};
fn lowerDeclRef(
@ -1095,6 +1147,9 @@ pub fn genTypedValue(
.Slice => {},
else => {
switch (typed_value.val.tag()) {
.null_value => {
return GenResult.mcv(.{ .immediate = 0 });
},
.int_u64 => {
return GenResult.mcv(.{ .immediate = typed_value.val.toUnsignedInt(target) });
},