mirror of
https://github.com/ziglang/zig.git
synced 2026-01-21 06:45:24 +00:00
x86_64: fix float min/max behavior
This commit is contained in:
parent
b9d2e0e308
commit
403c2d91be
@ -1271,6 +1271,27 @@ fn asmRegisterRegisterRegister(
|
||||
});
|
||||
}
|
||||
|
||||
/// Appends one MIR instruction whose operands are four registers.
///
/// `tag` is a fixed-tag pair: element 0 is stored as the `fixes` of the
/// `rrrr` payload and element 1 becomes the instruction tag. `reg1`..`reg4`
/// are stored, in order, as `r1`..`r4` of the same payload.
///
/// The value returned by `addInst` is discarded; any error it reports is
/// propagated to the caller.
fn asmRegisterRegisterRegisterRegister(
    self: *Self,
    tag: Mir.Inst.FixedTag,
    reg1: Register,
    reg2: Register,
    reg3: Register,
    reg4: Register,
) !void {
    // Split the pair up front so the instruction literal below reads flat.
    const fixes = tag[0];
    const inst_tag = tag[1];

    _ = try self.addInst(.{
        .tag = inst_tag,
        .ops = .rrrr,
        .data = .{ .rrrr = .{
            .fixes = fixes,
            .r1 = reg1,
            .r2 = reg2,
            .r3 = reg3,
            .r4 = reg4,
        } },
    });
}
|
||||
|
||||
fn asmRegisterRegisterRegisterImmediate(
|
||||
self: *Self,
|
||||
tag: Mir.Inst.FixedTag,
|
||||
@ -6224,12 +6245,26 @@ fn genBinOp(
|
||||
lhs_air: Air.Inst.Ref,
|
||||
rhs_air: Air.Inst.Ref,
|
||||
) !MCValue {
|
||||
const lhs_mcv = try self.resolveInst(lhs_air);
|
||||
const rhs_mcv = try self.resolveInst(rhs_air);
|
||||
const lhs_ty = self.air.typeOf(lhs_air);
|
||||
const rhs_ty = self.air.typeOf(rhs_air);
|
||||
const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
|
||||
|
||||
const maybe_mask_reg = switch (air_tag) {
|
||||
else => null,
|
||||
.max, .min => if (lhs_ty.scalarType().isRuntimeFloat()) registerAlias(
|
||||
if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: {
|
||||
try self.register_manager.getReg(.xmm0, null);
|
||||
break :mask .xmm0;
|
||||
} else try self.register_manager.allocReg(null, sse),
|
||||
abi_size,
|
||||
) else null,
|
||||
};
|
||||
const mask_lock =
|
||||
if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null;
|
||||
defer if (mask_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const lhs_mcv = try self.resolveInst(lhs_air);
|
||||
const rhs_mcv = try self.resolveInst(rhs_air);
|
||||
switch (lhs_mcv) {
|
||||
.immediate => |imm| switch (imm) {
|
||||
0 => switch (air_tag) {
|
||||
@ -6300,7 +6335,16 @@ fn genBinOp(
|
||||
};
|
||||
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
|
||||
const unmat_src_mcv = if (flipped) lhs_mcv else rhs_mcv;
|
||||
const src_mcv: MCValue = if (maybe_mask_reg) |mask_reg|
|
||||
if (self.hasFeature(.avx) and unmat_src_mcv.isRegister() and maybe_inst != null and
|
||||
self.liveness.operandDies(maybe_inst.?, if (flipped) 0 else 1)) unmat_src_mcv else src: {
|
||||
try self.genSetReg(mask_reg, rhs_ty, unmat_src_mcv);
|
||||
break :src .{ .register = mask_reg };
|
||||
}
|
||||
else
|
||||
unmat_src_mcv;
|
||||
|
||||
if (!vec_op) {
|
||||
switch (air_tag) {
|
||||
.add,
|
||||
@ -7009,18 +7053,26 @@ fn genBinOp(
|
||||
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
});
|
||||
|
||||
const lhs_copy_reg = if (maybe_mask_reg) |_| registerAlias(
|
||||
if (copied_to_dst) try self.copyToTmpRegister(lhs_ty, dst_mcv) else lhs_mcv.getReg().?,
|
||||
abi_size,
|
||||
) else null;
|
||||
const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null;
|
||||
defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
if (self.hasFeature(.avx)) {
|
||||
const src1_alias =
|
||||
const lhs_reg =
|
||||
if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
|
||||
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
|
||||
mir_tag,
|
||||
dst_reg,
|
||||
src1_alias,
|
||||
lhs_reg,
|
||||
src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
mir_tag,
|
||||
dst_reg,
|
||||
src1_alias,
|
||||
lhs_reg,
|
||||
registerAlias(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
@ -7041,9 +7093,10 @@ fn genBinOp(
|
||||
try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
|
||||
);
|
||||
}
|
||||
|
||||
switch (air_tag) {
|
||||
.add, .addwrap, .sub, .subwrap, .mul, .mulwrap, .div_float, .div_exact => {},
|
||||
.div_trunc, .div_floor => try self.genRound(
|
||||
.div_trunc, .div_floor => if (self.hasFeature(.sse4_1)) try self.genRound(
|
||||
lhs_ty,
|
||||
dst_reg,
|
||||
.{ .register = dst_reg },
|
||||
@ -7052,11 +7105,240 @@ fn genBinOp(
|
||||
.div_floor => 0b1_0_01,
|
||||
else => unreachable,
|
||||
},
|
||||
),
|
||||
) else return self.fail("TODO implement genBinOp for {s} {} without sse4_1 feature", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
.bit_and, .bit_or, .xor => {},
|
||||
.max, .min => {}, // TODO: unordered select
|
||||
.max, .min => if (maybe_mask_reg) |mask_reg| if (self.hasFeature(.avx)) {
|
||||
const rhs_copy_reg = registerAlias(src_mcv.getReg().?, abi_size);
|
||||
|
||||
try self.asmRegisterRegisterRegisterImmediate(
|
||||
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => .{ .v_ss, .cmp },
|
||||
64 => .{ .v_sd, .cmp },
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (lhs_ty.vectorLen()) {
|
||||
1 => .{ .v_ss, .cmp },
|
||||
2...8 => .{ .v_ps, .cmp },
|
||||
else => null,
|
||||
},
|
||||
64 => switch (lhs_ty.vectorLen()) {
|
||||
1 => .{ .v_sd, .cmp },
|
||||
2...4 => .{ .v_pd, .cmp },
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
mask_reg,
|
||||
rhs_copy_reg,
|
||||
rhs_copy_reg,
|
||||
Immediate.u(3), // unord
|
||||
);
|
||||
try self.asmRegisterRegisterRegisterRegister(
|
||||
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => .{ .v_ps, .blendv },
|
||||
64 => .{ .v_pd, .blendv },
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (lhs_ty.vectorLen()) {
|
||||
1...8 => .{ .v_ps, .blendv },
|
||||
else => null,
|
||||
},
|
||||
64 => switch (lhs_ty.vectorLen()) {
|
||||
1...4 => .{ .v_pd, .blendv },
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
lhs_copy_reg.?,
|
||||
mask_reg,
|
||||
);
|
||||
} else {
|
||||
const has_blend = self.hasFeature(.sse4_1);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => .{ ._ss, .cmp },
|
||||
64 => .{ ._sd, .cmp },
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (lhs_ty.vectorLen()) {
|
||||
1 => .{ ._ss, .cmp },
|
||||
2...4 => .{ ._ps, .cmp },
|
||||
else => null,
|
||||
},
|
||||
64 => switch (lhs_ty.vectorLen()) {
|
||||
1 => .{ ._sd, .cmp },
|
||||
2 => .{ ._pd, .cmp },
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
mask_reg,
|
||||
mask_reg,
|
||||
Immediate.u(if (has_blend) 3 else 7), // unord, ord
|
||||
);
|
||||
if (has_blend) try self.asmRegisterRegisterRegister(
|
||||
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => .{ ._ps, .blendv },
|
||||
64 => .{ ._pd, .blendv },
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (lhs_ty.vectorLen()) {
|
||||
1...4 => .{ ._ps, .blendv },
|
||||
else => null,
|
||||
},
|
||||
64 => switch (lhs_ty.vectorLen()) {
|
||||
1...2 => .{ ._pd, .blendv },
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
dst_reg,
|
||||
lhs_copy_reg.?,
|
||||
mask_reg,
|
||||
) else {
|
||||
try self.asmRegisterRegister(
|
||||
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => .{ ._ps, .@"and" },
|
||||
64 => .{ ._pd, .@"and" },
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (lhs_ty.vectorLen()) {
|
||||
1...4 => .{ ._ps, .@"and" },
|
||||
else => null,
|
||||
},
|
||||
64 => switch (lhs_ty.vectorLen()) {
|
||||
1...2 => .{ ._pd, .@"and" },
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
dst_reg,
|
||||
mask_reg,
|
||||
);
|
||||
try self.asmRegisterRegister(
|
||||
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => .{ ._ps, .andn },
|
||||
64 => .{ ._pd, .andn },
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (lhs_ty.vectorLen()) {
|
||||
1...4 => .{ ._ps, .andn },
|
||||
else => null,
|
||||
},
|
||||
64 => switch (lhs_ty.vectorLen()) {
|
||||
1...2 => .{ ._pd, .andn },
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
mask_reg,
|
||||
lhs_copy_reg.?,
|
||||
);
|
||||
try self.asmRegisterRegister(
|
||||
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.floatBits(self.target.*)) {
|
||||
32 => .{ ._ps, .@"or" },
|
||||
64 => .{ ._pd, .@"or" },
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
|
||||
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (lhs_ty.vectorLen()) {
|
||||
1...4 => .{ ._ps, .@"or" },
|
||||
else => null,
|
||||
},
|
||||
64 => switch (lhs_ty.vectorLen()) {
|
||||
1...2 => .{ ._pd, .@"or" },
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
|
||||
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
dst_reg,
|
||||
mask_reg,
|
||||
);
|
||||
}
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
|
||||
return dst_mcv;
|
||||
}
|
||||
|
||||
@ -9282,7 +9564,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
|
||||
17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
|
||||
else => null,
|
||||
},
|
||||
.Float => switch (ty.floatBits(self.target.*)) {
|
||||
.Float => switch (ty.scalarType().floatBits(self.target.*)) {
|
||||
16, 128 => switch (abi_size) {
|
||||
2...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
|
||||
5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
|
||||
|
||||
@ -178,7 +178,7 @@ pub fn format(
|
||||
try writer.print("+{s} ", .{tag});
|
||||
},
|
||||
.m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}),
|
||||
.mr, .rm, .rmi, .mri, .mrc, .rvm, .rvmi, .mvr => try writer.writeAll("/r "),
|
||||
.mr, .rm, .rmi, .mri, .mrc, .rm0, .rvm, .rvmr, .rvmi, .mvr => try writer.writeAll("/r "),
|
||||
}
|
||||
|
||||
switch (encoding.data.op_en) {
|
||||
@ -202,7 +202,8 @@ pub fn format(
|
||||
};
|
||||
try writer.print("{s} ", .{tag});
|
||||
},
|
||||
.np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rvm, .mvr => {},
|
||||
.rvmr => try writer.writeAll("/is4 "),
|
||||
.np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr => {},
|
||||
}
|
||||
|
||||
try writer.print("{s} ", .{@tagName(encoding.mnemonic)});
|
||||
@ -270,7 +271,7 @@ pub const Mnemonic = enum {
|
||||
addps, addss,
|
||||
andps,
|
||||
andnps,
|
||||
cmpss,
|
||||
cmpps, cmpss,
|
||||
cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si,
|
||||
divps, divss,
|
||||
maxps, maxss,
|
||||
@ -290,7 +291,7 @@ pub const Mnemonic = enum {
|
||||
addpd, addsd,
|
||||
andpd,
|
||||
andnpd,
|
||||
//cmpsd,
|
||||
cmppd, //cmpsd,
|
||||
cvtdq2pd, cvtdq2ps, cvtpd2dq, cvtpd2pi, cvtpd2ps, cvtpi2pd,
|
||||
cvtps2dq, cvtps2pd, cvtsd2si, cvtsd2ss, cvtsi2sd, cvtss2sd,
|
||||
cvttpd2dq, cvttpd2pi, cvttps2dq, cvttsd2si,
|
||||
@ -315,6 +316,7 @@ pub const Mnemonic = enum {
|
||||
// SSE3
|
||||
movddup, movshdup, movsldup,
|
||||
// SSE4.1
|
||||
blendpd, blendps, blendvpd, blendvps,
|
||||
extractps,
|
||||
insertps,
|
||||
pextrb, pextrd, pextrq,
|
||||
@ -325,7 +327,9 @@ pub const Mnemonic = enum {
|
||||
// AVX
|
||||
vaddpd, vaddps, vaddsd, vaddss,
|
||||
vandnpd, vandnps, vandpd, vandps,
|
||||
vblendpd, vblendps, vblendvpd, vblendvps,
|
||||
vbroadcastf128, vbroadcastsd, vbroadcastss,
|
||||
vcmppd, vcmpps, vcmpsd, vcmpss,
|
||||
vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps,
|
||||
vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss,
|
||||
vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vcvtss2si,
|
||||
@ -385,7 +389,7 @@ pub const OpEn = enum {
|
||||
fd, td,
|
||||
m1, mc, mi, mr, rm,
|
||||
rmi, mri, mrc,
|
||||
vmi, rvm, rvmi, mvr,
|
||||
rm0, vmi, rvm, rvmr, rvmi, mvr,
|
||||
// zig fmt: on
|
||||
};
|
||||
|
||||
@ -407,7 +411,7 @@ pub const Op = enum {
|
||||
moffs,
|
||||
sreg,
|
||||
st, mm, mm_m64,
|
||||
xmm, xmm_m32, xmm_m64, xmm_m128,
|
||||
xmm0, xmm, xmm_m32, xmm_m64, xmm_m128,
|
||||
ymm, ymm_m256,
|
||||
// zig fmt: on
|
||||
|
||||
@ -436,7 +440,9 @@ pub const Op = enum {
|
||||
.segment => .sreg,
|
||||
.x87 => .st,
|
||||
.mmx => .mm,
|
||||
.sse => switch (reg.bitSize()) {
|
||||
.sse => if (reg == .xmm0)
|
||||
.xmm0
|
||||
else switch (reg.bitSize()) {
|
||||
128 => .xmm,
|
||||
256 => .ymm,
|
||||
else => unreachable,
|
||||
@ -494,7 +500,7 @@ pub const Op = enum {
|
||||
.eax, .r32, .rm32, .r32_m16 => unreachable,
|
||||
.rax, .r64, .rm64, .r64_m16 => unreachable,
|
||||
.st, .mm, .mm_m64 => unreachable,
|
||||
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
|
||||
.xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
|
||||
.ymm, .ymm_m256 => unreachable,
|
||||
.m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
|
||||
.unity => 1,
|
||||
@ -516,7 +522,7 @@ pub const Op = enum {
|
||||
.eax, .r32, .rm32, .r32_m8, .r32_m16 => 32,
|
||||
.rax, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64,
|
||||
.st => 80,
|
||||
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
|
||||
.xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
|
||||
.ymm, .ymm_m256 => 256,
|
||||
};
|
||||
}
|
||||
@ -526,7 +532,8 @@ pub const Op = enum {
|
||||
.none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable,
|
||||
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
|
||||
.rel8, .rel16, .rel32 => unreachable,
|
||||
.al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .st, .mm, .xmm, .ymm => unreachable,
|
||||
.al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64 => unreachable,
|
||||
.st, .mm, .xmm0, .xmm, .ymm => unreachable,
|
||||
.m8, .rm8, .r32_m8 => 8,
|
||||
.m16, .rm16, .r32_m16, .r64_m16 => 16,
|
||||
.m32, .rm32, .xmm_m32 => 32,
|
||||
@ -558,7 +565,7 @@ pub const Op = enum {
|
||||
.rm8, .rm16, .rm32, .rm64,
|
||||
.r32_m8, .r32_m16, .r64_m16,
|
||||
.st, .mm, .mm_m64,
|
||||
.xmm, .xmm_m32, .xmm_m64, .xmm_m128,
|
||||
.xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128,
|
||||
.ymm, .ymm_m256,
|
||||
=> true,
|
||||
else => false,
|
||||
@ -612,7 +619,7 @@ pub const Op = enum {
|
||||
.sreg => .segment,
|
||||
.st => .x87,
|
||||
.mm, .mm_m64 => .mmx,
|
||||
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
|
||||
.xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
|
||||
.ymm, .ymm_m256 => .sse,
|
||||
};
|
||||
}
|
||||
@ -629,7 +636,7 @@ pub const Op = enum {
|
||||
else => {
|
||||
if (op.isRegister() and target.isRegister()) {
|
||||
return switch (target) {
|
||||
.cl, .al, .ax, .eax, .rax => op == target,
|
||||
.cl, .al, .ax, .eax, .rax, .xmm0 => op == target,
|
||||
else => op.class() == target.class() and op.regBitSize() == target.regBitSize(),
|
||||
};
|
||||
}
|
||||
|
||||
@ -377,6 +377,7 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
|
||||
.r => inst.data.r.fixes,
|
||||
.rr => inst.data.rr.fixes,
|
||||
.rrr => inst.data.rrr.fixes,
|
||||
.rrrr => inst.data.rrrr.fixes,
|
||||
.rrri => inst.data.rrri.fixes,
|
||||
.rri_s, .rri_u => inst.data.rri.fixes,
|
||||
.ri_s, .ri_u => inst.data.ri.fixes,
|
||||
@ -430,6 +431,12 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
|
||||
.{ .reg = inst.data.rrr.r2 },
|
||||
.{ .reg = inst.data.rrr.r3 },
|
||||
},
|
||||
.rrrr => &.{
|
||||
.{ .reg = inst.data.rrrr.r1 },
|
||||
.{ .reg = inst.data.rrrr.r2 },
|
||||
.{ .reg = inst.data.rrrr.r3 },
|
||||
.{ .reg = inst.data.rrrr.r4 },
|
||||
},
|
||||
.rrri => &.{
|
||||
.{ .reg = inst.data.rrri.r1 },
|
||||
.{ .reg = inst.data.rrri.r2 },
|
||||
|
||||
@ -596,6 +596,16 @@ pub const Inst = struct {
|
||||
/// Replicate single floating-point values
|
||||
movsldup,
|
||||
|
||||
/// Blend packed single-precision floating-point values
|
||||
/// Blend scalar single-precision floating-point values
|
||||
/// Blend packed double-precision floating-point values
|
||||
/// Blend scalar double-precision floating-point values
|
||||
blend,
|
||||
/// Variable blend packed single-precision floating-point values
|
||||
/// Variable blend scalar single-precision floating-point values
|
||||
/// Variable blend packed double-precision floating-point values
|
||||
/// Variable blend scalar double-precision floating-point values
|
||||
blendv,
|
||||
/// Extract packed floating-point values
|
||||
extract,
|
||||
/// Insert scalar single-precision floating-point value
|
||||
@ -651,6 +661,9 @@ pub const Inst = struct {
|
||||
/// Register, register, register operands.
|
||||
/// Uses `rrr` payload.
|
||||
rrr,
|
||||
/// Register, register, register, register operands.
|
||||
/// Uses `rrrr` payload.
|
||||
rrrr,
|
||||
/// Register, register, register, immediate (byte) operands.
|
||||
/// Uses `rrri` payload.
|
||||
rrri,
|
||||
@ -870,6 +883,13 @@ pub const Inst = struct {
|
||||
r2: Register,
|
||||
r3: Register,
|
||||
},
|
||||
rrrr: struct {
|
||||
fixes: Fixes = ._,
|
||||
r1: Register,
|
||||
r2: Register,
|
||||
r3: Register,
|
||||
r4: Register,
|
||||
},
|
||||
rrri: struct {
|
||||
fixes: Fixes = ._,
|
||||
r1: Register,
|
||||
|
||||
@ -226,8 +226,8 @@ pub const Instruction = struct {
|
||||
else => {
|
||||
const mem_op = switch (data.op_en) {
|
||||
.m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
|
||||
.rm, .rmi, .vmi => inst.ops[1],
|
||||
.rvm, .rvmi => inst.ops[2],
|
||||
.rm, .rmi, .rm0, .vmi => inst.ops[1],
|
||||
.rvm, .rvmr, .rvmi => inst.ops[2],
|
||||
else => unreachable,
|
||||
};
|
||||
switch (mem_op) {
|
||||
@ -235,7 +235,7 @@ pub const Instruction = struct {
|
||||
const rm = switch (data.op_en) {
|
||||
.m, .mi, .m1, .mc, .vmi => enc.modRmExt(),
|
||||
.mr, .mri, .mrc => inst.ops[1].reg.lowEnc(),
|
||||
.rm, .rmi, .rvm, .rvmi => inst.ops[0].reg.lowEnc(),
|
||||
.rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0].reg.lowEnc(),
|
||||
.mvr => inst.ops[2].reg.lowEnc(),
|
||||
else => unreachable,
|
||||
};
|
||||
@ -245,7 +245,7 @@ pub const Instruction = struct {
|
||||
const op = switch (data.op_en) {
|
||||
.m, .mi, .m1, .mc, .vmi => .none,
|
||||
.mr, .mri, .mrc => inst.ops[1],
|
||||
.rm, .rmi, .rvm, .rvmi => inst.ops[0],
|
||||
.rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0],
|
||||
.mvr => inst.ops[2],
|
||||
else => unreachable,
|
||||
};
|
||||
@ -257,6 +257,7 @@ pub const Instruction = struct {
|
||||
switch (data.op_en) {
|
||||
.mi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder),
|
||||
.rmi, .mri, .vmi => try encodeImm(inst.ops[2].imm, data.ops[2], encoder),
|
||||
.rvmr => try encoder.imm8(@as(u8, inst.ops[3].reg.enc()) << 4),
|
||||
.rvmi => try encodeImm(inst.ops[3].imm, data.ops[3], encoder),
|
||||
else => {},
|
||||
}
|
||||
@ -298,7 +299,7 @@ pub const Instruction = struct {
|
||||
.i, .zi, .o, .oi, .d, .np => null,
|
||||
.fd => inst.ops[1].mem.base().reg,
|
||||
.td => inst.ops[0].mem.base().reg,
|
||||
.rm, .rmi => if (inst.ops[1].isSegmentRegister())
|
||||
.rm, .rmi, .rm0 => if (inst.ops[1].isSegmentRegister())
|
||||
switch (inst.ops[1]) {
|
||||
.reg => |reg| reg,
|
||||
.mem => |mem| mem.base().reg,
|
||||
@ -314,7 +315,7 @@ pub const Instruction = struct {
|
||||
}
|
||||
else
|
||||
null,
|
||||
.vmi, .rvm, .rvmi, .mvr => unreachable,
|
||||
.vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable,
|
||||
};
|
||||
if (segment_override) |seg| {
|
||||
legacy.setSegmentOverride(seg);
|
||||
@ -333,23 +334,23 @@ pub const Instruction = struct {
|
||||
switch (op_en) {
|
||||
.np, .i, .zi, .fd, .td, .d => {},
|
||||
.o, .oi => rex.b = inst.ops[0].reg.isExtended(),
|
||||
.m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc => {
|
||||
.m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0 => {
|
||||
const r_op = switch (op_en) {
|
||||
.rm, .rmi => inst.ops[0],
|
||||
.rm, .rmi, .rm0 => inst.ops[0],
|
||||
.mr, .mri, .mrc => inst.ops[1],
|
||||
else => .none,
|
||||
};
|
||||
rex.r = r_op.isBaseExtended();
|
||||
|
||||
const b_x_op = switch (op_en) {
|
||||
.rm, .rmi => inst.ops[1],
|
||||
.rm, .rmi, .rm0 => inst.ops[1],
|
||||
.m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0],
|
||||
else => unreachable,
|
||||
};
|
||||
rex.b = b_x_op.isBaseExtended();
|
||||
rex.x = b_x_op.isIndexExtended();
|
||||
},
|
||||
.vmi, .rvm, .rvmi, .mvr => unreachable,
|
||||
.vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable,
|
||||
}
|
||||
|
||||
try encoder.rex(rex);
|
||||
@ -367,9 +368,9 @@ pub const Instruction = struct {
|
||||
switch (op_en) {
|
||||
.np, .i, .zi, .fd, .td, .d => {},
|
||||
.o, .oi => vex.b = inst.ops[0].reg.isExtended(),
|
||||
.m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .vmi, .rvm, .rvmi, .mvr => {
|
||||
.m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr => {
|
||||
const r_op = switch (op_en) {
|
||||
.rm, .rmi, .rvm, .rvmi => inst.ops[0],
|
||||
.rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0],
|
||||
.mr, .mri, .mrc => inst.ops[1],
|
||||
.mvr => inst.ops[2],
|
||||
.m, .mi, .m1, .mc, .vmi => .none,
|
||||
@ -378,9 +379,9 @@ pub const Instruction = struct {
|
||||
vex.r = r_op.isBaseExtended();
|
||||
|
||||
const b_x_op = switch (op_en) {
|
||||
.rm, .rmi, .vmi => inst.ops[1],
|
||||
.rm, .rmi, .rm0, .vmi => inst.ops[1],
|
||||
.m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
|
||||
.rvm, .rvmi => inst.ops[2],
|
||||
.rvm, .rvmr, .rvmi => inst.ops[2],
|
||||
else => unreachable,
|
||||
};
|
||||
vex.b = b_x_op.isBaseExtended();
|
||||
@ -408,7 +409,7 @@ pub const Instruction = struct {
|
||||
switch (op_en) {
|
||||
else => {},
|
||||
.vmi => vex.v = inst.ops[0].reg,
|
||||
.rvm, .rvmi => vex.v = inst.ops[1].reg,
|
||||
.rvm, .rvmr, .rvmi => vex.v = inst.ops[1].reg,
|
||||
}
|
||||
|
||||
try encoder.vex(vex);
|
||||
|
||||
@ -846,6 +846,8 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .none, .sse },
|
||||
|
||||
.{ .cmpps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .none, .sse },
|
||||
|
||||
.{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse },
|
||||
|
||||
.{ .cvtpi2ps, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x0f, 0x2a }, 0, .none, .sse },
|
||||
@ -917,6 +919,8 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .cmppd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .cvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .none, .sse2 },
|
||||
@ -1085,6 +1089,14 @@ pub const table = [_]Entry{
|
||||
.{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 },
|
||||
|
||||
// SSE4.1
|
||||
.{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .blendps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .blendvpd, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x15 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .blendvps, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x14 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .extractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .insertps, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .none, .sse4_1 },
|
||||
@ -1146,11 +1158,33 @@ pub const table = [_]Entry{
|
||||
.{ .vandps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vandps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vblendpd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .vex_128_wig, .avx },
|
||||
.{ .vblendpd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vblendps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .vex_128_wig, .avx },
|
||||
.{ .vblendps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vblendvpd, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4b }, 0, .vex_128_w0, .avx },
|
||||
.{ .vblendvpd, .rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x3a, 0x4b }, 0, .vex_256_w0, .avx },
|
||||
|
||||
.{ .vblendvps, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4a }, 0, .vex_128_w0, .avx },
|
||||
.{ .vblendvps, .rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x3a, 0x4a }, 0, .vex_256_w0, .avx },
|
||||
|
||||
.{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx },
|
||||
.{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx },
|
||||
.{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx },
|
||||
.{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx },
|
||||
|
||||
.{ .vcmppd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vcmppd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vcmpps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vcmpps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vcmpsd, .rvmi, &.{ .xmm, .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vcmpss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vcvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vcvtdq2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
|
||||
@ -24,7 +24,8 @@ test "@max" {
|
||||
|
||||
test "@max on vectors" {
|
||||
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64 and
|
||||
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
@ -72,7 +73,8 @@ test "@min" {
|
||||
|
||||
test "@min for vectors" {
|
||||
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64 and
|
||||
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user