x86_64: redo movement, float negation, and @fabs

Jacob Young 2023-05-14 05:12:46 -04:00
parent b6d6102850
commit 6c6d8d67cf
8 changed files with 359 additions and 118 deletions
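The core of the change: airFloatSign now materializes an integer-vector constant — minInt of the matching signed lane type for negation, maxInt for @fabs — and applies it with a packed xor/and (pxor/pand, the ps/pd forms, or their VEX encodings), replacing the previous repeated -0.0 float constant. A minimal sketch of the bit trick, not taken from the commit itself; the constants and test below are illustrative only:

const std = @import("std");

// A minimal sketch, not the backend code: for a signed 32-bit lane,
// minInt is just the sign bit and maxInt is every bit below it, so
//   negation => xor with the minInt bit pattern (flip the sign bit)
//   @fabs    => and with the maxInt bit pattern (clear the sign bit)
test "sign-bit masks used for float negation and @fabs" {
    const neg_mask: u32 = 0x8000_0000; // bit pattern of minInt(i32)
    const abs_mask: u32 = 0x7fff_ffff; // bit pattern of maxInt(i32)
    try std.testing.expect((neg_mask ^ abs_mask) == 0xffff_ffff);
    try std.testing.expect((neg_mask & abs_mask) == 0);
    // -1.5 has bit pattern 0xbfc0_0000; both operations yield 0x3fc0_0000 (+1.5).
    const minus_1_5: u32 = 0xbfc0_0000;
    try std.testing.expect((minus_1_5 ^ neg_mask) == 0x3fc0_0000);
    try std.testing.expect((minus_1_5 & abs_mask) == 0x3fc0_0000);
}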

View File

@ -4681,61 +4681,136 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
}
fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
const tag = self.air.instructions.items(.tag)[inst];
const un_op = self.air.instructions.items(.data)[inst].un_op;
const ty = self.air.typeOf(un_op);
const ty_bits = ty.floatBits(self.target.*);
const abi_size: u32 = switch (ty.abiSize(self.target.*)) {
1...16 => 16,
17...32 => 32,
else => return self.fail("TODO implement airFloatSign for {}", .{
ty.fmt(self.bin_file.options.module.?),
}),
};
const scalar_bits = ty.scalarType().floatBits(self.target.*);
const src_mcv = try self.resolveInst(un_op);
const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
const dst_mcv: MCValue = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
src_mcv
else if (self.hasFeature(.avx))
.{ .register = try self.register_manager.allocReg(inst, sse) }
else
try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
const dst_reg = dst_mcv.getReg().?;
const dst_lock = self.register_manager.lockReg(dst_reg);
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
var arena = std.heap.ArenaAllocator.init(self.gpa);
defer arena.deinit();
const ExpectedContents = union {
f16: Value.Payload.Float_16,
f32: Value.Payload.Float_32,
f64: Value.Payload.Float_64,
f80: Value.Payload.Float_80,
f128: Value.Payload.Float_128,
const ExpectedContents = struct {
scalar: union {
i64: Value.Payload.I64,
big: struct {
limbs: [
@max(
std.math.big.int.Managed.default_capacity,
std.math.big.int.calcTwosCompLimbCount(128),
)
]std.math.big.Limb,
pl: Value.Payload.BigInt,
},
},
repeated: Value.Payload.SubValue,
};
var stack align(@alignOf(ExpectedContents)) =
std.heap.stackFallback(@sizeOf(ExpectedContents), arena.allocator());
var int_pl = Type.Payload.Bits{
.base = .{ .tag = .int_signed },
.data = scalar_bits,
};
var vec_pl = Type.Payload.Array{
.base = .{ .tag = .vector },
.data = .{
.len = @divExact(128, ty_bits),
.elem_type = ty,
.len = @divExact(abi_size * 8, scalar_bits),
.elem_type = Type.initPayload(&int_pl.base),
},
};
const vec_ty = Type.initPayload(&vec_pl.base);
var sign_pl = Value.Payload.SubValue{
.base = .{ .tag = .repeated },
.data = try Value.floatToValue(-0.0, stack.get(), ty, self.target.*),
const sign_val = switch (tag) {
.neg => try vec_ty.minInt(stack.get(), self.target.*),
.fabs => try vec_ty.maxInt(stack.get(), self.target.*),
else => unreachable,
};
const sign_val = Value.initPayload(&sign_pl.base);
const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val });
const src_mcv = try self.resolveInst(un_op);
const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
src_mcv
const sign_mem = if (sign_mcv.isMemory())
sign_mcv.mem(Memory.PtrSize.fromSize(abi_size))
else
try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
const dst_lock = self.register_manager.lockReg(dst_mcv.register);
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
.base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) },
});
const tag = self.air.instructions.items(.tag)[inst];
try self.genBinOpMir(switch (ty_bits) {
// No point using an extra prefix byte for *pd which performs the same operation.
16, 32, 64, 128 => switch (tag) {
.neg => .{ ._ps, .xor },
.fabs => .{ ._ps, .andn },
if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory(
switch (scalar_bits) {
16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) {
.neg => .{ .vp_, .xor },
.fabs => .{ .vp_, .@"and" },
else => unreachable,
} else switch (tag) {
.neg => .{ .v_ps, .xor },
.fabs => .{ .v_ps, .@"and" },
else => unreachable,
},
32 => switch (tag) {
.neg => .{ .v_ps, .xor },
.fabs => .{ .v_ps, .@"and" },
else => unreachable,
},
64 => switch (tag) {
.neg => .{ .v_pd, .xor },
.fabs => .{ .v_pd, .@"and" },
else => unreachable,
},
80 => return self.fail("TODO implement airFloatSign for {}", .{
ty.fmt(self.bin_file.options.module.?),
}),
else => unreachable,
},
80 => return self.fail("TODO implement airFloatSign for {}", .{
ty.fmt(self.bin_file.options.module.?),
}),
else => unreachable,
}, vec_ty, dst_mcv, sign_mcv);
registerAlias(dst_reg, abi_size),
registerAlias(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(ty, src_mcv), abi_size),
sign_mem,
) else try self.asmRegisterMemory(
switch (scalar_bits) {
16, 128 => switch (tag) {
.neg => .{ .p_, .xor },
.fabs => .{ .p_, .@"and" },
else => unreachable,
},
32 => switch (tag) {
.neg => .{ ._ps, .xor },
.fabs => .{ ._ps, .@"and" },
else => unreachable,
},
64 => switch (tag) {
.neg => .{ ._pd, .xor },
.fabs => .{ ._pd, .@"and" },
else => unreachable,
},
80 => return self.fail("TODO implement airFloatSign for {}", .{
ty.fmt(self.bin_file.options.module.?),
}),
else => unreachable,
},
registerAlias(dst_reg, abi_size),
sign_mem,
);
return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
}
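At the language level this is the shape of code the rewritten airFloatSign handles, which is what lets the previously skipped vector tests near the bottom of this diff run on the self-hosted x86_64 backend. A hedged sketch, written against the builtins available at the time of this commit (@fabs on vectors); it is an illustration, not a test taken from the commit:

const std = @import("std");

// Illustrative only: negation and @fabs on a float vector, which the
// backend now lowers to packed xor/and against an integer mask constant.
test "negation and @fabs on a float vector" {
    const v: @Vector(4, f32) = [4]f32{ 1.5, -2.5, 0.0, -0.0 };
    const negated = -v;
    const abs = @fabs(v);
    try std.testing.expectEqual(@as(f32, -1.5), negated[0]);
    try std.testing.expectEqual(@as(f32, 2.5), abs[1]);
}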
@ -8593,7 +8668,6 @@ const MoveStrategy = union(enum) {
const InsertExtract = struct {
insert: Mir.Inst.FixedTag,
extract: Mir.Inst.FixedTag,
imm: Immediate,
};
};
fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
@ -8603,17 +8677,15 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
.insert = .{ .vp_w, .insr },
.extract = .{ .vp_w, .extr },
.imm = Immediate.u(0),
} } else .{ .insert_extract = .{
.insert = .{ .p_w, .insr },
.extract = .{ .p_w, .extr },
.imm = Immediate.u(0),
} },
32 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } },
64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } },
128 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
else => {},
},
.Vector => switch (ty.childType().zigTypeTag()) {
@ -8622,101 +8694,120 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
.insert = .{ .vp_b, .insr },
.extract = .{ .vp_b, .extr },
.imm = Immediate.u(0),
} } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
.insert = .{ .p_b, .insr },
.extract = .{ .p_b, .extr },
.imm = Immediate.u(0),
} },
2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
.insert = .{ .vp_w, .insr },
.extract = .{ .vp_w, .extr },
.imm = Immediate.u(0),
} } else .{ .insert_extract = .{
.insert = .{ .p_w, .insr },
.extract = .{ .p_w, .extr },
.imm = Immediate.u(0),
} },
3...4 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_ss, .mov }
.{ .v_d, .mov }
else
.{ ._ss, .mov } },
.{ ._d, .mov } },
5...8 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_sd, .mov }
.{ .v_q, .mov }
else
.{ ._sd, .mov } },
.{ ._q, .mov } },
9...16 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
17...32 => if (self.hasFeature(.avx))
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
16 => switch (ty.vectorLen()) {
1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
.insert = .{ .vp_w, .insr },
.extract = .{ .vp_w, .extr },
.imm = Immediate.u(0),
} } else .{ .insert_extract = .{
.insert = .{ .p_w, .insr },
.extract = .{ .p_w, .extr },
.imm = Immediate.u(0),
} },
2 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_ss, .mov }
.{ .v_d, .mov }
else
.{ ._ss, .mov } },
.{ ._d, .mov } },
3...4 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_sd, .mov }
.{ .v_q, .mov }
else
.{ ._sd, .mov } },
.{ ._q, .mov } },
5...8 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_ps, .mov }
else
.{ ._ps, .mov } },
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
9...16 => if (self.hasFeature(.avx))
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
32 => switch (ty.vectorLen()) {
1 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_ss, .mov }
.{ .v_d, .mov }
else
.{ ._ss, .mov } },
.{ ._d, .mov } },
2 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_sd, .mov }
.{ .v_q, .mov }
else
.{ ._sd, .mov } },
.{ ._q, .mov } },
3...4 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
5...8 => if (self.hasFeature(.avx))
return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
64 => switch (ty.vectorLen()) {
1 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_sd, .mov }
.{ .v_q, .mov }
else
.{ ._sd, .mov } },
.{ ._q, .mov } },
2 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
3...4 => if (self.hasFeature(.avx))
return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
128 => switch (ty.vectorLen()) {
1 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
2 => if (self.hasFeature(.avx))
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
256 => switch (ty.vectorLen()) {
1 => if (self.hasFeature(.avx))
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
else => {},
},
.Float => switch (ty.childType().floatBits(self.target.*)) {
16 => switch (ty.vectorLen()) {
1 => {},
1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
.insert = .{ .vp_w, .insr },
.extract = .{ .vp_w, .extr },
} } else .{ .insert_extract = .{
.insert = .{ .p_w, .insr },
.extract = .{ .p_w, .extr },
} },
2 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_ss, .mov }
.{ .v_d, .mov }
else
.{ ._ss, .mov } },
.{ ._d, .mov } },
3...4 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_sd, .mov }
.{ .v_q, .mov }
else
.{ ._sd, .mov } },
.{ ._q, .mov } },
5...8 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
9...16 => if (self.hasFeature(.avx))
return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
32 => switch (ty.vectorLen()) {
@ -8741,18 +8832,18 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
else
.{ ._sd, .mov } },
2 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
3...4 => if (self.hasFeature(.avx))
return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
return .{ .move = if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } },
else => {},
},
128 => switch (ty.vectorLen()) {
1 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
2 => if (self.hasFeature(.avx))
return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
else => {},
@ -8860,29 +8951,69 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
);
}
},
.register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister(
if ((dst_reg.class() == .sse) == (src_reg.class() == .sse))
switch (ty.zigTypeTag()) {
else => .{ ._, .mov },
.Float, .Vector => .{ ._ps, .mova },
}
else switch (abi_size) {
2 => return try self.asmRegisterRegisterImmediate(
if (dst_reg.class() == .sse) .{ .p_w, .insr } else .{ .p_w, .extr },
registerAlias(dst_reg, 4),
registerAlias(src_reg, 4),
Immediate.u(0),
.register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) {
.general_purpose => switch (src_reg.class()) {
.general_purpose => try self.asmRegisterRegister(
.{ ._, .mov },
registerAlias(dst_reg, abi_size),
registerAlias(src_reg, abi_size),
),
4 => .{ ._d, .mov },
8 => .{ ._q, .mov },
else => return self.fail(
"unsupported register copy from {s} to {s}",
.{ @tagName(src_reg), @tagName(dst_reg) },
.segment => try self.asmRegisterRegister(
.{ ._, .mov },
registerAlias(dst_reg, abi_size),
src_reg,
),
.sse => try self.asmRegisterRegister(
switch (abi_size) {
1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
else => unreachable,
},
registerAlias(dst_reg, @max(abi_size, 4)),
src_reg.to128(),
),
.x87, .mmx => unreachable,
},
registerAlias(dst_reg, abi_size),
registerAlias(src_reg, abi_size),
),
.segment => try self.asmRegisterRegister(
.{ ._, .mov },
dst_reg,
switch (src_reg.class()) {
.general_purpose, .segment => registerAlias(src_reg, abi_size),
.sse => try self.copyToTmpRegister(ty, src_mcv),
.x87, .mmx => unreachable,
},
),
.sse => switch (src_reg.class()) {
.general_purpose => try self.asmRegisterRegister(
switch (abi_size) {
1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
else => unreachable,
},
dst_reg.to128(),
registerAlias(src_reg, @max(abi_size, 4)),
),
.segment => try self.genSetReg(
dst_reg,
ty,
.{ .register = try self.copyToTmpRegister(ty, src_mcv) },
),
.sse => try self.asmRegisterRegister(
switch (ty.scalarType().zigTypeTag()) {
else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
.Float => switch (ty.floatBits(self.target.*)) {
else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova },
64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova },
},
},
registerAlias(dst_reg, abi_size),
registerAlias(src_reg, abi_size),
),
.x87, .mmx => unreachable,
},
.x87, .mmx => unreachable,
},
.register_offset,
.indirect,
.load_frame,
@ -8918,14 +9049,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
ie.insert,
dst_alias,
src_mem,
ie.imm,
Immediate.u(0),
),
.vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
ie.insert,
dst_alias,
dst_alias,
src_mem,
ie.imm,
Immediate.u(0),
),
}
},
@ -8947,14 +9078,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
ie.insert,
dst_alias,
src_mem,
ie.imm,
Immediate.u(0),
),
.vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
ie.insert,
dst_alias,
dst_alias,
src_mem,
ie.imm,
Immediate.u(0),
),
}
},
@ -8994,14 +9125,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
ie.insert,
dst_alias,
src_mem,
ie.imm,
Immediate.u(0),
),
.vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
ie.insert,
dst_alias,
dst_alias,
src_mem,
ie.imm,
Immediate.u(0),
),
}
},
@ -9129,7 +9260,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
ie.extract,
dst_mem,
src_alias,
ie.imm,
Immediate.u(0),
),
}
},
@ -10499,7 +10630,7 @@ fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void {
if (self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv;
const dst_mcv = try self.allocRegOrMem(inst, true);
try self.genCopy(src_ty, dst_mcv, src_mcv);
try self.genCopy(union_ty, dst_mcv, src_mcv);
break :result dst_mcv;
}
@ -11000,7 +11131,15 @@ fn registerAlias(reg: Register, size_bytes: u32) Register {
reg.to64()
else
unreachable,
.segment, .x87, .mmx => unreachable,
.segment => if (size_bytes <= 2)
reg
else
unreachable,
.x87 => unreachable,
.mmx => if (size_bytes <= 8)
reg
else
unreachable,
.sse => if (size_bytes <= 16)
reg.to128()
else if (size_bytes <= 32)

View File

@ -261,7 +261,8 @@ pub const Mnemonic = enum {
// X87
fisttp, fld,
// MMX
movd,
movd, movq,
pand, pandn, por, pxor,
// SSE
addps, addss,
andps,
@ -293,7 +294,8 @@ pub const Mnemonic = enum {
maxpd, maxsd,
minpd, minsd,
movapd,
movq, //movd, movsd,
movdqa, movdqu,
//movsd,
movupd,
mulpd, mulsd,
orpd,
@ -316,6 +318,7 @@ pub const Mnemonic = enum {
roundpd, roundps, roundsd, roundss,
// AVX
vaddpd, vaddps, vaddsd, vaddss,
vandnpd, vandnps, vandpd, vandps,
vbroadcastf128, vbroadcastsd, vbroadcastss,
vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps,
vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss,
@ -327,22 +330,31 @@ pub const Mnemonic = enum {
vmaxpd, vmaxps, vmaxsd, vmaxss,
vminpd, vminps, vminsd, vminss,
vmovapd, vmovaps,
vmovddup, vmovhlps, vmovlhps,
vmovd,
vmovddup,
vmovdqa, vmovdqu,
vmovhlps, vmovlhps,
vmovq,
vmovsd,
vmovshdup, vmovsldup,
vmovss,
vmovupd, vmovups,
vmulpd, vmulps, vmulsd, vmulss,
vorpd, vorps,
vpand, vpandn,
vpextrb, vpextrd, vpextrq, vpextrw,
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
vpor,
vpshufhw, vpshuflw,
vpsrld, vpsrlq, vpsrlw,
vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd,
vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd,
vpxor,
vroundpd, vroundps, vroundsd, vroundss,
vshufpd, vshufps,
vsqrtpd, vsqrtps, vsqrtsd, vsqrtss,
vsubpd, vsubps, vsubsd, vsubss,
vxorpd, vxorps,
// F16C
vcvtph2ps, vcvtps2ph,
// FMA

View File

@ -236,6 +236,14 @@ pub const Inst = struct {
/// VEX-Encoded ___
v_,
/// VEX-Encoded ___ Byte
v_b,
/// VEX-Encoded ___ Word
v_w,
/// VEX-Encoded ___ Doubleword
v_d,
/// VEX-Encoded ___ Quadword
v_q,
/// VEX-Encoded Packed ___
vp_,
/// VEX-Encoded Packed ___ Byte
@ -526,6 +534,10 @@ pub const Inst = struct {
cvttps2dq,
/// Convert with truncation scalar double-precision floating-point value to doubleword integer
cvttsd2si,
/// Move aligned packed integer values
movdqa,
/// Move unaligned packed integer values
movdqu,
/// Packed interleave shuffle of quadruplets of single-precision floating-point values
/// Packed interleave shuffle of pairs of double-precision floating-point values
shuf,

View File

@ -970,11 +970,16 @@ pub const table = [_]Entry{
.{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .none, .sse2 },
.{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .none, .sse2 },
.{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 },
.{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .long, .sse2 },
.{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 },
.{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .long, .sse2 },
.{ .movdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .none, .sse2 },
.{ .movdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .none, .sse2 },
.{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 },
.{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 },
.{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .none, .sse2 },
.{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .none, .sse2 },
@ -987,10 +992,16 @@ pub const table = [_]Entry{
.{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 },
.{ .pand, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .none, .sse2 },
.{ .pandn, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .none, .sse2 },
.{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 },
.{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
.{ .por, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 },
.{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 },
.{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 },
@ -1012,6 +1023,8 @@ pub const table = [_]Entry{
.{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 },
.{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 },
.{ .pxor, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .none, .sse2 },
.{ .shufpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .none, .sse2 },
.{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 },
@ -1070,6 +1083,18 @@ pub const table = [_]Entry{
.{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
.{ .vandnpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_128_wig, .avx },
.{ .vandnpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_256_wig, .avx },
.{ .vandnps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .vex_128_wig, .avx },
.{ .vandnps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x55 }, 0, .vex_256_wig, .avx },
.{ .vandpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
.{ .vandpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
.{ .vandps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
.{ .vandps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
.{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx },
.{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx },
.{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx },
@ -1169,13 +1194,31 @@ pub const table = [_]Entry{
.{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256_wig, .avx },
.{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256_wig, .avx },
.{ .vmovd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w0, .avx },
.{ .vmovq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w1, .avx },
.{ .vmovd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w0, .avx },
.{ .vmovq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w1, .avx },
.{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
.{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx },
.{ .vmovdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_128_wig, .avx },
.{ .vmovdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_128_wig, .avx },
.{ .vmovdqa, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_256_wig, .avx },
.{ .vmovdqa, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_256_wig, .avx },
.{ .vmovdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_128_wig, .avx },
.{ .vmovdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_128_wig, .avx },
.{ .vmovdqu, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_256_wig, .avx },
.{ .vmovdqu, .mr, &.{ .ymm_m256, .ymm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_256_wig, .avx },
.{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
.{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx },
.{ .vmovq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .vex_128_wig, .avx },
.{ .vmovq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .vex_128_wig, .avx },
.{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
.{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
.{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
@ -1212,6 +1255,16 @@ pub const table = [_]Entry{
.{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx },
.{ .vorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
.{ .vorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
.{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
.{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
.{ .vpand, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_128_wig, .avx },
.{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx },
.{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx },
.{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx },
.{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx },
@ -1225,6 +1278,8 @@ pub const table = [_]Entry{
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
.{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },
.{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx },
.{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128_wig, .avx },
.{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128_wig, .avx },
@ -1242,6 +1297,8 @@ pub const table = [_]Entry{
.{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx },
.{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx },
.{ .vpxor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_128_wig, .avx },
.{ .vroundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_128_wig, .avx },
.{ .vroundpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_256_wig, .avx },
@ -1278,6 +1335,12 @@ pub const table = [_]Entry{
.{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx },
.{ .vxorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
.{ .vxorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
.{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
.{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
// F16C
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
.{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
@ -1313,6 +1376,12 @@ pub const table = [_]Entry{
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
.{ .vpand, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_256_wig, .avx2 },
.{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
.{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
.{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
.{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 },
.{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 },
@ -1329,5 +1398,7 @@ pub const table = [_]Entry{
.{ .vpunpcklwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_256_wig, .avx2 },
.{ .vpunpckldq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_256_wig, .avx2 },
.{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 },
.{ .vpxor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_256_wig, .avx2 },
};
// zig fmt: on

View File

@ -5433,8 +5433,18 @@ pub const Type = extern union {
}
}
// Works for integers and vectors of integers.
pub fn maxInt(ty: Type, arena: Allocator, target: Target) !Value {
const scalar = try maxIntScalar(ty.scalarType(), arena, target);
if (ty.zigTypeTag() == .Vector and scalar.tag() != .the_only_possible_value) {
return Value.Tag.repeated.create(arena, scalar);
} else {
return scalar;
}
}
/// Asserts that self.zigTypeTag() == .Int.
pub fn maxInt(self: Type, arena: Allocator, target: Target) !Value {
pub fn maxIntScalar(self: Type, arena: Allocator, target: Target) !Value {
assert(self.zigTypeTag() == .Int);
const info = self.intInfo(target);
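For vector types the new maxInt wrapper returns a `repeated` Value — the scalar maxInt in every lane — which genTypedValue then materializes as the mask constant used by airFloatSign. An assumed user-level illustration of that shape, not the compiler-internal API:

const std = @import("std");

// Assumed illustration: a vector's maxInt is conceptually the scalar
// maxInt repeated per lane, matching the `repeated` Value created above.
test "vector maxInt repeats the scalar maxInt per lane" {
    const lane_max: i32 = std.math.maxInt(i32);
    const splat: @Vector(4, i32) = [4]i32{ lane_max, lane_max, lane_max, lane_max };
    try std.testing.expectEqual(lane_max, splat[0]);
    try std.testing.expectEqual(lane_max, splat[3]);
}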

View File

@ -532,7 +532,6 @@ fn testFabs() !void {
test "@fabs with vectors" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;

View File

@ -1612,7 +1612,6 @@ test "absFloat" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;

View File

@ -65,7 +65,6 @@ test "cast negative integer to pointer" {
test "casting to union with a macro" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO