mirror of
https://github.com/ziglang/zig.git
synced 2026-02-14 21:38:33 +00:00
x86_64: redo movement, float negation, and @fabs
This commit is contained in:
parent
b6d6102850
commit
6c6d8d67cf
@ -4681,61 +4681,136 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
|
||||
}
|
||||
|
||||
fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
|
||||
const tag = self.air.instructions.items(.tag)[inst];
|
||||
const un_op = self.air.instructions.items(.data)[inst].un_op;
|
||||
const ty = self.air.typeOf(un_op);
|
||||
const ty_bits = ty.floatBits(self.target.*);
|
||||
const abi_size: u32 = switch (ty.abiSize(self.target.*)) {
|
||||
1...16 => 16,
|
||||
17...32 => 32,
|
||||
else => return self.fail("TODO implement airFloatSign for {}", .{
|
||||
ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
};
|
||||
const scalar_bits = ty.scalarType().floatBits(self.target.*);
|
||||
|
||||
const src_mcv = try self.resolveInst(un_op);
|
||||
const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
|
||||
defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const dst_mcv: MCValue = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
|
||||
src_mcv
|
||||
else if (self.hasFeature(.avx))
|
||||
.{ .register = try self.register_manager.allocReg(inst, sse) }
|
||||
else
|
||||
try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
|
||||
const dst_reg = dst_mcv.getReg().?;
|
||||
const dst_lock = self.register_manager.lockReg(dst_reg);
|
||||
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
var arena = std.heap.ArenaAllocator.init(self.gpa);
|
||||
defer arena.deinit();
|
||||
|
||||
const ExpectedContents = union {
|
||||
f16: Value.Payload.Float_16,
|
||||
f32: Value.Payload.Float_32,
|
||||
f64: Value.Payload.Float_64,
|
||||
f80: Value.Payload.Float_80,
|
||||
f128: Value.Payload.Float_128,
|
||||
const ExpectedContents = struct {
|
||||
scalar: union {
|
||||
i64: Value.Payload.I64,
|
||||
big: struct {
|
||||
limbs: [
|
||||
@max(
|
||||
std.math.big.int.Managed.default_capacity,
|
||||
std.math.big.int.calcTwosCompLimbCount(128),
|
||||
)
|
||||
]std.math.big.Limb,
|
||||
pl: Value.Payload.BigInt,
|
||||
},
|
||||
},
|
||||
repeated: Value.Payload.SubValue,
|
||||
};
|
||||
var stack align(@alignOf(ExpectedContents)) =
|
||||
std.heap.stackFallback(@sizeOf(ExpectedContents), arena.allocator());
|
||||
|
||||
var int_pl = Type.Payload.Bits{
|
||||
.base = .{ .tag = .int_signed },
|
||||
.data = scalar_bits,
|
||||
};
|
||||
var vec_pl = Type.Payload.Array{
|
||||
.base = .{ .tag = .vector },
|
||||
.data = .{
|
||||
.len = @divExact(128, ty_bits),
|
||||
.elem_type = ty,
|
||||
.len = @divExact(abi_size * 8, scalar_bits),
|
||||
.elem_type = Type.initPayload(&int_pl.base),
|
||||
},
|
||||
};
|
||||
const vec_ty = Type.initPayload(&vec_pl.base);
|
||||
|
||||
var sign_pl = Value.Payload.SubValue{
|
||||
.base = .{ .tag = .repeated },
|
||||
.data = try Value.floatToValue(-0.0, stack.get(), ty, self.target.*),
|
||||
const sign_val = switch (tag) {
|
||||
.neg => try vec_ty.minInt(stack.get(), self.target.*),
|
||||
.fabs => try vec_ty.maxInt(stack.get(), self.target.*),
|
||||
else => unreachable,
|
||||
};
|
||||
const sign_val = Value.initPayload(&sign_pl.base);
|
||||
|
||||
const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val });
|
||||
|
||||
const src_mcv = try self.resolveInst(un_op);
|
||||
const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
|
||||
src_mcv
|
||||
const sign_mem = if (sign_mcv.isMemory())
|
||||
sign_mcv.mem(Memory.PtrSize.fromSize(abi_size))
|
||||
else
|
||||
try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
|
||||
const dst_lock = self.register_manager.lockReg(dst_mcv.register);
|
||||
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
|
||||
.base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) },
|
||||
});
|
||||
|
||||
const tag = self.air.instructions.items(.tag)[inst];
|
||||
try self.genBinOpMir(switch (ty_bits) {
|
||||
// No point using an extra prefix byte for *pd which performs the same operation.
|
||||
16, 32, 64, 128 => switch (tag) {
|
||||
.neg => .{ ._ps, .xor },
|
||||
.fabs => .{ ._ps, .andn },
|
||||
if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory(
|
||||
switch (scalar_bits) {
|
||||
16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) {
|
||||
.neg => .{ .vp_, .xor },
|
||||
.fabs => .{ .vp_, .@"and" },
|
||||
else => unreachable,
|
||||
} else switch (tag) {
|
||||
.neg => .{ .v_ps, .xor },
|
||||
.fabs => .{ .v_ps, .@"and" },
|
||||
else => unreachable,
|
||||
},
|
||||
32 => switch (tag) {
|
||||
.neg => .{ .v_ps, .xor },
|
||||
.fabs => .{ .v_ps, .@"and" },
|
||||
else => unreachable,
|
||||
},
|
||||
64 => switch (tag) {
|
||||
.neg => .{ .v_pd, .xor },
|
||||
.fabs => .{ .v_pd, .@"and" },
|
||||
else => unreachable,
|
||||
},
|
||||
80 => return self.fail("TODO implement airFloatSign for {}", .{
|
||||
ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
else => unreachable,
|
||||
},
|
||||
80 => return self.fail("TODO implement airFloatSign for {}", .{
|
||||
ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
else => unreachable,
|
||||
}, vec_ty, dst_mcv, sign_mcv);
|
||||
registerAlias(dst_reg, abi_size),
|
||||
registerAlias(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(ty, src_mcv), abi_size),
|
||||
sign_mem,
|
||||
) else try self.asmRegisterMemory(
|
||||
switch (scalar_bits) {
|
||||
16, 128 => switch (tag) {
|
||||
.neg => .{ .p_, .xor },
|
||||
.fabs => .{ .p_, .@"and" },
|
||||
else => unreachable,
|
||||
},
|
||||
32 => switch (tag) {
|
||||
.neg => .{ ._ps, .xor },
|
||||
.fabs => .{ ._ps, .@"and" },
|
||||
else => unreachable,
|
||||
},
|
||||
64 => switch (tag) {
|
||||
.neg => .{ ._pd, .xor },
|
||||
.fabs => .{ ._pd, .@"and" },
|
||||
else => unreachable,
|
||||
},
|
||||
80 => return self.fail("TODO implement airFloatSign for {}", .{
|
||||
ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
else => unreachable,
|
||||
},
|
||||
registerAlias(dst_reg, abi_size),
|
||||
sign_mem,
|
||||
);
|
||||
return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
|
||||
}
|
||||
|
||||
@ -8593,7 +8668,6 @@ const MoveStrategy = union(enum) {
|
||||
const InsertExtract = struct {
|
||||
insert: Mir.Inst.FixedTag,
|
||||
extract: Mir.Inst.FixedTag,
|
||||
imm: Immediate,
|
||||
};
|
||||
};
|
||||
fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
|
||||
@ -8603,17 +8677,15 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
|
||||
16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
|
||||
.insert = .{ .vp_w, .insr },
|
||||
.extract = .{ .vp_w, .extr },
|
||||
.imm = Immediate.u(0),
|
||||
} } else .{ .insert_extract = .{
|
||||
.insert = .{ .p_w, .insr },
|
||||
.extract = .{ .p_w, .extr },
|
||||
.imm = Immediate.u(0),
|
||||
} },
|
||||
32 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } },
|
||||
64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } },
|
||||
128 => return .{ .move = if (self.hasFeature(.avx))
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
|
||||
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
|
||||
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
|
||||
else => {},
|
||||
},
|
||||
.Vector => switch (ty.childType().zigTypeTag()) {
|
||||
@ -8622,101 +8694,120 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
|
||||
1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
|
||||
.insert = .{ .vp_b, .insr },
|
||||
.extract = .{ .vp_b, .extr },
|
||||
.imm = Immediate.u(0),
|
||||
} } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
|
||||
.insert = .{ .p_b, .insr },
|
||||
.extract = .{ .p_b, .extr },
|
||||
.imm = Immediate.u(0),
|
||||
} },
|
||||
2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
|
||||
.insert = .{ .vp_w, .insr },
|
||||
.extract = .{ .vp_w, .extr },
|
||||
.imm = Immediate.u(0),
|
||||
} } else .{ .insert_extract = .{
|
||||
.insert = .{ .p_w, .insr },
|
||||
.extract = .{ .p_w, .extr },
|
||||
.imm = Immediate.u(0),
|
||||
} },
|
||||
3...4 => return .{ .move = if (self.hasFeature(.avx))
|
||||
.{ .v_ss, .mov }
|
||||
.{ .v_d, .mov }
|
||||
else
|
||||
.{ ._ss, .mov } },
|
||||
.{ ._d, .mov } },
|
||||
5...8 => return .{ .move = if (self.hasFeature(.avx))
|
||||
.{ .v_sd, .mov }
|
||||
.{ .v_q, .mov }
|
||||
else
|
||||
.{ ._sd, .mov } },
|
||||
.{ ._q, .mov } },
|
||||
9...16 => return .{ .move = if (self.hasFeature(.avx))
|
||||
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
|
||||
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
|
||||
17...32 => if (self.hasFeature(.avx))
|
||||
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
|
||||
else => {},
|
||||
},
|
||||
16 => switch (ty.vectorLen()) {
|
||||
1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
|
||||
.insert = .{ .vp_w, .insr },
|
||||
.extract = .{ .vp_w, .extr },
|
||||
.imm = Immediate.u(0),
|
||||
} } else .{ .insert_extract = .{
|
||||
.insert = .{ .p_w, .insr },
|
||||
.extract = .{ .p_w, .extr },
|
||||
.imm = Immediate.u(0),
|
||||
} },
|
||||
2 => return .{ .move = if (self.hasFeature(.avx))
|
||||
.{ .v_ss, .mov }
|
||||
.{ .v_d, .mov }
|
||||
else
|
||||
.{ ._ss, .mov } },
|
||||
.{ ._d, .mov } },
|
||||
3...4 => return .{ .move = if (self.hasFeature(.avx))
|
||||
.{ .v_sd, .mov }
|
||||
.{ .v_q, .mov }
|
||||
else
|
||||
.{ ._sd, .mov } },
|
||||
.{ ._q, .mov } },
|
||||
5...8 => return .{ .move = if (self.hasFeature(.avx))
|
||||
.{ .v_ps, .mov }
|
||||
else
|
||||
.{ ._ps, .mov } },
|
||||
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
|
||||
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
|
||||
9...16 => if (self.hasFeature(.avx))
|
||||
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
|
||||
else => {},
|
||||
},
|
||||
32 => switch (ty.vectorLen()) {
|
||||
1 => return .{ .move = if (self.hasFeature(.avx))
|
||||
.{ .v_ss, .mov }
|
||||
.{ .v_d, .mov }
|
||||
else
|
||||
.{ ._ss, .mov } },
|
||||
.{ ._d, .mov } },
|
||||
2 => return .{ .move = if (self.hasFeature(.avx))
|
||||
.{ .v_sd, .mov }
|
||||
.{ .v_q, .mov }
|
||||
else
|
||||
.{ ._sd, .mov } },
|
||||
.{ ._q, .mov } },
|
||||
3...4 => return .{ .move = if (self.hasFeature(.avx))
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
|
||||
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
|
||||
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
|
||||
5...8 => if (self.hasFeature(.avx))
|
||||
return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
|
||||
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
|
||||
else => {},
|
||||
},
|
||||
64 => switch (ty.vectorLen()) {
|
||||
1 => return .{ .move = if (self.hasFeature(.avx))
|
||||
.{ .v_sd, .mov }
|
||||
.{ .v_q, .mov }
|
||||
else
|
||||
.{ ._sd, .mov } },
|
||||
.{ ._q, .mov } },
|
||||
2 => return .{ .move = if (self.hasFeature(.avx))
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
|
||||
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
|
||||
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
|
||||
3...4 => if (self.hasFeature(.avx))
|
||||
return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
|
||||
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
|
||||
else => {},
|
||||
},
|
||||
128 => switch (ty.vectorLen()) {
|
||||
1 => return .{ .move = if (self.hasFeature(.avx))
|
||||
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
|
||||
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
|
||||
2 => if (self.hasFeature(.avx))
|
||||
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
|
||||
else => {},
|
||||
},
|
||||
256 => switch (ty.vectorLen()) {
|
||||
1 => if (self.hasFeature(.avx))
|
||||
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
|
||||
else => {},
|
||||
},
|
||||
else => {},
|
||||
},
|
||||
.Float => switch (ty.childType().floatBits(self.target.*)) {
|
||||
16 => switch (ty.vectorLen()) {
|
||||
1 => {},
|
||||
1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
|
||||
.insert = .{ .vp_w, .insr },
|
||||
.extract = .{ .vp_w, .extr },
|
||||
} } else .{ .insert_extract = .{
|
||||
.insert = .{ .p_w, .insr },
|
||||
.extract = .{ .p_w, .extr },
|
||||
} },
|
||||
2 => return .{ .move = if (self.hasFeature(.avx))
|
||||
.{ .v_ss, .mov }
|
||||
.{ .v_d, .mov }
|
||||
else
|
||||
.{ ._ss, .mov } },
|
||||
.{ ._d, .mov } },
|
||||
3...4 => return .{ .move = if (self.hasFeature(.avx))
|
||||
.{ .v_sd, .mov }
|
||||
.{ .v_q, .mov }
|
||||
else
|
||||
.{ ._sd, .mov } },
|
||||
.{ ._q, .mov } },
|
||||
5...8 => return .{ .move = if (self.hasFeature(.avx))
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
|
||||
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
|
||||
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
|
||||
9...16 => if (self.hasFeature(.avx))
|
||||
return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
|
||||
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
|
||||
else => {},
|
||||
},
|
||||
32 => switch (ty.vectorLen()) {
|
||||
@ -8741,18 +8832,18 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
|
||||
else
|
||||
.{ ._sd, .mov } },
|
||||
2 => return .{ .move = if (self.hasFeature(.avx))
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
|
||||
if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
|
||||
else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
|
||||
3...4 => if (self.hasFeature(.avx))
|
||||
return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
|
||||
return .{ .move = if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } },
|
||||
else => {},
|
||||
},
|
||||
128 => switch (ty.vectorLen()) {
|
||||
1 => return .{ .move = if (self.hasFeature(.avx))
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
|
||||
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
|
||||
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
|
||||
2 => if (self.hasFeature(.avx))
|
||||
return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
|
||||
return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
|
||||
else => {},
|
||||
},
|
||||
else => {},
|
||||
@ -8860,29 +8951,69 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
|
||||
);
|
||||
}
|
||||
},
|
||||
.register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister(
|
||||
if ((dst_reg.class() == .sse) == (src_reg.class() == .sse))
|
||||
switch (ty.zigTypeTag()) {
|
||||
else => .{ ._, .mov },
|
||||
.Float, .Vector => .{ ._ps, .mova },
|
||||
}
|
||||
else switch (abi_size) {
|
||||
2 => return try self.asmRegisterRegisterImmediate(
|
||||
if (dst_reg.class() == .sse) .{ .p_w, .insr } else .{ .p_w, .extr },
|
||||
registerAlias(dst_reg, 4),
|
||||
registerAlias(src_reg, 4),
|
||||
Immediate.u(0),
|
||||
.register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) {
|
||||
.general_purpose => switch (src_reg.class()) {
|
||||
.general_purpose => try self.asmRegisterRegister(
|
||||
.{ ._, .mov },
|
||||
registerAlias(dst_reg, abi_size),
|
||||
registerAlias(src_reg, abi_size),
|
||||
),
|
||||
4 => .{ ._d, .mov },
|
||||
8 => .{ ._q, .mov },
|
||||
else => return self.fail(
|
||||
"unsupported register copy from {s} to {s}",
|
||||
.{ @tagName(src_reg), @tagName(dst_reg) },
|
||||
.segment => try self.asmRegisterRegister(
|
||||
.{ ._, .mov },
|
||||
registerAlias(dst_reg, abi_size),
|
||||
src_reg,
|
||||
),
|
||||
.sse => try self.asmRegisterRegister(
|
||||
switch (abi_size) {
|
||||
1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
|
||||
5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
|
||||
else => unreachable,
|
||||
},
|
||||
registerAlias(dst_reg, @max(abi_size, 4)),
|
||||
src_reg.to128(),
|
||||
),
|
||||
.x87, .mmx => unreachable,
|
||||
},
|
||||
registerAlias(dst_reg, abi_size),
|
||||
registerAlias(src_reg, abi_size),
|
||||
),
|
||||
.segment => try self.asmRegisterRegister(
|
||||
.{ ._, .mov },
|
||||
dst_reg,
|
||||
switch (src_reg.class()) {
|
||||
.general_purpose, .segment => registerAlias(src_reg, abi_size),
|
||||
.sse => try self.copyToTmpRegister(ty, src_mcv),
|
||||
.x87, .mmx => unreachable,
|
||||
},
|
||||
),
|
||||
.sse => switch (src_reg.class()) {
|
||||
.general_purpose => try self.asmRegisterRegister(
|
||||
switch (abi_size) {
|
||||
1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
|
||||
5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
|
||||
else => unreachable,
|
||||
},
|
||||
dst_reg.to128(),
|
||||
registerAlias(src_reg, @max(abi_size, 4)),
|
||||
),
|
||||
.segment => try self.genSetReg(
|
||||
dst_reg,
|
||||
ty,
|
||||
.{ .register = try self.copyToTmpRegister(ty, src_mcv) },
|
||||
),
|
||||
.sse => try self.asmRegisterRegister(
|
||||
switch (ty.scalarType().zigTypeTag()) {
|
||||
else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
|
||||
.Float => switch (ty.floatBits(self.target.*)) {
|
||||
else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
|
||||
32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova },
|
||||
64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova },
|
||||
},
|
||||
},
|
||||
registerAlias(dst_reg, abi_size),
|
||||
registerAlias(src_reg, abi_size),
|
||||
),
|
||||
.x87, .mmx => unreachable,
|
||||
},
|
||||
.x87, .mmx => unreachable,
|
||||
},
|
||||
.register_offset,
|
||||
.indirect,
|
||||
.load_frame,
|
||||
@ -8918,14 +9049,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
|
||||
ie.insert,
|
||||
dst_alias,
|
||||
src_mem,
|
||||
ie.imm,
|
||||
Immediate.u(0),
|
||||
),
|
||||
.vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
|
||||
ie.insert,
|
||||
dst_alias,
|
||||
dst_alias,
|
||||
src_mem,
|
||||
ie.imm,
|
||||
Immediate.u(0),
|
||||
),
|
||||
}
|
||||
},
|
||||
@ -8947,14 +9078,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
|
||||
ie.insert,
|
||||
dst_alias,
|
||||
src_mem,
|
||||
ie.imm,
|
||||
Immediate.u(0),
|
||||
),
|
||||
.vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
|
||||
ie.insert,
|
||||
dst_alias,
|
||||
dst_alias,
|
||||
src_mem,
|
||||
ie.imm,
|
||||
Immediate.u(0),
|
||||
),
|
||||
}
|
||||
},
|
||||
@ -8994,14 +9125,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
|
||||
ie.insert,
|
||||
dst_alias,
|
||||
src_mem,
|
||||
ie.imm,
|
||||
Immediate.u(0),
|
||||
),
|
||||
.vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
|
||||
ie.insert,
|
||||
dst_alias,
|
||||
dst_alias,
|
||||
src_mem,
|
||||
ie.imm,
|
||||
Immediate.u(0),
|
||||
),
|
||||
}
|
||||
},
|
||||
@ -9129,7 +9260,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
|
||||
ie.extract,
|
||||
dst_mem,
|
||||
src_alias,
|
||||
ie.imm,
|
||||
Immediate.u(0),
|
||||
),
|
||||
}
|
||||
},
|
||||
@ -10499,7 +10630,7 @@ fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void {
|
||||
if (self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv;
|
||||
|
||||
const dst_mcv = try self.allocRegOrMem(inst, true);
|
||||
try self.genCopy(src_ty, dst_mcv, src_mcv);
|
||||
try self.genCopy(union_ty, dst_mcv, src_mcv);
|
||||
break :result dst_mcv;
|
||||
}
|
||||
|
||||
@ -11000,7 +11131,15 @@ fn registerAlias(reg: Register, size_bytes: u32) Register {
|
||||
reg.to64()
|
||||
else
|
||||
unreachable,
|
||||
.segment, .x87, .mmx => unreachable,
|
||||
.segment => if (size_bytes <= 2)
|
||||
reg
|
||||
else
|
||||
unreachable,
|
||||
.x87 => unreachable,
|
||||
.mmx => if (size_bytes <= 8)
|
||||
reg
|
||||
else
|
||||
unreachable,
|
||||
.sse => if (size_bytes <= 16)
|
||||
reg.to128()
|
||||
else if (size_bytes <= 32)
|
||||
|
||||
@ -261,7 +261,8 @@ pub const Mnemonic = enum {
|
||||
// X87
|
||||
fisttp, fld,
|
||||
// MMX
|
||||
movd,
|
||||
movd, movq,
|
||||
pand, pandn, por, pxor,
|
||||
// SSE
|
||||
addps, addss,
|
||||
andps,
|
||||
@ -293,7 +294,8 @@ pub const Mnemonic = enum {
|
||||
maxpd, maxsd,
|
||||
minpd, minsd,
|
||||
movapd,
|
||||
movq, //movd, movsd,
|
||||
movdqa, movdqu,
|
||||
//movsd,
|
||||
movupd,
|
||||
mulpd, mulsd,
|
||||
orpd,
|
||||
@ -316,6 +318,7 @@ pub const Mnemonic = enum {
|
||||
roundpd, roundps, roundsd, roundss,
|
||||
// AVX
|
||||
vaddpd, vaddps, vaddsd, vaddss,
|
||||
vandnpd, vandnps, vandpd, vandps,
|
||||
vbroadcastf128, vbroadcastsd, vbroadcastss,
|
||||
vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps,
|
||||
vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss,
|
||||
@ -327,22 +330,31 @@ pub const Mnemonic = enum {
|
||||
vmaxpd, vmaxps, vmaxsd, vmaxss,
|
||||
vminpd, vminps, vminsd, vminss,
|
||||
vmovapd, vmovaps,
|
||||
vmovddup, vmovhlps, vmovlhps,
|
||||
vmovd,
|
||||
vmovddup,
|
||||
vmovdqa, vmovdqu,
|
||||
vmovhlps, vmovlhps,
|
||||
vmovq,
|
||||
vmovsd,
|
||||
vmovshdup, vmovsldup,
|
||||
vmovss,
|
||||
vmovupd, vmovups,
|
||||
vmulpd, vmulps, vmulsd, vmulss,
|
||||
vorpd, vorps,
|
||||
vpand, vpandn,
|
||||
vpextrb, vpextrd, vpextrq, vpextrw,
|
||||
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
|
||||
vpor,
|
||||
vpshufhw, vpshuflw,
|
||||
vpsrld, vpsrlq, vpsrlw,
|
||||
vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd,
|
||||
vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd,
|
||||
vpxor,
|
||||
vroundpd, vroundps, vroundsd, vroundss,
|
||||
vshufpd, vshufps,
|
||||
vsqrtpd, vsqrtps, vsqrtsd, vsqrtss,
|
||||
vsubpd, vsubps, vsubsd, vsubss,
|
||||
vxorpd, vxorps,
|
||||
// F16C
|
||||
vcvtph2ps, vcvtps2ph,
|
||||
// FMA
|
||||
|
||||
@ -236,6 +236,14 @@ pub const Inst = struct {
|
||||
|
||||
/// VEX-Encoded ___
|
||||
v_,
|
||||
/// VEX-Encoded ___ Byte
|
||||
v_b,
|
||||
/// VEX-Encoded ___ Word
|
||||
v_w,
|
||||
/// VEX-Encoded ___ Doubleword
|
||||
v_d,
|
||||
/// VEX-Encoded ___ QuadWord
|
||||
v_q,
|
||||
/// VEX-Encoded Packed ___
|
||||
vp_,
|
||||
/// VEX-Encoded Packed ___ Byte
|
||||
@ -526,6 +534,10 @@ pub const Inst = struct {
|
||||
cvttps2dq,
|
||||
/// Convert with truncation scalar double-precision floating-point value to doubleword integer
|
||||
cvttsd2si,
|
||||
/// Move aligned packed integer values
|
||||
movdqa,
|
||||
/// Move unaligned packed integer values
|
||||
movdqu,
|
||||
/// Packed interleave shuffle of quadruplets of single-precision floating-point values
|
||||
/// Packed interleave shuffle of pairs of double-precision floating-point values
|
||||
shuf,
|
||||
|
||||
@ -970,11 +970,16 @@ pub const table = [_]Entry{
|
||||
.{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .none, .sse2 },
|
||||
.{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 },
|
||||
|
||||
.{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .long, .sse2 },
|
||||
.{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 },
|
||||
.{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .long, .sse2 },
|
||||
|
||||
.{ .movdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .none, .sse2 },
|
||||
.{ .movdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .none, .sse2 },
|
||||
|
||||
.{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 },
|
||||
.{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 },
|
||||
|
||||
.{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .none, .sse2 },
|
||||
.{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .none, .sse2 },
|
||||
|
||||
@ -987,10 +992,16 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pand, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pandn, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .por, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 },
|
||||
@ -1012,6 +1023,8 @@ pub const table = [_]Entry{
|
||||
.{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 },
|
||||
.{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pxor, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .none, .sse2 },
|
||||
|
||||
.{ .shufpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 },
|
||||
@ -1070,6 +1083,18 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vandnpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vandnpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vandnps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vandnps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x55 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vandpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vandpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vandps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vandps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx },
|
||||
.{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx },
|
||||
.{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx },
|
||||
@ -1169,13 +1194,31 @@ pub const table = [_]Entry{
|
||||
.{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256_wig, .avx },
|
||||
.{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmovd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w0, .avx },
|
||||
.{ .vmovq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w1, .avx },
|
||||
.{ .vmovd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w0, .avx },
|
||||
.{ .vmovq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w1, .avx },
|
||||
|
||||
.{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmovdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovdqa, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_256_wig, .avx },
|
||||
.{ .vmovdqa, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmovdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovdqu, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_256_wig, .avx },
|
||||
.{ .vmovdqu, .mr, &.{ .ymm_m256, .ymm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vmovq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
|
||||
.{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
|
||||
.{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
|
||||
@ -1212,6 +1255,16 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vpand, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx },
|
||||
.{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx },
|
||||
.{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx },
|
||||
@ -1225,6 +1278,8 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128_wig, .avx },
|
||||
.{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128_wig, .avx },
|
||||
@ -1242,6 +1297,8 @@ pub const table = [_]Entry{
|
||||
.{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpxor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vroundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vroundpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
@ -1278,6 +1335,12 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vxorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vxorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
// F16C
|
||||
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
|
||||
.{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
|
||||
@ -1313,6 +1376,12 @@ pub const table = [_]Entry{
|
||||
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
|
||||
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
|
||||
|
||||
.{ .vpand, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 },
|
||||
.{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 },
|
||||
@ -1329,5 +1398,7 @@ pub const table = [_]Entry{
|
||||
.{ .vpunpcklwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpunpckldq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpxor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_256_wig, .avx2 },
|
||||
};
|
||||
// zig fmt: on
|
||||
|
||||
12
src/type.zig
12
src/type.zig
@ -5433,8 +5433,18 @@ pub const Type = extern union {
|
||||
}
|
||||
}
|
||||
|
||||
// Works for integers and vectors of integers.
// Computes the maximum of the scalar type via `maxIntScalar`. For a vector type
// the scalar maximum is wrapped in a `repeated` value, unless the scalar is
// `the_only_possible_value`, in which case it is returned unchanged.
|
||||
pub fn maxInt(ty: Type, arena: Allocator, target: Target) !Value {
|
||||
const scalar = try maxIntScalar(ty.scalarType(), arena, target);
|
||||
if (ty.zigTypeTag() == .Vector and scalar.tag() != .the_only_possible_value) {
|
||||
return Value.Tag.repeated.create(arena, scalar);
|
||||
} else {
|
||||
return scalar;
|
||||
}
|
||||
}
|
||||
|
||||
/// Asserts that self.zigTypeTag() == .Int.
|
||||
pub fn maxInt(self: Type, arena: Allocator, target: Target) !Value {
|
||||
pub fn maxIntScalar(self: Type, arena: Allocator, target: Target) !Value {
|
||||
assert(self.zigTypeTag() == .Int);
|
||||
const info = self.intInfo(target);
|
||||
|
||||
|
||||
@ -532,7 +532,6 @@ fn testFabs() !void {
|
||||
|
||||
test "@fabs with vectors" {
|
||||
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
|
||||
|
||||
@ -1612,7 +1612,6 @@ test "absFloat" {
|
||||
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
|
||||
|
||||
|
||||
@ -65,7 +65,6 @@ test "cast negative integer to pointer" {
|
||||
|
||||
test "casting to union with a macro" {
|
||||
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user