mirror of
https://github.com/ziglang/zig.git
synced 2025-12-17 19:53:06 +00:00
Merge pull request #22913 from jacobly0/x86_64-rewrite
x86_64: rewrite unsafe int vector multiplication
This commit is contained in:
commit
d2e70ef84a
File diff suppressed because it is too large
Load Diff
@ -78,7 +78,7 @@ pub fn findByMnemonic(
|
|||||||
),
|
),
|
||||||
.x86_64 => false,
|
.x86_64 => false,
|
||||||
},
|
},
|
||||||
inline .@"invpcid 64bit", .@"rdpid 64bit" => |tag| switch (target.cpu.arch) {
|
inline .@"invpcid 64bit", .@"rdpid 64bit", .@"prefetchi 64bit" => |tag| switch (target.cpu.arch) {
|
||||||
else => unreachable,
|
else => unreachable,
|
||||||
.x86 => false,
|
.x86 => false,
|
||||||
.x86_64 => std.Target.x86.featureSetHas(
|
.x86_64 => std.Target.x86.featureSetHas(
|
||||||
@ -86,6 +86,7 @@ pub fn findByMnemonic(
|
|||||||
@field(std.Target.x86.Feature, @tagName(tag)[0 .. @tagName(tag).len - " 64bit".len]),
|
@field(std.Target.x86.Feature, @tagName(tag)[0 .. @tagName(tag).len - " 64bit".len]),
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
.prefetch => std.Target.x86.featureSetHasAny(target.cpu.features, .{ .sse, .prfchw, .prefetchi, .prefetchwt1 }),
|
||||||
inline else => |tag| has_features: {
|
inline else => |tag| has_features: {
|
||||||
comptime var feature_it = std.mem.splitScalar(u8, @tagName(tag), ' ');
|
comptime var feature_it = std.mem.splitScalar(u8, @tagName(tag), ' ');
|
||||||
comptime var features: []const std.Target.x86.Feature = &.{};
|
comptime var features: []const std.Target.x86.Feature = &.{};
|
||||||
@ -375,6 +376,7 @@ pub const Mnemonic = enum {
|
|||||||
orps,
|
orps,
|
||||||
pextrw, pinsrw,
|
pextrw, pinsrw,
|
||||||
pmaxsw, pmaxub, pminsw, pminub, pmovmskb,
|
pmaxsw, pmaxub, pminsw, pminub, pmovmskb,
|
||||||
|
prefetchit0, prefetchit1, prefetchnta, prefetcht0, prefetcht1, prefetcht2, prefetchw, prefetchwt1,
|
||||||
shufps,
|
shufps,
|
||||||
sqrtps, sqrtss,
|
sqrtps, sqrtss,
|
||||||
stmxcsr,
|
stmxcsr,
|
||||||
@ -459,6 +461,7 @@ pub const Mnemonic = enum {
|
|||||||
vhaddpd, vhaddps,
|
vhaddpd, vhaddps,
|
||||||
vinsertf128, vinsertps,
|
vinsertf128, vinsertps,
|
||||||
vlddqu, vldmxcsr,
|
vlddqu, vldmxcsr,
|
||||||
|
vmaskmovpd, vmaskmovps,
|
||||||
vmaxpd, vmaxps, vmaxsd, vmaxss,
|
vmaxpd, vmaxps, vmaxsd, vmaxss,
|
||||||
vminpd, vminps, vminsd, vminss,
|
vminpd, vminps, vminsd, vminss,
|
||||||
vmovapd, vmovaps,
|
vmovapd, vmovaps,
|
||||||
@ -481,6 +484,7 @@ pub const Mnemonic = enum {
|
|||||||
vpblendvb, vpblendw, vpclmulqdq,
|
vpblendvb, vpblendw, vpclmulqdq,
|
||||||
vpcmpeqb, vpcmpeqd, vpcmpeqq, vpcmpeqw,
|
vpcmpeqb, vpcmpeqd, vpcmpeqq, vpcmpeqw,
|
||||||
vpcmpgtb, vpcmpgtd, vpcmpgtq, vpcmpgtw,
|
vpcmpgtb, vpcmpgtd, vpcmpgtq, vpcmpgtw,
|
||||||
|
vperm2f128, vpermilpd, vpermilps,
|
||||||
vpextrb, vpextrd, vpextrq, vpextrw,
|
vpextrb, vpextrd, vpextrq, vpextrw,
|
||||||
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
|
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
|
||||||
vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw,
|
vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw,
|
||||||
@ -521,6 +525,9 @@ pub const Mnemonic = enum {
|
|||||||
// AVX2
|
// AVX2
|
||||||
vbroadcasti128, vpbroadcastb, vpbroadcastd, vpbroadcastq, vpbroadcastw,
|
vbroadcasti128, vpbroadcastb, vpbroadcastd, vpbroadcastq, vpbroadcastw,
|
||||||
vextracti128, vinserti128, vpblendd,
|
vextracti128, vinserti128, vpblendd,
|
||||||
|
vperm2i128, vpermd, vpermpd, vpermps, vpermq,
|
||||||
|
vpmaskmovd, vpmaskmovq,
|
||||||
|
vpsllvd, vpsllvq, vpsravd, vpsrlvd, vpsrlvq,
|
||||||
// ADX
|
// ADX
|
||||||
adcx, adox,
|
adcx, adox,
|
||||||
// AESKLE
|
// AESKLE
|
||||||
@ -557,8 +564,7 @@ pub const Op = enum {
|
|||||||
r32_m8, r32_m16, r64_m16,
|
r32_m8, r32_m16, r64_m16,
|
||||||
m8, m16, m32, m64, m80, m128, m256,
|
m8, m16, m32, m64, m80, m128, m256,
|
||||||
rel8, rel16, rel32,
|
rel8, rel16, rel32,
|
||||||
m,
|
m, moffs, mrip8,
|
||||||
moffs,
|
|
||||||
sreg,
|
sreg,
|
||||||
st0, st, mm, mm_m64,
|
st0, st, mm, mm_m64,
|
||||||
xmm0, xmm, xmm_m8, xmm_m16, xmm_m32, xmm_m64, xmm_m128,
|
xmm0, xmm, xmm_m8, xmm_m16, xmm_m32, xmm_m64, xmm_m128,
|
||||||
@ -612,7 +618,7 @@ pub const Op = enum {
|
|||||||
|
|
||||||
.mem => |mem| switch (mem) {
|
.mem => |mem| switch (mem) {
|
||||||
.moffs => .moffs,
|
.moffs => .moffs,
|
||||||
.sib, .rip => switch (mem.bitSize(target)) {
|
.sib => switch (mem.bitSize(target)) {
|
||||||
0 => .m,
|
0 => .m,
|
||||||
8 => .m8,
|
8 => .m8,
|
||||||
16 => .m16,
|
16 => .m16,
|
||||||
@ -623,6 +629,16 @@ pub const Op = enum {
|
|||||||
256 => .m256,
|
256 => .m256,
|
||||||
else => unreachable,
|
else => unreachable,
|
||||||
},
|
},
|
||||||
|
.rip => switch (mem.bitSize(target)) {
|
||||||
|
0, 8 => .mrip8,
|
||||||
|
16 => .m16,
|
||||||
|
32 => .m32,
|
||||||
|
64 => .m64,
|
||||||
|
80 => .m80,
|
||||||
|
128 => .m128,
|
||||||
|
256 => .m256,
|
||||||
|
else => unreachable,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
.imm => |imm| switch (imm) {
|
.imm => |imm| switch (imm) {
|
||||||
@ -675,7 +691,7 @@ pub const Op = enum {
|
|||||||
|
|
||||||
pub fn immBitSize(op: Op) u64 {
|
pub fn immBitSize(op: Op) u64 {
|
||||||
return switch (op) {
|
return switch (op) {
|
||||||
.none, .moffs, .m, .sreg => unreachable,
|
.none, .m, .moffs, .mrip8, .sreg => unreachable,
|
||||||
.al, .cl, .dx, .rip, .eip, .ip, .r8, .rm8, .r32_m8 => unreachable,
|
.al, .cl, .dx, .rip, .eip, .ip, .r8, .rm8, .r32_m8 => unreachable,
|
||||||
.ax, .r16, .rm16 => unreachable,
|
.ax, .r16, .rm16 => unreachable,
|
||||||
.eax, .r32, .rm32, .r32_m16 => unreachable,
|
.eax, .r32, .rm32, .r32_m16 => unreachable,
|
||||||
@ -695,7 +711,7 @@ pub const Op = enum {
|
|||||||
|
|
||||||
pub fn regBitSize(op: Op) u64 {
|
pub fn regBitSize(op: Op) u64 {
|
||||||
return switch (op) {
|
return switch (op) {
|
||||||
.none, .moffs, .m, .sreg => unreachable,
|
.none, .m, .moffs, .mrip8, .sreg => unreachable,
|
||||||
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
|
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
|
||||||
.rel8, .rel16, .rel32 => unreachable,
|
.rel8, .rel16, .rel32 => unreachable,
|
||||||
.m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
|
.m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
|
||||||
@ -711,13 +727,13 @@ pub const Op = enum {
|
|||||||
|
|
||||||
pub fn memBitSize(op: Op) u64 {
|
pub fn memBitSize(op: Op) u64 {
|
||||||
return switch (op) {
|
return switch (op) {
|
||||||
.none, .moffs, .m, .sreg => unreachable,
|
.none, .m, .moffs, .sreg => unreachable,
|
||||||
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
|
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
|
||||||
.rel8, .rel16, .rel32 => unreachable,
|
.rel8, .rel16, .rel32 => unreachable,
|
||||||
.al, .cl, .r8, .ax, .dx, .ip, .r16, .eax, .eip, .r32, .rax, .rip, .r64 => unreachable,
|
.al, .cl, .r8, .ax, .dx, .ip, .r16, .eax, .eip, .r32, .rax, .rip, .r64 => unreachable,
|
||||||
.st0, .st, .mm, .xmm0, .xmm, .ymm => unreachable,
|
.st0, .st, .mm, .xmm0, .xmm, .ymm => unreachable,
|
||||||
.cr, .dr => unreachable,
|
.cr, .dr => unreachable,
|
||||||
.m8, .rm8, .r32_m8, .xmm_m8 => 8,
|
.mrip8, .m8, .rm8, .r32_m8, .xmm_m8 => 8,
|
||||||
.m16, .rm16, .r32_m16, .r64_m16, .xmm_m16 => 16,
|
.m16, .rm16, .r32_m16, .r64_m16, .xmm_m16 => 16,
|
||||||
.m32, .rm32, .xmm_m32 => 32,
|
.m32, .rm32, .xmm_m32 => 32,
|
||||||
.m64, .rm64, .mm_m64, .xmm_m64 => 64,
|
.m64, .rm64, .mm_m64, .xmm_m64 => 64,
|
||||||
@ -778,7 +794,7 @@ pub const Op = enum {
|
|||||||
.rm8, .rm16, .rm32, .rm64,
|
.rm8, .rm16, .rm32, .rm64,
|
||||||
.r32_m8, .r32_m16, .r64_m16,
|
.r32_m8, .r32_m16, .r64_m16,
|
||||||
.m8, .m16, .m32, .m64, .m80, .m128, .m256,
|
.m8, .m16, .m32, .m64, .m80, .m128, .m256,
|
||||||
.m,
|
.m, .moffs, .mrip8,
|
||||||
.mm_m64,
|
.mm_m64,
|
||||||
.xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128,
|
.xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128,
|
||||||
.ymm_m256,
|
.ymm_m256,
|
||||||
@ -816,11 +832,7 @@ pub const Op = enum {
|
|||||||
/// Given an operand `op` checks if `target` is a subset for the purposes of the encoding.
|
/// Given an operand `op` checks if `target` is a subset for the purposes of the encoding.
|
||||||
pub fn isSubset(op: Op, target: Op) bool {
|
pub fn isSubset(op: Op, target: Op) bool {
|
||||||
switch (op) {
|
switch (op) {
|
||||||
.moffs, .sreg => return op == target,
|
.none, .m, .moffs, .sreg => return op == target,
|
||||||
.none => switch (target) {
|
|
||||||
.none => return true,
|
|
||||||
else => return false,
|
|
||||||
},
|
|
||||||
else => {
|
else => {
|
||||||
if (op.isRegister() and target.isRegister()) {
|
if (op.isRegister() and target.isRegister()) {
|
||||||
return switch (target.toReg()) {
|
return switch (target.toReg()) {
|
||||||
@ -831,6 +843,7 @@ pub const Op = enum {
|
|||||||
if (op.isMemory() and target.isMemory()) {
|
if (op.isMemory() and target.isMemory()) {
|
||||||
switch (target) {
|
switch (target) {
|
||||||
.m => return true,
|
.m => return true,
|
||||||
|
.moffs, .mrip8 => return op == target,
|
||||||
else => return op.memBitSize() == target.memBitSize(),
|
else => return op.memBitSize() == target.memBitSize(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -957,6 +970,10 @@ pub const Feature = enum {
|
|||||||
@"pclmul avx",
|
@"pclmul avx",
|
||||||
pku,
|
pku,
|
||||||
popcnt,
|
popcnt,
|
||||||
|
prefetch,
|
||||||
|
@"prefetchi 64bit",
|
||||||
|
prefetchwt1,
|
||||||
|
prfchw,
|
||||||
rdrnd,
|
rdrnd,
|
||||||
rdseed,
|
rdseed,
|
||||||
@"rdpid 32bit",
|
@"rdpid 32bit",
|
||||||
@ -997,7 +1014,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op
|
|||||||
}
|
}
|
||||||
|
|
||||||
const mnemonic_to_encodings_map = init: {
|
const mnemonic_to_encodings_map = init: {
|
||||||
@setEvalBranchQuota(5_600);
|
@setEvalBranchQuota(5_800);
|
||||||
const mnemonic_count = @typeInfo(Mnemonic).@"enum".fields.len;
|
const mnemonic_count = @typeInfo(Mnemonic).@"enum".fields.len;
|
||||||
var mnemonic_map: [mnemonic_count][]Data = @splat(&.{});
|
var mnemonic_map: [mnemonic_count][]Data = @splat(&.{});
|
||||||
const encodings = @import("encodings.zig");
|
const encodings = @import("encodings.zig");
|
||||||
|
|||||||
@ -34,8 +34,18 @@ pub const Inst = struct {
|
|||||||
/// ___ 4
|
/// ___ 4
|
||||||
_4,
|
_4,
|
||||||
|
|
||||||
|
/// ___ With NTA Hint
|
||||||
|
_nta,
|
||||||
/// System Call ___
|
/// System Call ___
|
||||||
sys_,
|
sys_,
|
||||||
|
/// ___ With T0 Hint
|
||||||
|
_t0,
|
||||||
|
/// ___ With T1 Hint
|
||||||
|
_t1,
|
||||||
|
/// ___ With T2 Hint
|
||||||
|
_t2,
|
||||||
|
/// ___ With Intent to Write and T1 Hint
|
||||||
|
_wt1,
|
||||||
|
|
||||||
/// ___ crement Shadow Stack Pointer Doubleword
|
/// ___ crement Shadow Stack Pointer Doubleword
|
||||||
_csspd,
|
_csspd,
|
||||||
@ -198,6 +208,7 @@ pub const Inst = struct {
|
|||||||
//_b,
|
//_b,
|
||||||
/// ___ Word
|
/// ___ Word
|
||||||
/// ___ For Writing
|
/// ___ For Writing
|
||||||
|
/// ___ With Intent to Write
|
||||||
_w,
|
_w,
|
||||||
/// ___ Doubleword
|
/// ___ Doubleword
|
||||||
//_d,
|
//_d,
|
||||||
@ -756,6 +767,8 @@ pub const Inst = struct {
|
|||||||
/// Swap GS base register
|
/// Swap GS base register
|
||||||
swapgs,
|
swapgs,
|
||||||
/// Test condition
|
/// Test condition
|
||||||
|
/// Logical compare
|
||||||
|
/// Packed bit test
|
||||||
@"test",
|
@"test",
|
||||||
/// Undefined instruction
|
/// Undefined instruction
|
||||||
ud,
|
ud,
|
||||||
@ -973,6 +986,9 @@ pub const Inst = struct {
|
|||||||
/// Move unaligned packed single-precision floating-point values
|
/// Move unaligned packed single-precision floating-point values
|
||||||
/// Move unaligned packed double-precision floating-point values
|
/// Move unaligned packed double-precision floating-point values
|
||||||
movu,
|
movu,
|
||||||
|
/// Prefetch data into caches
|
||||||
|
/// Prefetch data into caches with intent to write
|
||||||
|
prefetch,
|
||||||
/// Packed interleave shuffle of quadruplets of single-precision floating-point values
|
/// Packed interleave shuffle of quadruplets of single-precision floating-point values
|
||||||
/// Packed interleave shuffle of pairs of double-precision floating-point values
|
/// Packed interleave shuffle of pairs of double-precision floating-point values
|
||||||
/// Shuffle packed doublewords
|
/// Shuffle packed doublewords
|
||||||
@ -1053,6 +1069,7 @@ pub const Inst = struct {
|
|||||||
/// Blend scalar single-precision floating-point values
|
/// Blend scalar single-precision floating-point values
|
||||||
/// Blend packed double-precision floating-point values
|
/// Blend packed double-precision floating-point values
|
||||||
/// Blend scalar double-precision floating-point values
|
/// Blend scalar double-precision floating-point values
|
||||||
|
/// Blend packed dwords
|
||||||
blend,
|
blend,
|
||||||
/// Variable blend packed single-precision floating-point values
|
/// Variable blend packed single-precision floating-point values
|
||||||
/// Variable blend scalar single-precision floating-point values
|
/// Variable blend scalar single-precision floating-point values
|
||||||
@ -1127,20 +1144,37 @@ pub const Inst = struct {
|
|||||||
sha256rnds,
|
sha256rnds,
|
||||||
|
|
||||||
// AVX
|
// AVX
|
||||||
|
/// Load with broadcast floating-point data
|
||||||
|
/// Load integer and broadcast
|
||||||
|
broadcast,
|
||||||
|
/// Conditional SIMD packed loads and stores
|
||||||
|
/// Conditional SIMD integer packed loads and stores
|
||||||
|
maskmov,
|
||||||
|
/// Permute floating-point values
|
||||||
|
/// Permute integer values
|
||||||
|
perm2,
|
||||||
|
/// Permute in-lane pairs of double-precision floating-point values
|
||||||
|
/// Permute in-lane quadruples of single-precision floating-point values
|
||||||
|
permil,
|
||||||
|
|
||||||
|
// BMI
|
||||||
/// Bit field extract
|
/// Bit field extract
|
||||||
bextr,
|
bextr,
|
||||||
/// Extract lowest set isolated bit
|
/// Extract lowest set isolated bit
|
||||||
/// Get mask up to lowest set bit
|
/// Get mask up to lowest set bit
|
||||||
/// Reset lowest set bit
|
/// Reset lowest set bit
|
||||||
bls,
|
bls,
|
||||||
/// Load with broadcast floating-point data
|
|
||||||
/// Load integer and broadcast
|
|
||||||
broadcast,
|
|
||||||
/// Zero high bits starting with specified bit position
|
|
||||||
bzhi,
|
|
||||||
/// Count the number of trailing zero bits
|
/// Count the number of trailing zero bits
|
||||||
tzcnt,
|
tzcnt,
|
||||||
|
|
||||||
|
// BMI2
|
||||||
|
/// Zero high bits starting with specified bit position
|
||||||
|
bzhi,
|
||||||
|
/// Parallel bits deposit
|
||||||
|
pdep,
|
||||||
|
/// Parallel bits extract
|
||||||
|
pext,
|
||||||
|
|
||||||
// F16C
|
// F16C
|
||||||
/// Convert 16-bit floating-point values to single-precision floating-point values
|
/// Convert 16-bit floating-point values to single-precision floating-point values
|
||||||
cvtph2,
|
cvtph2,
|
||||||
@ -1164,6 +1198,19 @@ pub const Inst = struct {
|
|||||||
/// Fused multiply-add of scalar double-precision floating-point values
|
/// Fused multiply-add of scalar double-precision floating-point values
|
||||||
fmadd231,
|
fmadd231,
|
||||||
|
|
||||||
|
// AVX2
|
||||||
|
/// Permute packed doubleword elements
|
||||||
|
/// Permute packed qword elements
|
||||||
|
/// Permute double-precision floating-point elements
|
||||||
|
/// Permute single-precision floating-point elements
|
||||||
|
perm,
|
||||||
|
/// Variable bit shift left logical
|
||||||
|
sllv,
|
||||||
|
/// Variable bit shift right arithmetic
|
||||||
|
srav,
|
||||||
|
/// Variable bit shift right logical
|
||||||
|
srlv,
|
||||||
|
|
||||||
// ADX
|
// ADX
|
||||||
/// Unsigned integer addition of two operands with overflow flag
|
/// Unsigned integer addition of two operands with overflow flag
|
||||||
ado,
|
ado,
|
||||||
|
|||||||
@ -1370,6 +1370,18 @@ pub const table = [_]Entry{
|
|||||||
.{ .pmovmskb, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .none, .sse },
|
.{ .pmovmskb, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .none, .sse },
|
||||||
.{ .pmovmskb, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .none, .sse },
|
.{ .pmovmskb, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .none, .sse },
|
||||||
|
|
||||||
|
.{ .prefetchit0, .m, &.{ .mrip8 }, &.{ 0x0f, 0x18 }, 7, .none, .@"prefetchi 64bit" },
|
||||||
|
.{ .prefetchit1, .m, &.{ .mrip8 }, &.{ 0x0f, 0x18 }, 6, .none, .@"prefetchi 64bit" },
|
||||||
|
|
||||||
|
.{ .prefetchnta, .m, &.{ .m8 }, &.{ 0x0f, 0x18 }, 0, .none, .prefetch },
|
||||||
|
.{ .prefetcht0, .m, &.{ .m8 }, &.{ 0x0f, 0x18 }, 1, .none, .prefetch },
|
||||||
|
.{ .prefetcht1, .m, &.{ .m8 }, &.{ 0x0f, 0x18 }, 2, .none, .prefetch },
|
||||||
|
.{ .prefetcht2, .m, &.{ .m8 }, &.{ 0x0f, 0x18 }, 3, .none, .prefetch },
|
||||||
|
|
||||||
|
.{ .prefetchw, .m, &.{ .m8 }, &.{ 0x0f, 0x0d }, 1, .none, .prfchw },
|
||||||
|
|
||||||
|
.{ .prefetchwt1, .m, &.{ .m8 }, &.{ 0x0f, 0x0d }, 2, .none, .prefetchwt1 },
|
||||||
|
|
||||||
.{ .shufps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .none, .sse },
|
.{ .shufps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .none, .sse },
|
||||||
|
|
||||||
.{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse },
|
.{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse },
|
||||||
@ -1932,6 +1944,15 @@ pub const table = [_]Entry{
|
|||||||
|
|
||||||
.{ .vldmxcsr, .m, &.{ .m32 }, &.{ 0x0f, 0xae }, 2, .vex_lz_wig, .avx },
|
.{ .vldmxcsr, .m, &.{ .m32 }, &.{ 0x0f, 0xae }, 2, .vex_lz_wig, .avx },
|
||||||
|
|
||||||
|
.{ .vmaskmovps, .rvm, &.{ .xmm, .xmm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x2c }, 0, .vex_128_w0, .avx },
|
||||||
|
.{ .vmaskmovps, .rvm, &.{ .ymm, .ymm, .m256 }, &.{ 0x66, 0x0f, 0x38, 0x2c }, 0, .vex_256_w0, .avx },
|
||||||
|
.{ .vmaskmovpd, .rvm, &.{ .xmm, .xmm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x2d }, 0, .vex_128_w0, .avx },
|
||||||
|
.{ .vmaskmovpd, .rvm, &.{ .ymm, .ymm, .m256 }, &.{ 0x66, 0x0f, 0x38, 0x2d }, 0, .vex_256_w0, .avx },
|
||||||
|
.{ .vmaskmovps, .mvr, &.{ .m128, .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x2e }, 0, .vex_128_w0, .avx },
|
||||||
|
.{ .vmaskmovps, .mvr, &.{ .m256, .ymm, .ymm }, &.{ 0x66, 0x0f, 0x38, 0x2e }, 0, .vex_256_w0, .avx },
|
||||||
|
.{ .vmaskmovpd, .mvr, &.{ .m128, .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x2f }, 0, .vex_128_w0, .avx },
|
||||||
|
.{ .vmaskmovpd, .mvr, &.{ .m256, .ymm, .ymm }, &.{ 0x66, 0x0f, 0x38, 0x2f }, 0, .vex_256_w0, .avx },
|
||||||
|
|
||||||
.{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx },
|
.{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx },
|
||||||
.{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx },
|
.{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx },
|
||||||
|
|
||||||
@ -2097,6 +2118,18 @@ pub const table = [_]Entry{
|
|||||||
|
|
||||||
.{ .vpcmpgtq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .vex_128_wig, .avx },
|
.{ .vpcmpgtq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .vex_128_wig, .avx },
|
||||||
|
|
||||||
|
.{ .vperm2f128, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x06 }, 0, .vex_256_w0, .avx },
|
||||||
|
|
||||||
|
.{ .vpermilpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x0d }, 0, .vex_128_w0, .avx },
|
||||||
|
.{ .vpermilpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x0d }, 0, .vex_256_w0, .avx },
|
||||||
|
.{ .vpermilpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x05 }, 0, .vex_128_w0, .avx },
|
||||||
|
.{ .vpermilpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x05 }, 0, .vex_256_w0, .avx },
|
||||||
|
|
||||||
|
// opcode 0x0c is the variable-control form of vpermilps (0x0d is vpermilpd)
.{ .vpermilps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x0c }, 0, .vex_128_w0, .avx },
|
||||||
|
.{ .vpermilps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x04 }, 0, .vex_128_w0, .avx },
|
||||||
|
.{ .vpermilps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x0c }, 0, .vex_256_w0, .avx },
|
||||||
|
.{ .vpermilps, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x04 }, 0, .vex_256_w0, .avx },
|
||||||
|
|
||||||
.{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx },
|
.{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx },
|
||||||
.{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx },
|
.{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx },
|
||||||
.{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx },
|
.{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx },
|
||||||
@ -2418,6 +2451,25 @@ pub const table = [_]Entry{
|
|||||||
|
|
||||||
.{ .vpcmpgtq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .vex_256_wig, .avx2 },
|
.{ .vpcmpgtq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .vex_256_wig, .avx2 },
|
||||||
|
|
||||||
|
.{ .vperm2i128, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x46 }, 0, .vex_256_w0, .avx2 },
|
||||||
|
|
||||||
|
.{ .vpermd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x36 }, 0, .vex_256_w0, .avx2 },
|
||||||
|
|
||||||
|
.{ .vpermpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x01 }, 0, .vex_256_w1, .avx2 },
|
||||||
|
|
||||||
|
.{ .vpermps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x16 }, 0, .vex_256_w0, .avx2 },
|
||||||
|
|
||||||
|
.{ .vpermq, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x00 }, 0, .vex_256_w1, .avx2 },
|
||||||
|
|
||||||
|
.{ .vpmaskmovd, .rvm, &.{ .xmm, .xmm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x8c }, 0, .vex_128_w0, .avx2 },
|
||||||
|
.{ .vpmaskmovd, .rvm, &.{ .ymm, .ymm, .m256 }, &.{ 0x66, 0x0f, 0x38, 0x8c }, 0, .vex_256_w0, .avx2 },
|
||||||
|
.{ .vpmaskmovq, .rvm, &.{ .xmm, .xmm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x8c }, 0, .vex_128_w1, .avx2 },
|
||||||
|
.{ .vpmaskmovq, .rvm, &.{ .ymm, .ymm, .m256 }, &.{ 0x66, 0x0f, 0x38, 0x8c }, 0, .vex_256_w1, .avx2 },
|
||||||
|
.{ .vpmaskmovd, .mvr, &.{ .m128, .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x8e }, 0, .vex_128_w0, .avx2 },
|
||||||
|
.{ .vpmaskmovd, .mvr, &.{ .m256, .ymm, .ymm }, &.{ 0x66, 0x0f, 0x38, 0x8e }, 0, .vex_256_w0, .avx2 },
|
||||||
|
.{ .vpmaskmovq, .mvr, &.{ .m128, .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x8e }, 0, .vex_128_w1, .avx2 },
|
||||||
|
.{ .vpmaskmovq, .mvr, &.{ .m256, .ymm, .ymm }, &.{ 0x66, 0x0f, 0x38, 0x8e }, 0, .vex_256_w1, .avx2 },
|
||||||
|
|
||||||
.{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx2 },
|
.{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx2 },
|
||||||
.{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx2 },
|
.{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx2 },
|
||||||
.{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx2 },
|
.{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx2 },
|
||||||
@ -2477,11 +2529,19 @@ pub const table = [_]Entry{
|
|||||||
|
|
||||||
.{ .vpslldq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 7, .vex_256_wig, .avx2 },
|
.{ .vpslldq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 7, .vex_256_wig, .avx2 },
|
||||||
|
|
||||||
|
.{ .vpsllvd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x47 }, 0, .vex_128_w0, .avx2 },
|
||||||
|
.{ .vpsllvq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x47 }, 0, .vex_128_w1, .avx2 },
|
||||||
|
.{ .vpsllvd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x47 }, 0, .vex_256_w0, .avx2 },
|
||||||
|
.{ .vpsllvq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x47 }, 0, .vex_256_w1, .avx2 },
|
||||||
|
|
||||||
.{ .vpsraw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe1 }, 0, .vex_256_wig, .avx2 },
|
.{ .vpsraw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe1 }, 0, .vex_256_wig, .avx2 },
|
||||||
.{ .vpsraw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 4, .vex_256_wig, .avx2 },
|
.{ .vpsraw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 4, .vex_256_wig, .avx2 },
|
||||||
.{ .vpsrad, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe2 }, 0, .vex_256_wig, .avx2 },
|
.{ .vpsrad, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe2 }, 0, .vex_256_wig, .avx2 },
|
||||||
.{ .vpsrad, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 4, .vex_256_wig, .avx2 },
|
.{ .vpsrad, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 4, .vex_256_wig, .avx2 },
|
||||||
|
|
||||||
|
.{ .vpsravd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x46 }, 0, .vex_128_w0, .avx2 },
|
||||||
|
.{ .vpsravd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x46 }, 0, .vex_256_w0, .avx2 },
|
||||||
|
|
||||||
.{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
|
.{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
|
||||||
.{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 },
|
.{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 },
|
||||||
.{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 },
|
.{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 },
|
||||||
@ -2489,7 +2549,12 @@ pub const table = [_]Entry{
|
|||||||
.{ .vpsrlq, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_256_wig, .avx2 },
|
.{ .vpsrlq, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_256_wig, .avx2 },
|
||||||
.{ .vpsrlq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_256_wig, .avx2 },
|
.{ .vpsrlq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_256_wig, .avx2 },
|
||||||
|
|
||||||
.{ .vpsrldq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 3, .vex_128_wig, .avx2 },
|
.{ .vpsrldq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 3, .vex_256_wig, .avx2 },
|
||||||
|
|
||||||
|
.{ .vpsrlvd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x45 }, 0, .vex_128_w0, .avx2 },
|
||||||
|
.{ .vpsrlvq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x45 }, 0, .vex_128_w1, .avx2 },
|
||||||
|
.{ .vpsrlvd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x45 }, 0, .vex_256_w0, .avx2 },
|
||||||
|
.{ .vpsrlvq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x45 }, 0, .vex_256_w1, .avx2 },
|
||||||
|
|
||||||
.{ .vpsubb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .vex_256_wig, .avx2 },
|
.{ .vpsubb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .vex_256_wig, .avx2 },
|
||||||
.{ .vpsubw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .vex_256_wig, .avx2 },
|
.{ .vpsubw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .vex_256_wig, .avx2 },
|
||||||
|
|||||||
@ -55,6 +55,17 @@ fn DoubleBits(comptime Type: type) type {
|
|||||||
.vector => |vector| @Vector(vector.len, ResultScalar),
|
.vector => |vector| @Vector(vector.len, ResultScalar),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
/// Returns `Type` with its scalar integer bit width rounded up to the next
/// multiple of `multiple` (signedness preserved). Float scalars are returned
/// unchanged; any other scalar kind is a compile error naming `Type`.
/// Vector types yield a vector of the same length over the widened scalar.
/// NOTE(review): relies on `Scalar(Type)` yielding the element type for
/// vectors and `Type` itself otherwise — confirm against its definition.
fn RoundBitsUp(comptime Type: type, comptime multiple: u16) type {
|
||||||
|
const ResultScalar = switch (@typeInfo(Scalar(Type))) {
|
||||||
|
.int => |int| @Type(.{ .int = .{ .signedness = int.signedness, .bits = std.mem.alignForward(u16, int.bits, multiple) } }),
|
||||||
|
.float => Scalar(Type),
|
||||||
|
else => @compileError(@typeName(Type)),
|
||||||
|
};
|
||||||
|
return switch (@typeInfo(Type)) {
|
||||||
|
else => ResultScalar,
|
||||||
|
.vector => |vector| @Vector(vector.len, ResultScalar),
|
||||||
|
};
|
||||||
|
}
|
||||||
// inline to avoid a runtime `@splat`
|
// inline to avoid a runtime `@splat`
|
||||||
inline fn splat(comptime Type: type, scalar: Scalar(Type)) Type {
|
inline fn splat(comptime Type: type, scalar: Scalar(Type)) Type {
|
||||||
return switch (@typeInfo(Type)) {
|
return switch (@typeInfo(Type)) {
|
||||||
@ -17962,6 +17973,78 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
|
|||||||
try testArgs(f128, nan(f128), nan(f128));
|
try testArgs(f128, nan(f128), nan(f128));
|
||||||
}
|
}
|
||||||
fn testIntVectors() !void {
|
fn testIntVectors() !void {
|
||||||
|
try testArgs(@Vector(1, i4), .{
|
||||||
|
0x1,
|
||||||
|
}, .{
|
||||||
|
0x3,
|
||||||
|
});
|
||||||
|
try testArgs(@Vector(2, i4), .{
|
||||||
|
-0x1, 0x7,
|
||||||
|
}, .{
|
||||||
|
-0x7, 0x6,
|
||||||
|
});
|
||||||
|
try testArgs(@Vector(4, i4), .{
|
||||||
|
-0x1, 0x2, -0x3, -0x6,
|
||||||
|
}, .{
|
||||||
|
-0x2, -0x6, -0x4, 0x1,
|
||||||
|
});
|
||||||
|
try testArgs(@Vector(8, i4), .{
|
||||||
|
-0x4, 0x6, -0x4, -0x1, -0x1, 0x6, 0x5, 0x2,
|
||||||
|
}, .{
|
||||||
|
0x2, 0x4, -0x3, -0x6, 0x1, -0x5, -0x1, 0x2,
|
||||||
|
});
|
||||||
|
// workaround https://github.com/ziglang/zig/issues/22914
|
||||||
|
// TODO: try testArgs(@Vector(16, i4), .{
|
||||||
|
// 0x4, 0x1, -0x7, -0x2, -0x7, 0x4, -0x4, -0x8, -0x1, 0x0, -0x8, 0x5, -0x5, 0x3, 0x3, 0x2,
|
||||||
|
// }, .{
|
||||||
|
// 0x7, -0x7, -0x6, -0x1, 0x3, -0x5, -0x3, -0x6, 0x4, 0x4, -0x2, 0x7, -0x2, 0x6, -0x4, -0x1,
|
||||||
|
// });
|
||||||
|
try testArgs(@Vector(16, i4), .{
|
||||||
|
0x7, -0x7, -0x6, -0x1, 0x3, -0x5, -0x3, -0x6, 0x4, 0x4, -0x2, 0x7, -0x2, 0x6, -0x4, -0x1,
|
||||||
|
}, .{
|
||||||
|
0x4, 0x1, -0x7, -0x2, -0x7, 0x4, -0x4, -0x8, -0x1, 0x1, -0x8, 0x5, -0x5, 0x3, 0x3, 0x2,
|
||||||
|
});
|
||||||
|
try testArgs(@Vector(32, i4), .{
|
||||||
|
0x0, 0x4, 0x0, -0x6, -0x7, 0x4, -0x3, 0x4, -0x5, 0x2, 0x3, 0x2, -0x6, -0x4, -0x4, -0x3,
|
||||||
|
0x7, -0x5, -0x3, 0x2, -0x4, 0x4, -0x1, 0x6, -0x7, -0x1, -0x6, -0x2, -0x4, -0x2, 0x5, 0x0,
|
||||||
|
}, .{
|
||||||
|
0x5, 0x1, 0x5, 0x7, 0x1, -0x3, 0x3, 0x3, 0x5, 0x4, 0x1, 0x5, 0x4, -0x8, -0x3, -0x6,
|
||||||
|
-0x2, 0x3, 0x1, 0x2, 0x4, 0x4, -0x8, 0x2, 0x6, -0x1, 0x1, 0x3, -0x1, -0x3, 0x7, -0x7,
|
||||||
|
});
|
||||||
|
|
||||||
|
try testArgs(@Vector(1, u4), .{
|
||||||
|
0xe,
|
||||||
|
}, .{
|
||||||
|
0xc,
|
||||||
|
});
|
||||||
|
try testArgs(@Vector(2, u4), .{
|
||||||
|
0x2, 0x5,
|
||||||
|
}, .{
|
||||||
|
0x9, 0xe,
|
||||||
|
});
|
||||||
|
try testArgs(@Vector(4, u4), .{
|
||||||
|
0x2, 0xb, 0xc, 0x7,
|
||||||
|
}, .{
|
||||||
|
0x2, 0xa, 0x8, 0x1,
|
||||||
|
});
|
||||||
|
try testArgs(@Vector(8, u4), .{
|
||||||
|
0xf, 0x9, 0x0, 0x6, 0x8, 0x7, 0xd, 0x7,
|
||||||
|
}, .{
|
||||||
|
0xb, 0xb, 0x3, 0x6, 0x1, 0x5, 0x4, 0xd,
|
||||||
|
});
|
||||||
|
try testArgs(@Vector(16, u4), .{
|
||||||
|
0x5, 0x1, 0xa, 0x6, 0xb, 0x3, 0x0, 0x7, 0x8, 0x0, 0x9, 0xe, 0x2, 0x9, 0x2, 0x5,
|
||||||
|
}, .{
|
||||||
|
0x4, 0x9, 0x4, 0x8, 0x5, 0x7, 0xf, 0x8, 0x3, 0xc, 0x6, 0x9, 0xd, 0xd, 0x2, 0xd,
|
||||||
|
});
|
||||||
|
try testArgs(@Vector(32, u4), .{
|
||||||
|
0xa, 0x5, 0xd, 0x4, 0xe, 0xf, 0xf, 0x2, 0xb, 0x3, 0x9, 0x2, 0x1, 0x9, 0x6, 0x8,
|
||||||
|
0x7, 0xc, 0x3, 0x5, 0x4, 0xb, 0x5, 0x4, 0x8, 0x2, 0x5, 0x9, 0xf, 0x6, 0x7, 0x7,
|
||||||
|
}, .{
|
||||||
|
0xb, 0xf, 0xf, 0xf, 0xb, 0xf, 0xd, 0xc, 0x1, 0xa, 0x1, 0xd, 0x7, 0x4, 0x4, 0x8,
|
||||||
|
0x2, 0xb, 0xb, 0x4, 0xa, 0x7, 0x6, 0xd, 0xb, 0xb, 0x6, 0xb, 0x1, 0x8, 0xa, 0x6,
|
||||||
|
});
|
||||||
|
|
||||||
try testArgs(@Vector(1, i8), .{
|
try testArgs(@Vector(1, i8), .{
|
||||||
-0x54,
|
-0x54,
|
||||||
}, .{
|
}, .{
|
||||||
@ -19013,6 +19096,7 @@ inline fn mulUnsafe(comptime Type: type, lhs: Type, rhs: Type) DoubleBits(Type)
|
|||||||
test mulUnsafe {
|
test mulUnsafe {
|
||||||
const test_mul_unsafe = binary(mulUnsafe, .{});
|
const test_mul_unsafe = binary(mulUnsafe, .{});
|
||||||
try test_mul_unsafe.testInts();
|
try test_mul_unsafe.testInts();
|
||||||
|
try test_mul_unsafe.testIntVectors();
|
||||||
}
|
}
|
||||||
|
|
||||||
inline fn multiply(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs * rhs) {
|
inline fn multiply(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs * rhs) {
|
||||||
@ -19189,6 +19273,14 @@ test clz {
|
|||||||
try test_clz.testIntVectors();
|
try test_clz.testIntVectors();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Widens `rhs` to `RoundBitsUp(Type, 8)` (bit width rounded up to a multiple
/// of 8) and returns the `@byteSwap` of the widened value.
inline fn byteSwap(comptime Type: type, rhs: Type) RoundBitsUp(Type, 8) {
|
||||||
|
// widen first so the operand's bit count is a whole number of bytes
return @byteSwap(@as(RoundBitsUp(Type, 8), rhs));
|
||||||
|
}
|
||||||
|
// Builds a test driver from `byteSwap` via `unary` and runs its `testInts` cases.
// NOTE(review): `unary` is defined outside this view — presumably it mirrors the
// `binary` harness used by the neighbouring tests; confirm.
test byteSwap {
|
||||||
|
const test_byte_swap = unary(byteSwap, .{});
|
||||||
|
try test_byte_swap.testInts();
|
||||||
|
}
|
||||||
|
|
||||||
inline fn sqrt(comptime Type: type, rhs: Type) @TypeOf(@sqrt(rhs)) {
|
inline fn sqrt(comptime Type: type, rhs: Type) @TypeOf(@sqrt(rhs)) {
|
||||||
return @sqrt(rhs);
|
return @sqrt(rhs);
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user