mirror of https://github.com/ziglang/zig.git (synced 2025-12-06 06:13:07 +00:00)
lib/headers: update to clang 10.x C headers
upstream revision: 3cce3790072249cbe51b96cea26bc78019c11fd0
parent 97b2ac598b
commit 74872263cc
lib/include/__clang_cuda_intrinsics.h (10 changes, vendored)
@@ -211,7 +211,15 @@ inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
   return __nvvm_vote_ballot_sync(mask, pred);
 }
 
-inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }
+inline __device__ unsigned int __activemask() {
+#if CUDA_VERSION < 9020
+  return __nvvm_vote_ballot(1);
+#else
+  unsigned int mask;
+  asm volatile("activemask.b32 %0;" : "=r"(mask));
+  return mask;
+#endif
+}
 
 inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {
   return __nvvm_fns(mask, base, offset);
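As an illustrative sketch only (not part of this commit), device code could pair the __activemask() intrinsic above with the standard __popc builtin to count the warp lanes that are currently converged; the helper name is hypothetical:

__device__ unsigned int active_lane_count(void) {
  /* number of set bits in the mask of currently active threads */
  return __popc(__activemask());
}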
lib/include/altivec.h (143 changes, vendored)
@@ -2761,8 +2761,8 @@ static __inline__ vector double __ATTRS_o_ai vec_xl_len(double *__a,
   return (vector double)__builtin_vsx_lxvl(__a, (__b << 56));
 }
 
-static __inline__ vector double __ATTRS_o_ai vec_xl_len_r(unsigned char *__a,
-                                                          size_t __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_xl_len_r(unsigned char *__a, size_t __b) {
   vector unsigned char __res =
       (vector unsigned char)__builtin_vsx_lxvll(__a, (__b << 56));
 #ifdef __LITTLE_ENDIAN__
@@ -2876,9 +2876,10 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a,
 #ifdef __VSX__
 #define vec_ctf(__a, __b) \
   _Generic((__a), vector int \
-           : (vector float)__builtin_altivec_vcfsx((__a), (__b)), \
+           : (vector float)__builtin_altivec_vcfsx((vector int)(__a), (__b)), \
            vector unsigned int \
-           : (vector float)__builtin_altivec_vcfux((vector int)(__a), (__b)), \
+           : (vector float)__builtin_altivec_vcfux((vector unsigned int)(__a), \
+                                                    (__b)), \
            vector unsigned long long \
            : (__builtin_convertvector((vector unsigned long long)(__a), \
                                       vector double) * \
@@ -2892,9 +2893,10 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a,
 #else
 #define vec_ctf(__a, __b) \
   _Generic((__a), vector int \
-           : (vector float)__builtin_altivec_vcfsx((__a), (__b)), \
+           : (vector float)__builtin_altivec_vcfsx((vector int)(__a), (__b)), \
            vector unsigned int \
-           : (vector float)__builtin_altivec_vcfux((vector int)(__a), (__b)))
+           : (vector float)__builtin_altivec_vcfux((vector unsigned int)(__a), \
+                                                    (__b)))
 #endif
 
 /* vec_vcfsx */
@@ -2910,10 +2912,11 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a,
 #ifdef __VSX__
 #define vec_cts(__a, __b) \
   _Generic((__a), vector float \
-           : __builtin_altivec_vctsxs((__a), (__b)), vector double \
+           : __builtin_altivec_vctsxs((vector float)(__a), (__b)), \
+             vector double \
            : __extension__({ \
              vector double __ret = \
-                 (__a) * \
+                 (vector double)(__a) * \
                  (vector double)(vector unsigned long long)((0x3ffULL + (__b)) \
                                                             << 52); \
              __builtin_convertvector(__ret, vector signed long long); \
@@ -2931,10 +2934,11 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a,
 #ifdef __VSX__
 #define vec_ctu(__a, __b) \
   _Generic((__a), vector float \
-           : __builtin_altivec_vctuxs((__a), (__b)), vector double \
+           : __builtin_altivec_vctuxs((vector float)(__a), (__b)), \
+             vector double \
            : __extension__({ \
              vector double __ret = \
-                 (__a) * \
+                 (vector double)(__a) * \
                  (vector double)(vector unsigned long long)((0x3ffULL + __b) \
                                                             << 52); \
              __builtin_convertvector(__ret, vector unsigned long long); \
@@ -3286,9 +3290,7 @@ static __inline__ vector double __ATTRS_o_ai vec_div(vector double __a,
 
 /* vec_dss */
 
-static __inline__ void __attribute__((__always_inline__)) vec_dss(int __a) {
-  __builtin_altivec_dss(__a);
-}
+#define vec_dss __builtin_altivec_dss
 
 /* vec_dssall */
 
@@ -6301,19 +6303,20 @@ static __inline__ vector float __ATTRS_o_ai vec_or(vector float __a,
 #ifdef __VSX__
 static __inline__ vector double __ATTRS_o_ai vec_or(vector bool long long __a,
                                                     vector double __b) {
-  return (vector unsigned long long)__a | (vector unsigned long long)__b;
+  return (vector double)((vector unsigned long long)__a |
+                         (vector unsigned long long)__b);
 }
 
 static __inline__ vector double __ATTRS_o_ai vec_or(vector double __a,
                                                     vector bool long long __b) {
-  return (vector unsigned long long)__a | (vector unsigned long long)__b;
+  return (vector double)((vector unsigned long long)__a |
+                         (vector unsigned long long)__b);
 }
 
 static __inline__ vector double __ATTRS_o_ai vec_or(vector double __a,
                                                     vector double __b) {
-  vector unsigned long long __res =
-      (vector unsigned long long)__a | (vector unsigned long long)__b;
-  return (vector double)__res;
+  return (vector double)((vector unsigned long long)__a |
+                         (vector unsigned long long)__b);
 }
 
 static __inline__ vector signed long long __ATTRS_o_ai
@@ -14781,7 +14784,7 @@ static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool long long __a,
 static __inline__ int __ATTRS_o_ai vec_all_ne(vector float __a,
                                               vector float __b) {
 #ifdef __VSX__
-  return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ, __a, __b);
+  return __builtin_vsx_xvcmpeqsp_p(__CR6_EQ, __a, __b);
 #else
   return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, __a, __b);
 #endif
@@ -16361,27 +16364,32 @@ vec_xl(signed long long __offset, unsigned char *__ptr) {
 
 static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset,
                                                       signed short *__ptr) {
-  return *(unaligned_vec_sshort *)(__ptr + __offset);
+  signed char *__addr = (signed char *)__ptr + __offset;
+  return *(unaligned_vec_sshort *)__addr;
 }
 
 static inline __ATTRS_o_ai vector unsigned short
 vec_xl(signed long long __offset, unsigned short *__ptr) {
-  return *(unaligned_vec_ushort *)(__ptr + __offset);
+  signed char *__addr = (signed char *)__ptr + __offset;
+  return *(unaligned_vec_ushort *)__addr;
 }
 
 static inline __ATTRS_o_ai vector signed int vec_xl(signed long long __offset,
                                                     signed int *__ptr) {
-  return *(unaligned_vec_sint *)(__ptr + __offset);
+  signed char *__addr = (signed char *)__ptr + __offset;
+  return *(unaligned_vec_sint *)__addr;
 }
 
 static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset,
                                                       unsigned int *__ptr) {
-  return *(unaligned_vec_uint *)(__ptr + __offset);
+  signed char *__addr = (signed char *)__ptr + __offset;
+  return *(unaligned_vec_uint *)__addr;
 }
 
 static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset,
                                                float *__ptr) {
-  return *(unaligned_vec_float *)(__ptr + __offset);
+  signed char *__addr = (signed char *)__ptr + __offset;
+  return *(unaligned_vec_float *)__addr;
 }
 
 #ifdef __VSX__
@@ -16391,17 +16399,20 @@ typedef vector double unaligned_vec_double __attribute__((aligned(1)));
 
 static inline __ATTRS_o_ai vector signed long long
 vec_xl(signed long long __offset, signed long long *__ptr) {
-  return *(unaligned_vec_sll *)(__ptr + __offset);
+  signed char *__addr = (signed char *)__ptr + __offset;
+  return *(unaligned_vec_sll *)__addr;
 }
 
 static inline __ATTRS_o_ai vector unsigned long long
 vec_xl(signed long long __offset, unsigned long long *__ptr) {
-  return *(unaligned_vec_ull *)(__ptr + __offset);
+  signed char *__addr = (signed char *)__ptr + __offset;
+  return *(unaligned_vec_ull *)__addr;
 }
 
 static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset,
                                                 double *__ptr) {
-  return *(unaligned_vec_double *)(__ptr + __offset);
+  signed char *__addr = (signed char *)__ptr + __offset;
+  return *(unaligned_vec_double *)__addr;
 }
 #endif
 
@@ -16411,12 +16422,14 @@ typedef vector unsigned __int128 unaligned_vec_ui128
     __attribute__((aligned(1)));
 static inline __ATTRS_o_ai vector signed __int128
 vec_xl(signed long long __offset, signed __int128 *__ptr) {
-  return *(unaligned_vec_si128 *)(__ptr + __offset);
+  signed char *__addr = (signed char *)__ptr + __offset;
+  return *(unaligned_vec_si128 *)__addr;
 }
 
 static inline __ATTRS_o_ai vector unsigned __int128
 vec_xl(signed long long __offset, unsigned __int128 *__ptr) {
-  return *(unaligned_vec_ui128 *)(__ptr + __offset);
+  signed char *__addr = (signed char *)__ptr + __offset;
+  return *(unaligned_vec_ui128 *)__addr;
 }
 #endif
 
@@ -16425,27 +16438,27 @@ vec_xl(signed long long __offset, unsigned __int128 *__ptr) {
 #ifdef __LITTLE_ENDIAN__
 static __inline__ vector signed char __ATTRS_o_ai
 vec_xl_be(signed long long __offset, signed char *__ptr) {
-  vector signed char __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr);
+  vector signed char __vec = (vector signed char)__builtin_vsx_lxvd2x_be(__offset, __ptr);
   return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
                                  13, 12, 11, 10, 9, 8);
 }
 
 static __inline__ vector unsigned char __ATTRS_o_ai
 vec_xl_be(signed long long __offset, unsigned char *__ptr) {
-  vector unsigned char __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr);
+  vector unsigned char __vec = (vector unsigned char)__builtin_vsx_lxvd2x_be(__offset, __ptr);
   return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
                                  13, 12, 11, 10, 9, 8);
 }
 
 static __inline__ vector signed short __ATTRS_o_ai
 vec_xl_be(signed long long __offset, signed short *__ptr) {
-  vector signed short __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr);
+  vector signed short __vec = (vector signed short)__builtin_vsx_lxvd2x_be(__offset, __ptr);
   return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4);
 }
 
 static __inline__ vector unsigned short __ATTRS_o_ai
 vec_xl_be(signed long long __offset, unsigned short *__ptr) {
-  vector unsigned short __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr);
+  vector unsigned short __vec = (vector unsigned short)__builtin_vsx_lxvd2x_be(__offset, __ptr);
   return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4);
 }
 
@@ -16513,50 +16526,58 @@ static inline __ATTRS_o_ai void vec_xst(vector unsigned char __vec,
 static inline __ATTRS_o_ai void vec_xst(vector signed short __vec,
                                         signed long long __offset,
                                         signed short *__ptr) {
-  *(unaligned_vec_sshort *)(__ptr + __offset) = __vec;
+  signed char *__addr = (signed char *)__ptr + __offset;
+  *(unaligned_vec_sshort *)__addr = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector unsigned short __vec,
                                         signed long long __offset,
                                         unsigned short *__ptr) {
-  *(unaligned_vec_ushort *)(__ptr + __offset) = __vec;
+  signed char *__addr = (signed char *)__ptr + __offset;
+  *(unaligned_vec_ushort *)__addr = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector signed int __vec,
                                         signed long long __offset,
                                         signed int *__ptr) {
-  *(unaligned_vec_sint *)(__ptr + __offset) = __vec;
+  signed char *__addr = (signed char *)__ptr + __offset;
+  *(unaligned_vec_sint *)__addr = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector unsigned int __vec,
                                         signed long long __offset,
                                         unsigned int *__ptr) {
-  *(unaligned_vec_uint *)(__ptr + __offset) = __vec;
+  signed char *__addr = (signed char *)__ptr + __offset;
+  *(unaligned_vec_uint *)__addr = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector float __vec,
                                         signed long long __offset,
                                         float *__ptr) {
-  *(unaligned_vec_float *)(__ptr + __offset) = __vec;
+  signed char *__addr = (signed char *)__ptr + __offset;
+  *(unaligned_vec_float *)__addr = __vec;
 }
 
 #ifdef __VSX__
 static inline __ATTRS_o_ai void vec_xst(vector signed long long __vec,
                                         signed long long __offset,
                                         signed long long *__ptr) {
-  *(unaligned_vec_sll *)(__ptr + __offset) = __vec;
+  signed char *__addr = (signed char *)__ptr + __offset;
+  *(unaligned_vec_sll *)__addr = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector unsigned long long __vec,
                                         signed long long __offset,
                                         unsigned long long *__ptr) {
-  *(unaligned_vec_ull *)(__ptr + __offset) = __vec;
+  signed char *__addr = (signed char *)__ptr + __offset;
+  *(unaligned_vec_ull *)__addr = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector double __vec,
                                         signed long long __offset,
                                         double *__ptr) {
-  *(unaligned_vec_double *)(__ptr + __offset) = __vec;
+  signed char *__addr = (signed char *)__ptr + __offset;
+  *(unaligned_vec_double *)__addr = __vec;
 }
 #endif
 
@@ -16564,13 +16585,15 @@ static inline __ATTRS_o_ai void vec_xst(vector double __vec,
 static inline __ATTRS_o_ai void vec_xst(vector signed __int128 __vec,
                                         signed long long __offset,
                                         signed __int128 *__ptr) {
-  *(unaligned_vec_si128 *)(__ptr + __offset) = __vec;
+  signed char *__addr = (signed char *)__ptr + __offset;
+  *(unaligned_vec_si128 *)__addr = __vec;
 }
 
 static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec,
                                         signed long long __offset,
                                         unsigned __int128 *__ptr) {
-  *(unaligned_vec_ui128 *)(__ptr + __offset) = __vec;
+  signed char *__addr = (signed char *)__ptr + __offset;
+  *(unaligned_vec_ui128 *)__addr = __vec;
 }
 #endif
 
@@ -16583,7 +16606,8 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed char __vec,
   vector signed char __tmp =
       __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
                               13, 12, 11, 10, 9, 8);
-  __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr);
+  typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double;
+  __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr);
 }
 
 static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned char __vec,
@@ -16592,7 +16616,8 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned char __vec,
   vector unsigned char __tmp =
       __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
                               13, 12, 11, 10, 9, 8);
-  __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr);
+  typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double;
+  __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr);
 }
 
 static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed short __vec,
@@ -16600,7 +16625,8 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed short __vec,
                                                signed short *__ptr) {
   vector signed short __tmp =
       __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4);
-  __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr);
+  typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double;
+  __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr);
 }
 
 static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned short __vec,
@@ -16608,7 +16634,8 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned short __vec,
                                                unsigned short *__ptr) {
   vector unsigned short __tmp =
       __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4);
-  __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr);
+  typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double;
+  __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr);
 }
 
 static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed int __vec,
@@ -16620,32 +16647,32 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed int __vec,
 static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned int __vec,
                                                signed long long __offset,
                                                unsigned int *__ptr) {
-  __builtin_vsx_stxvw4x_be(__vec, __offset, __ptr);
+  __builtin_vsx_stxvw4x_be((vector int)__vec, __offset, __ptr);
 }
 
 static __inline__ void __ATTRS_o_ai vec_xst_be(vector float __vec,
                                                signed long long __offset,
                                                float *__ptr) {
-  __builtin_vsx_stxvw4x_be(__vec, __offset, __ptr);
+  __builtin_vsx_stxvw4x_be((vector int)__vec, __offset, __ptr);
 }
 
 #ifdef __VSX__
 static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed long long __vec,
                                                signed long long __offset,
                                                signed long long *__ptr) {
-  __builtin_vsx_stxvd2x_be(__vec, __offset, __ptr);
+  __builtin_vsx_stxvd2x_be((vector double)__vec, __offset, __ptr);
 }
 
 static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned long long __vec,
                                                signed long long __offset,
                                                unsigned long long *__ptr) {
-  __builtin_vsx_stxvd2x_be(__vec, __offset, __ptr);
+  __builtin_vsx_stxvd2x_be((vector double)__vec, __offset, __ptr);
 }
 
 static __inline__ void __ATTRS_o_ai vec_xst_be(vector double __vec,
                                                signed long long __offset,
                                                double *__ptr) {
-  __builtin_vsx_stxvd2x_be(__vec, __offset, __ptr);
+  __builtin_vsx_stxvd2x_be((vector double)__vec, __offset, __ptr);
 }
 #endif
 
@@ -16668,12 +16695,12 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned __int128 __vec,
 
 #ifdef __POWER9_VECTOR__
 #define vec_test_data_class(__a, __b) \
-  _Generic((__a), \
-           vector float: \
-             (vector bool int)__builtin_vsx_xvtstdcsp((__a), (__b)), \
-           vector double: \
-             (vector bool long long)__builtin_vsx_xvtstdcdp((__a), (__b)) \
-          )
+  _Generic( \
+      (__a), vector float \
+      : (vector bool int)__builtin_vsx_xvtstdcsp((vector float)(__a), (__b)), \
+        vector double \
+      : (vector bool long long)__builtin_vsx_xvtstdcdp((vector double)(__a), \
+                                                       (__b)))
 
 #endif /* #ifdef __POWER9_VECTOR__ */
 
lib/include/arm_acle.h (50 changes, vendored)
@@ -90,9 +90,11 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
 #endif
 
 /* 8.7 NOP */
+#if !defined(_MSC_VER) || !defined(__aarch64__)
 static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
   __builtin_arm_nop();
 }
+#endif
 
 /* 9 DATA-PROCESSING INTRINSICS */
 /* 9.2 Miscellaneous data-processing intrinsics */
@@ -139,6 +141,26 @@ __clzll(uint64_t __t) {
   return __builtin_clzll(__t);
 }
 
+/* CLS */
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__cls(uint32_t __t) {
+  return __builtin_arm_cls(__t);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__clsl(unsigned long __t) {
+#if __SIZEOF_LONG__ == 4
+  return __builtin_arm_cls(__t);
+#else
+  return __builtin_arm_cls64(__t);
+#endif
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__clsll(uint64_t __t) {
+  return __builtin_arm_cls64(__t);
+}
+
 /* REV */
 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __rev(uint32_t __t) {
@@ -609,11 +631,15 @@ __jcvt(double __a) {
 #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
 #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
 #define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)
+#define __arm_rsrf(sysreg) __builtin_bit_cast(float, __arm_rsr(sysreg))
+#define __arm_rsrf64(sysreg) __builtin_bit_cast(double, __arm_rsr64(sysreg))
 #define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)
 #define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
 #define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
+#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
+#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))
 
-// Memory Tagging Extensions (MTE) Intrinsics
+/* Memory Tagging Extensions (MTE) Intrinsics */
 #if __ARM_FEATURE_MEMORY_TAGGING
 #define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
 #define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
@@ -623,6 +649,28 @@ __jcvt(double __a) {
 #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
 #endif
 
+/* Transactional Memory Extension (TME) Intrinsics */
+#if __ARM_FEATURE_TME
+
+#define _TMFAILURE_REASON 0x00007fffu
+#define _TMFAILURE_RTRY 0x00008000u
+#define _TMFAILURE_CNCL 0x00010000u
+#define _TMFAILURE_MEM 0x00020000u
+#define _TMFAILURE_IMP 0x00040000u
+#define _TMFAILURE_ERR 0x00080000u
+#define _TMFAILURE_SIZE 0x00100000u
+#define _TMFAILURE_NEST 0x00200000u
+#define _TMFAILURE_DBG 0x00400000u
+#define _TMFAILURE_INT 0x00800000u
+#define _TMFAILURE_TRIVIAL 0x01000000u
+
+#define __tstart() __builtin_arm_tstart()
+#define __tcommit() __builtin_arm_tcommit()
+#define __tcancel(__arg) __builtin_arm_tcancel(__arg)
+#define __ttest() __builtin_arm_ttest()
+
+#endif /* __ARM_FEATURE_TME */
+
 #if defined(__cplusplus)
 }
 #endif
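As an illustrative sketch only (not part of this commit), the TME macros added above could drive a simple transaction retry loop; the function name is hypothetical and this assumes a target built with the TME feature enabled:

static int run_transaction(void) {
  for (;;) {
    uint64_t status = __tstart();
    if (status == 0) {
      /* transaction started: do the speculative work here */
      __tcommit();
      return 0;
    }
    if (!(status & _TMFAILURE_RTRY))
      return -1; /* permanent failure: fall back to a non-transactional path */
    /* otherwise retry */
  }
}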
lib/include/arm_cmse.h (217 changes, vendored, new file)
@@ -0,0 +1,217 @@
+//===---- arm_cmse.h - Arm CMSE support -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __ARM_CMSE_H
+#define __ARM_CMSE_H
+
+#if (__ARM_FEATURE_CMSE & 0x1)
+#include <stddef.h>
+#include <stdint.h>
+
+#define __ARM_CMSE_SECURE_MODE (__ARM_FEATURE_CMSE & 0x2)
+#define CMSE_MPU_READWRITE 1 /* checks if readwrite_ok field is set */
+#define CMSE_AU_NONSECURE 2 /* checks if permissions have secure field unset */
+#define CMSE_MPU_UNPRIV 4 /* sets T flag on TT insrtuction */
+#define CMSE_MPU_READ 8 /* checks if read_ok field is set */
+#define CMSE_MPU_NONSECURE 16 /* sets A flag, checks if secure field unset */
+#define CMSE_NONSECURE (CMSE_AU_NONSECURE | CMSE_MPU_NONSECURE)
+
+#define cmse_check_pointed_object(p, f) \
+  cmse_check_address_range((p), sizeof(*(p)), (f))
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+typedef union {
+  struct cmse_address_info {
+#ifdef __ARM_BIG_ENDIAN
+    /* __ARM_BIG_ENDIAN */
+#if (__ARM_CMSE_SECURE_MODE)
+    unsigned idau_region : 8;
+    unsigned idau_region_valid : 1;
+    unsigned secure : 1;
+    unsigned nonsecure_readwrite_ok : 1;
+    unsigned nonsecure_read_ok : 1;
+#else
+    unsigned : 12;
+#endif
+    unsigned readwrite_ok : 1;
+    unsigned read_ok : 1;
+#if (__ARM_CMSE_SECURE_MODE)
+    unsigned sau_region_valid : 1;
+#else
+    unsigned : 1;
+#endif
+    unsigned mpu_region_valid : 1;
+#if (__ARM_CMSE_SECURE_MODE)
+    unsigned sau_region : 8;
+#else
+    unsigned : 8;
+#endif
+    unsigned mpu_region : 8;
+
+#else /* __ARM_LITTLE_ENDIAN */
+    unsigned mpu_region : 8;
+#if (__ARM_CMSE_SECURE_MODE)
+    unsigned sau_region : 8;
+#else
+    unsigned : 8;
+#endif
+    unsigned mpu_region_valid : 1;
+#if (__ARM_CMSE_SECURE_MODE)
+    unsigned sau_region_valid : 1;
+#else
+    unsigned : 1;
+#endif
+    unsigned read_ok : 1;
+    unsigned readwrite_ok : 1;
+#if (__ARM_CMSE_SECURE_MODE)
+    unsigned nonsecure_read_ok : 1;
+    unsigned nonsecure_readwrite_ok : 1;
+    unsigned secure : 1;
+    unsigned idau_region_valid : 1;
+    unsigned idau_region : 8;
+#else
+    unsigned : 12;
+#endif
+#endif /*__ARM_LITTLE_ENDIAN */
+  } flags;
+  unsigned value;
+} cmse_address_info_t;
+
+static cmse_address_info_t __attribute__((__always_inline__, __nodebug__))
+cmse_TT(void *__p) {
+  cmse_address_info_t __u;
+  __u.value = __builtin_arm_cmse_TT(__p);
+  return __u;
+}
+static cmse_address_info_t __attribute__((__always_inline__, __nodebug__))
+cmse_TTT(void *__p) {
+  cmse_address_info_t __u;
+  __u.value = __builtin_arm_cmse_TTT(__p);
+  return __u;
+}
+
+#if __ARM_CMSE_SECURE_MODE
+static cmse_address_info_t __attribute__((__always_inline__, __nodebug__))
+cmse_TTA(void *__p) {
+  cmse_address_info_t __u;
+  __u.value = __builtin_arm_cmse_TTA(__p);
+  return __u;
+}
+static cmse_address_info_t __attribute__((__always_inline__, __nodebug__))
+cmse_TTAT(void *__p) {
+  cmse_address_info_t __u;
+  __u.value = __builtin_arm_cmse_TTAT(__p);
+  return __u;
+}
+#endif
+
+#define cmse_TT_fptr(p) cmse_TT(__builtin_bit_cast(void *, (p)))
+#define cmse_TTT_fptr(p) cmse_TTT(__builtin_bit_cast(void *, (p)))
+
+#if __ARM_CMSE_SECURE_MODE
+#define cmse_TTA_fptr(p) cmse_TTA(__builtin_bit_cast(void *, (p)))
+#define cmse_TTAT_fptr(p) cmse_TTAT(__builtin_bit_cast(void *, (p)))
+#endif
+
+static void *__attribute__((__always_inline__))
+cmse_check_address_range(void *__pb, size_t __s, int __flags) {
+  uintptr_t __begin = (uintptr_t)__pb;
+  uintptr_t __end = __begin + __s - 1;
+
+  if (__end < __begin)
+    return NULL; /* wrap around check */
+
+  /* Check whether the range crosses a 32-bytes aligned address */
+  const int __single_check = (__begin ^ __end) < 0x20u;
+
+  /* execute the right variant of the TT instructions */
+  void *__pe = (void *)__end;
+  cmse_address_info_t __permb, __perme;
+  switch (__flags & (CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE)) {
+  case 0:
+    __permb = cmse_TT(__pb);
+    __perme = __single_check ? __permb : cmse_TT(__pe);
+    break;
+  case CMSE_MPU_UNPRIV:
+    __permb = cmse_TTT(__pb);
+    __perme = __single_check ? __permb : cmse_TTT(__pe);
+    break;
+#if __ARM_CMSE_SECURE_MODE
+  case CMSE_MPU_NONSECURE:
+    __permb = cmse_TTA(__pb);
+    __perme = __single_check ? __permb : cmse_TTA(__pe);
+    break;
+  case CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE:
+    __permb = cmse_TTAT(__pb);
+    __perme = __single_check ? __permb : cmse_TTAT(__pe);
+    break;
+#endif
+  /* if CMSE_NONSECURE is specified w/o __ARM_CMSE_SECURE_MODE */
+  default:
+    return NULL;
+  }
+
+  /* check that the range does not cross MPU, SAU, or IDAU region boundaries */
+  if (__permb.value != __perme.value)
+    return NULL;
+#if !(__ARM_CMSE_SECURE_MODE)
+  /* CMSE_AU_NONSECURE is only supported when __ARM_FEATURE_CMSE & 0x2 */
+  if (__flags & CMSE_AU_NONSECURE)
+    return NULL;
+#endif
+
+  /* check the permission on the range */
+  switch (__flags & ~(CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE)) {
+#if (__ARM_CMSE_SECURE_MODE)
+  case CMSE_MPU_READ | CMSE_MPU_READWRITE | CMSE_AU_NONSECURE:
+  case CMSE_MPU_READWRITE | CMSE_AU_NONSECURE:
+    return __permb.flags.nonsecure_readwrite_ok ? __pb : NULL;
+
+  case CMSE_MPU_READ | CMSE_AU_NONSECURE:
+    return __permb.flags.nonsecure_read_ok ? __pb : NULL;
+
+  case CMSE_AU_NONSECURE:
+    return __permb.flags.secure ? NULL : __pb;
+#endif
+  case CMSE_MPU_READ | CMSE_MPU_READWRITE:
+  case CMSE_MPU_READWRITE:
+    return __permb.flags.readwrite_ok ? __pb : NULL;
+
+  case CMSE_MPU_READ:
+    return __permb.flags.read_ok ? __pb : NULL;
+
+  default:
+    return NULL;
+  }
+}
+
+#if __ARM_CMSE_SECURE_MODE
+static int __attribute__((__always_inline__, __nodebug__))
+cmse_nonsecure_caller(void) {
+  return !((uintptr_t)__builtin_return_address(0) & 1);
+}
+
+#define cmse_nsfptr_create(p) \
+  __builtin_bit_cast(__typeof__(p), \
+                     (__builtin_bit_cast(uintptr_t, p) & ~(uintptr_t)1))
+
+#define cmse_is_nsfptr(p) ((__builtin_bit_cast(uintptr_t, p) & 1) == 0)
+
+#endif /* __ARM_CMSE_SECURE_MODE */
+
+void __attribute__((__noreturn__)) cmse_abort(void);
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* (__ARM_FEATURE_CMSE & 0x1) */
+
+#endif /* __ARM_CMSE_H */
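As an illustrative sketch only (not part of this commit), secure-side code could use cmse_check_pointed_object from the new header to validate a pointer handed over by the non-secure world before dereferencing it; the function name is hypothetical and this assumes an Armv8-M target built with CMSE support:

#include <arm_cmse.h>

int secure_read(int *ns_obj) {
  int *p = cmse_check_pointed_object(ns_obj, CMSE_NONSECURE | CMSE_MPU_READ);
  if (p == NULL)
    return -1; /* object is not readable by the non-secure caller */
  return *p;
}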
lib/include/arm_fp16.h (983 changes, vendored): file diff suppressed because it is too large
lib/include/arm_mve.h (12563 changes, vendored, new file): file diff suppressed because it is too large
lib/include/arm_neon.h (16829 changes, vendored): file diff suppressed because it is too large
lib/include/avx512bwintrin.h (12 changes, vendored)
@@ -1731,13 +1731,13 @@ _mm512_loadu_epi16 (void const *__P)
   struct __loadu_epi16 {
     __m512i_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_epi16*)__P)->__v;
+  return ((const struct __loadu_epi16*)__P)->__v;
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+  return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P,
                                                      (__v32hi) __W,
                                                      (__mmask32) __U);
 }
@@ -1745,7 +1745,7 @@ _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+  return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P,
                                                      (__v32hi)
                                                      _mm512_setzero_si512 (),
                                                      (__mmask32) __U);
@@ -1757,13 +1757,13 @@ _mm512_loadu_epi8 (void const *__P)
   struct __loadu_epi8 {
     __m512i_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_epi8*)__P)->__v;
+  return ((const struct __loadu_epi8*)__P)->__v;
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+  return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P,
                                                      (__v64qi) __W,
                                                      (__mmask64) __U);
 }
@@ -1771,7 +1771,7 @@ _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+  return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P,
                                                      (__v64qi)
                                                      _mm512_setzero_si512 (),
                                                      (__mmask64) __U);
lib/include/avx512fintrin.h (55 changes, vendored)
@@ -4305,7 +4305,7 @@ _mm512_loadu_si512 (void const *__P)
   struct __loadu_si512 {
     __m512i_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_si512*)__P)->__v;
+  return ((const struct __loadu_si512*)__P)->__v;
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS512
@@ -4314,7 +4314,7 @@ _mm512_loadu_epi32 (void const *__P)
   struct __loadu_epi32 {
     __m512i_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_epi32*)__P)->__v;
+  return ((const struct __loadu_epi32*)__P)->__v;
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS512
@@ -4341,7 +4341,7 @@ _mm512_loadu_epi64 (void const *__P)
   struct __loadu_epi64 {
     __m512i_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_epi64*)__P)->__v;
+  return ((const struct __loadu_epi64*)__P)->__v;
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS512
@@ -4401,7 +4401,7 @@ _mm512_loadu_pd(void const *__p)
   struct __loadu_pd {
     __m512d_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_pd*)__p)->__v;
+  return ((const struct __loadu_pd*)__p)->__v;
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS512
@@ -4410,13 +4410,13 @@ _mm512_loadu_ps(void const *__p)
   struct __loadu_ps {
     __m512_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_ps*)__p)->__v;
+  return ((const struct __loadu_ps*)__p)->__v;
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_load_ps(void const *__p)
 {
-  return *(__m512*)__p;
+  return *(const __m512*)__p;
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS512
@@ -4439,7 +4439,7 @@ _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
 static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_load_pd(void const *__p)
 {
-  return *(__m512d*)__p;
+  return *(const __m512d*)__p;
 }
 
 static __inline __m512d __DEFAULT_FN_ATTRS512
@@ -4462,19 +4462,19 @@ _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_load_si512 (void const *__P)
 {
-  return *(__m512i *) __P;
+  return *(const __m512i *) __P;
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_load_epi32 (void const *__P)
 {
-  return *(__m512i *) __P;
+  return *(const __m512i *) __P;
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_load_epi64 (void const *__P)
 {
-  return *(__m512i *) __P;
+  return *(const __m512i *) __P;
 }
 
 /* SIMD store ops */
@@ -7658,13 +7658,13 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
 #define _mm512_i32gather_ps(index, addr, scale) \
   (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
-                                       (__v16sf)(__m512)(index), \
+                                       (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale))
 
 #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
   (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
-                                       (__v16sf)(__m512)(index), \
+                                       (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale))
 
 #define _mm512_i32gather_epi32(index, addr, scale) \
@@ -8436,7 +8436,7 @@ _store_mask16(__mmask16 *__A, __mmask16 __B) {
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS512
-_mm512_stream_si512 (__m512i * __P, __m512i __A)
+_mm512_stream_si512 (void * __P, __m512i __A)
 {
   typedef __v8di __v8di_aligned __attribute__((aligned(64)));
   __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
@@ -8450,14 +8450,14 @@ _mm512_stream_load_si512 (void const *__P)
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS512
-_mm512_stream_pd (double *__P, __m512d __A)
+_mm512_stream_pd (void *__P, __m512d __A)
 {
   typedef __v8df __v8df_aligned __attribute__((aligned(64)));
   __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS512
-_mm512_stream_ps (float *__P, __m512 __A)
+_mm512_stream_ps (void *__P, __m512 __A)
 {
   typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
   __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
@@ -8724,13 +8724,13 @@ _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
                                                 (__v4sf)_mm_setzero_ps(),
                                                 0, 4, 4, 4);
 
-  return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1);
+  return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_load_ss (__mmask8 __U, const float* __A)
 {
-  return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A,
+  return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
                                                 (__v4sf) _mm_setzero_ps(),
                                                 __U & 1);
 }
@@ -8742,13 +8742,13 @@ _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
                                                  (__v2df)_mm_setzero_pd(),
                                                  0, 2);
 
-  return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1);
+  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_load_sd (__mmask8 __U, const double* __A)
 {
-  return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A,
+  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
                                                   (__v2df) _mm_setzero_pd(),
                                                   __U & 1);
 }
@@ -9659,6 +9659,23 @@ _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
 }
 #undef _mm512_mask_reduce_operator
 
+/// Moves the least significant 32 bits of a vector of [16 x i32] to a
+/// 32-bit signed integer value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
+///
+/// \param __A
+///    A vector of [16 x i32]. The least significant 32 bits are moved to the
+///    destination.
+/// \returns A 32-bit signed integer containing the moved value.
+static __inline__ int __DEFAULT_FN_ATTRS512
+_mm512_cvtsi512_si32(__m512i __A) {
+  __v16si __b = (__v16si)__A;
+  return __b[0];
+}
+
 #undef __DEFAULT_FN_ATTRS512
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS
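As an illustrative sketch only (not part of this commit), the newly added _mm512_cvtsi512_si32 intrinsic extracts the low 32-bit lane of a 512-bit integer vector; the helper name below is hypothetical and assumes AVX-512F is enabled:

#include <immintrin.h>

static int low_lane_of_sum(__m512i a, __m512i b) {
  /* add lane-wise, then move the least significant 32-bit element to a scalar */
  return _mm512_cvtsi512_si32(_mm512_add_epi32(a, b));
}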
lib/include/avx512vlbwintrin.h (24 changes, vendored)
@@ -2289,13 +2289,13 @@ _mm_loadu_epi16 (void const *__P)
   struct __loadu_epi16 {
     __m128i_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_epi16*)__P)->__v;
+  return ((const struct __loadu_epi16*)__P)->__v;
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
                                                      (__v8hi) __W,
                                                      (__mmask8) __U);
 }
@@ -2303,7 +2303,7 @@ _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
                                                      (__v8hi)
                                                      _mm_setzero_si128 (),
                                                      (__mmask8) __U);
@@ -2315,13 +2315,13 @@ _mm256_loadu_epi16 (void const *__P)
   struct __loadu_epi16 {
     __m256i_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_epi16*)__P)->__v;
+  return ((const struct __loadu_epi16*)__P)->__v;
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
                                                      (__v16hi) __W,
                                                      (__mmask16) __U);
 }
@@ -2329,7 +2329,7 @@ _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
                                                      (__v16hi)
                                                      _mm256_setzero_si256 (),
                                                      (__mmask16) __U);
@ -2341,13 +2341,13 @@ _mm_loadu_epi8 (void const *__P)
|
|||||||
struct __loadu_epi8 {
|
struct __loadu_epi8 {
|
||||||
__m128i_u __v;
|
__m128i_u __v;
|
||||||
} __attribute__((__packed__, __may_alias__));
|
} __attribute__((__packed__, __may_alias__));
|
||||||
return ((struct __loadu_epi8*)__P)->__v;
|
return ((const struct __loadu_epi8*)__P)->__v;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||||
_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
|
_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
|
||||||
{
|
{
|
||||||
return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
|
return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
|
||||||
(__v16qi) __W,
|
(__v16qi) __W,
|
||||||
(__mmask16) __U);
|
(__mmask16) __U);
|
||||||
}
|
}
|
||||||
@ -2355,7 +2355,7 @@ _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
|
|||||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||||
_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
|
_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
|
||||||
{
|
{
|
||||||
return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
|
return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
|
||||||
(__v16qi)
|
(__v16qi)
|
||||||
_mm_setzero_si128 (),
|
_mm_setzero_si128 (),
|
||||||
(__mmask16) __U);
|
(__mmask16) __U);
|
||||||
@ -2367,13 +2367,13 @@ _mm256_loadu_epi8 (void const *__P)
|
|||||||
struct __loadu_epi8 {
|
struct __loadu_epi8 {
|
||||||
__m256i_u __v;
|
__m256i_u __v;
|
||||||
} __attribute__((__packed__, __may_alias__));
|
} __attribute__((__packed__, __may_alias__));
|
||||||
return ((struct __loadu_epi8*)__P)->__v;
|
return ((const struct __loadu_epi8*)__P)->__v;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||||
_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
|
_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
|
||||||
{
|
{
|
||||||
return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
|
return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
|
||||||
(__v32qi) __W,
|
(__v32qi) __W,
|
||||||
(__mmask32) __U);
|
(__mmask32) __U);
|
||||||
}
|
}
|
||||||
@ -2381,7 +2381,7 @@ _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
|
|||||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||||
_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
|
_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
|
||||||
{
|
{
|
||||||
return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
|
return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
|
||||||
(__v32qi)
|
(__v32qi)
|
||||||
_mm256_setzero_si256 (),
|
_mm256_setzero_si256 (),
|
||||||
(__mmask32) __U);
|
(__mmask32) __U);
|
||||||
|
|||||||
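A usage sketch of the byte/word masked loads touched above (my own illustration with a hypothetical helper; assumes AVX512BW+VL). Passing a pointer-to-const source is exactly the case the added const casts keep warning-free:

/* Sketch only: lanes with a clear mask bit are zeroed and not read. */
#include <immintrin.h>

static __m128i load_masked_words(const short *src, __mmask8 m) {
  return _mm_maskz_loadu_epi16(m, src);
}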
112 lib/include/avx512vlintrin.h vendored

@@ -2505,7 +2505,7 @@ _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
-  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
+  return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
@@ -2513,7 +2513,7 @@ _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
-  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
+  return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
@@ -2522,7 +2522,7 @@ _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
-  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
+  return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
@@ -2530,7 +2530,7 @@ _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
-  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
+  return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
@@ -2539,7 +2539,7 @@ _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
-  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
+  return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
@@ -2547,7 +2547,7 @@ _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
-  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
+  return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
@@ -2557,7 +2557,7 @@ _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
-  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
+  return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
@@ -2565,7 +2565,7 @@ _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
-  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
+  return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
@@ -2574,14 +2574,14 @@ _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
-  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
+  return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
-  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
+  return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
@@ -2590,14 +2590,14 @@ _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
-  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
+  return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
-  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
+  return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
@@ -2606,7 +2606,7 @@ _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
-  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
+  return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
@@ -2614,7 +2614,7 @@ _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
-  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
+  return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
@@ -2623,7 +2623,7 @@ _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
-  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
+  return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
@@ -2631,7 +2631,7 @@ _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
-  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
+  return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
@@ -5073,13 +5073,13 @@ _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
-  return *(__m128i *) __P;
+  return *(const __m128i *) __P;
-  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
+  return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
@@ -5088,7 +5088,7 @@ _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
-  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
+  return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
@@ -5098,13 +5098,13 @@ _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
-  return *(__m256i *) __P;
+  return *(const __m256i *) __P;
-  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
+  return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
@@ -5113,7 +5113,7 @@ _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
-  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
+  return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
@@ -5183,13 +5183,13 @@ _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
-  return *(__m128i *) __P;
+  return *(const __m128i *) __P;
-  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
+  return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
@@ -5198,7 +5198,7 @@ _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
-  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
+  return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
@@ -5208,13 +5208,13 @@ _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
-  return *(__m256i *) __P;
+  return *(const __m256i *) __P;
-  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
+  return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
@@ -5223,7 +5223,7 @@ _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
-  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
+  return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
@@ -5430,7 +5430,7 @@ _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
-  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
+  return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
@@ -5438,7 +5438,7 @@ _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
-  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
+  return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
@@ -5447,7 +5447,7 @@ _mm_maskz_load_pd (__mmask8 __U, void const *__P)
-  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
+  return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
@@ -5455,7 +5455,7 @@ _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
-  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
+  return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
@@ -5464,7 +5464,7 @@ _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
-  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
+  return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
@@ -5472,7 +5472,7 @@ _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
-  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
+  return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
@@ -5481,7 +5481,7 @@ _mm_maskz_load_ps (__mmask8 __U, void const *__P)
-  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
+  return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
@@ -5489,7 +5489,7 @@ _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
-  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
+  return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
@@ -5501,13 +5501,13 @@ _mm_loadu_epi64 (void const *__P)
-  return ((struct __loadu_epi64*)__P)->__v;
+  return ((const struct __loadu_epi64*)__P)->__v;
-  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
@@ -5515,7 +5515,7 @@ _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
-  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
@@ -5527,13 +5527,13 @@ _mm256_loadu_epi64 (void const *__P)
-  return ((struct __loadu_epi64*)__P)->__v;
+  return ((const struct __loadu_epi64*)__P)->__v;
-  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
@@ -5541,7 +5541,7 @@ _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
-  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
@@ -5553,13 +5553,13 @@ _mm_loadu_epi32 (void const *__P)
-  return ((struct __loadu_epi32*)__P)->__v;
+  return ((const struct __loadu_epi32*)__P)->__v;
-  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
@@ -5567,7 +5567,7 @@ _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
-  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
@@ -5579,13 +5579,13 @@ _mm256_loadu_epi32 (void const *__P)
-  return ((struct __loadu_epi32*)__P)->__v;
+  return ((const struct __loadu_epi32*)__P)->__v;
-  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
@@ -5593,7 +5593,7 @@ _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
-  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
@@ -5602,7 +5602,7 @@ _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
-  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+  return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
@@ -5610,7 +5610,7 @@ _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
-  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+  return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
@@ -5619,7 +5619,7 @@ _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
-  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+  return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
@@ -5627,7 +5627,7 @@ _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
-  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+  return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
@@ -5636,7 +5636,7 @@ _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
-  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+  return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
@@ -5644,7 +5644,7 @@ _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
-  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+  return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
@@ -5653,7 +5653,7 @@ _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
-  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+  return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
@@ -5661,7 +5661,7 @@ _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
-  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+  return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
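A usage sketch of one of the masked loads whose pointer cast gained const above (my own illustration with a hypothetical helper; assumes AVX512VL):

/* Sketch only: lanes selected by m come from memory, the rest keep fallback. */
#include <immintrin.h>

static __m256d blend_from_memory(__m256d fallback, __mmask8 m, const double *p) {
  return _mm256_mask_loadu_pd(fallback, m, p);
}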
10 lib/include/avxintrin.h vendored

@@ -3069,7 +3069,7 @@ _mm256_broadcast_ps(__m128 const *__a)
-  return *(__m256d *)__p;
+  return *(const __m256d *)__p;
@@ -3085,7 +3085,7 @@ _mm256_load_pd(double const *__p)
-  return *(__m256 *)__p;
+  return *(const __m256 *)__p;
@@ -3105,7 +3105,7 @@ _mm256_loadu_pd(double const *__p)
-  return ((struct __loadu_pd*)__p)->__v;
+  return ((const struct __loadu_pd*)__p)->__v;
@@ -3125,7 +3125,7 @@ _mm256_loadu_ps(float const *__p)
-  return ((struct __loadu_ps*)__p)->__v;
+  return ((const struct __loadu_ps*)__p)->__v;
@@ -3161,7 +3161,7 @@ _mm256_loadu_si256(__m256i_u const *__p)
-  return ((struct __loadu_si256*)__p)->__v;
+  return ((const struct __loadu_si256*)__p)->__v;
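A usage sketch for the AVX loads above (my own illustration with a hypothetical helper; assumes an AVX target). Loading through a pointer-to-const is the pattern the added const casts keep clean:

/* Sketch only: unaligned loads from read-only buffers. */
#include <immintrin.h>

static __m256 sum_eight(const float *a, const float *b) {
  return _mm256_add_ps(_mm256_loadu_ps(a), _mm256_loadu_ps(b));
}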
175 lib/include/bmiintrin.h vendored

@@ -14,27 +14,13 @@
 #ifndef __BMIINTRIN_H
 #define __BMIINTRIN_H
-#define _tzcnt_u16(a) (__tzcnt_u16((a)))
-
-#define _andn_u32(a, b) (__andn_u32((a), (b)))
-
-/* _bextr_u32 != __bextr_u32 */
-#define _blsi_u32(a) (__blsi_u32((a)))
-
-#define _blsmsk_u32(a) (__blsmsk_u32((a)))
-
-#define _blsr_u32(a) (__blsr_u32((a)))
-
-#define _tzcnt_u32(a) (__tzcnt_u32((a)))
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
-
 /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
    instruction behaves as BSF on non-BMI targets, there is code that expects
    to use it as a potentially faster version of BSF. */
 #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+#define _tzcnt_u16(a) (__tzcnt_u16((a)))
@@ -51,6 +37,94 @@ __tzcnt_u16(unsigned short __X)
   return __builtin_ia32_tzcnt_u16(__X);
 }
+
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+///
+/// \param __X
+///    An unsigned 32-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 32-bit integer containing the number of trailing zero
+///    bits in the operand.
+static __inline__ unsigned int __RELAXED_FN_ATTRS
+__tzcnt_u32(unsigned int __X)
+{
+  return __builtin_ia32_tzcnt_u32(__X);
+}
+
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+///
+/// \param __X
+///    An unsigned 32-bit integer whose trailing zeros are to be counted.
+/// \returns An 32-bit integer containing the number of trailing zero bits in
+///    the operand.
+static __inline__ int __RELAXED_FN_ATTRS
+_mm_tzcnt_32(unsigned int __X)
+{
+  return __builtin_ia32_tzcnt_u32(__X);
+}
+
+#define _tzcnt_u32(a) (__tzcnt_u32((a)))
+
+#ifdef __x86_64__
+
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+///
+/// \param __X
+///    An unsigned 64-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 64-bit integer containing the number of trailing zero
+///    bits in the operand.
+static __inline__ unsigned long long __RELAXED_FN_ATTRS
+__tzcnt_u64(unsigned long long __X)
+{
+  return __builtin_ia32_tzcnt_u64(__X);
+}
+
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
+///
+/// \param __X
+///    An unsigned 64-bit integer whose trailing zeros are to be counted.
+/// \returns An 64-bit integer containing the number of trailing zero bits in
+///    the operand.
+static __inline__ long long __RELAXED_FN_ATTRS
+_mm_tzcnt_64(unsigned long long __X)
+{
+  return __builtin_ia32_tzcnt_u64(__X);
+}
+
+#define _tzcnt_u64(a) (__tzcnt_u64((a)))
+
+#endif /* __x86_64__ */
+
+#undef __RELAXED_FN_ATTRS
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
+
+#define _andn_u32(a, b) (__andn_u32((a), (b)))
+
+/* _bextr_u32 != __bextr_u32 */
+#define _blsi_u32(a) (__blsi_u32((a)))
+
+#define _blsmsk_u32(a) (__blsmsk_u32((a)))
+
+#define _blsr_u32(a) (__blsr_u32((a)))
+
 /// Performs a bitwise AND of the second operand with the one's
 /// complement of the first operand.
@@ -169,38 +243,6 @@ __blsr_u32(unsigned int __X)
   return __X & (__X - 1);
 }
-
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
-///
-/// \param __X
-///    An unsigned 32-bit integer whose trailing zeros are to be counted.
-/// \returns An unsigned 32-bit integer containing the number of trailing zero
-///    bits in the operand.
-static __inline__ unsigned int __RELAXED_FN_ATTRS
-__tzcnt_u32(unsigned int __X)
-{
-  return __builtin_ia32_tzcnt_u32(__X);
-}
-
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
-///
-/// \param __X
-///    An unsigned 32-bit integer whose trailing zeros are to be counted.
-/// \returns An 32-bit integer containing the number of trailing zero bits in
-///    the operand.
-static __inline__ int __RELAXED_FN_ATTRS
-_mm_tzcnt_32(unsigned int __X)
-{
-  return __builtin_ia32_tzcnt_u32(__X);
-}
-
 #ifdef __x86_64__
 #define _andn_u64(a, b) (__andn_u64((a), (b)))
@@ -212,8 +254,6 @@ _mm_tzcnt_32(unsigned int __X)
 #define _blsr_u64(a) (__blsr_u64((a)))
-
-#define _tzcnt_u64(a) (__tzcnt_u64((a)))
@@ -332,41 +372,10 @@ __blsr_u64(unsigned long long __X)
   return __X & (__X - 1);
 }
-
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
-///
-/// \param __X
-///    An unsigned 64-bit integer whose trailing zeros are to be counted.
-/// \returns An unsigned 64-bit integer containing the number of trailing zero
-///    bits in the operand.
-static __inline__ unsigned long long __RELAXED_FN_ATTRS
-__tzcnt_u64(unsigned long long __X)
-{
-  return __builtin_ia32_tzcnt_u64(__X);
-}
-
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
-///
-/// \param __X
-///    An unsigned 64-bit integer whose trailing zeros are to be counted.
-/// \returns An 64-bit integer containing the number of trailing zero bits in
-///    the operand.
-static __inline__ long long __RELAXED_FN_ATTRS
-_mm_tzcnt_64(unsigned long long __X)
-{
-  return __builtin_ia32_tzcnt_u64(__X);
-}
-
 #endif /* __x86_64__ */
 #undef __DEFAULT_FN_ATTRS
-#undef __RELAXED_FN_ATTRS
+
+#endif /* !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) */
 #endif /* __BMIINTRIN_H */
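With the tzcnt helpers moved out of the BMI-gated region, they can be called even in builds without -mbmi. A sketch of that use (my own illustration with a hypothetical helper):

/* Sketch only: on non-BMI CPUs TZCNT executes as BSF, so guard the zero case. */
#include <x86intrin.h>

static unsigned lowest_set_bit_index(unsigned mask) {
  return mask ? __tzcnt_u32(mask) : 32u;
}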
4 lib/include/cpuid.h vendored

@@ -38,8 +38,8 @@
 #define signature_TM2_ecx 0x3638784d
 /* NSC: "Geode by NSC" */
 #define signature_NSC_ebx 0x646f6547
-#define signature_NSC_edx 0x43534e20
+#define signature_NSC_edx 0x79622065
-#define signature_NSC_ecx 0x79622065
+#define signature_NSC_ecx 0x43534e20
 /* NEXGEN: "NexGenDriven" */
 #define signature_NEXGEN_ebx 0x4778654e
 #define signature_NEXGEN_edx 0x72446e65
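The swap matters because CPUID vendor-id characters come back in EBX, EDX, ECX order. A sketch of a vendor check using these constants (my own illustration with a hypothetical helper):

/* Sketch only: leaf 0 returns "Geod" in EBX, "e by" in EDX, " NSC" in ECX. */
#include <cpuid.h>

static int is_geode_by_nsc(void) {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
    return 0;
  return ebx == signature_NSC_ebx && edx == signature_NSC_edx &&
         ecx == signature_NSC_ecx;
}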
28 lib/include/emmintrin.h vendored

@@ -1578,7 +1578,7 @@ _mm_cvtsd_f64(__m128d __a)
-  return *(__m128d*)__dp;
+  return *(const __m128d*)__dp;
@@ -1599,7 +1599,7 @@ _mm_load1_pd(double const *__dp)
-  double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
+  double __u = ((const struct __mm_load1_pd_struct*)__dp)->__u;
@@ -1622,7 +1622,7 @@ _mm_load1_pd(double const *__dp)
-  __m128d __u = *(__m128d*)__dp;
+  __m128d __u = *(const __m128d*)__dp;
@@ -1643,7 +1643,7 @@ _mm_loadu_pd(double const *__dp)
-  return ((struct __loadu_pd*)__dp)->__v;
+  return ((const struct __loadu_pd*)__dp)->__v;
@@ -1663,7 +1663,7 @@ _mm_loadu_si64(void const *__a)
-  long long __u = ((struct __loadu_si64*)__a)->__v;
+  long long __u = ((const struct __loadu_si64*)__a)->__v;
@@ -1684,7 +1684,7 @@ _mm_loadu_si32(void const *__a)
-  int __u = ((struct __loadu_si32*)__a)->__v;
+  int __u = ((const struct __loadu_si32*)__a)->__v;
@@ -1705,7 +1705,7 @@ _mm_loadu_si16(void const *__a)
-  short __u = ((struct __loadu_si16*)__a)->__v;
+  short __u = ((const struct __loadu_si16*)__a)->__v;
@@ -1726,7 +1726,7 @@ _mm_load_sd(double const *__dp)
-  double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
+  double __u = ((const struct __mm_load_sd_struct*)__dp)->__u;
@@ -1753,7 +1753,7 @@ _mm_loadh_pd(__m128d __a, double const *__dp)
-  double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
+  double __u = ((const struct __mm_loadh_pd_struct*)__dp)->__u;
@@ -1780,7 +1780,7 @@ _mm_loadl_pd(__m128d __a, double const *__dp)
-  double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
+  double __u = ((const struct __mm_loadl_pd_struct*)__dp)->__u;
@@ -2288,7 +2288,7 @@ _mm_adds_epu16(__m128i __a, __m128i __b)
-/// Computes the rounded avarages of corresponding elements of two
+/// Computes the rounded averages of corresponding elements of two
@@ -2308,7 +2308,7 @@ _mm_avg_epu8(__m128i __a, __m128i __b)
-/// Computes the rounded avarages of corresponding elements of two
+/// Computes the rounded averages of corresponding elements of two
@@ -3550,7 +3550,7 @@ _mm_loadu_si128(__m128i_u const *__p)
-  return ((struct __loadu_si128*)__p)->__v;
+  return ((const struct __loadu_si128*)__p)->__v;
@@ -3571,7 +3571,7 @@ _mm_loadl_epi64(__m128i_u const *__p)
-  return __extension__ (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
+  return __extension__ (__m128i) { ((const struct __mm_loadl_epi64_struct*)__p)->__u, 0};
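A usage sketch for the SSE2 loads touched above (my own illustration with a hypothetical helper): a const source buffer now flows through the unaligned-load helpers without the headers casting const away internally.

/* Sketch only: load one int into the low lane, upper lanes zeroed. */
#include <emmintrin.h>

static __m128i widen_one_int(const int *p) {
  return _mm_loadu_si32(p); /* { *p, 0, 0, 0 } */
}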
68
lib/include/ia32intrin.h
vendored
68
lib/include/ia32intrin.h
vendored
@ -195,6 +195,74 @@ __writeeflags(unsigned int __f)
 }
 #endif /* !__x86_64__ */
 
+/** Cast a 32-bit float value to a 32-bit unsigned integer value
+ *
+ *  \headerfile <x86intrin.h>
+ *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
+ *  and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
+ *
+ *  \param __A
+ *     A 32-bit float value.
+ *  \returns a 32-bit unsigned integer containing the converted value.
+ */
+static __inline__ unsigned int __attribute__((__always_inline__))
+_castf32_u32(float __A) {
+  unsigned int D;
+  __builtin_memcpy(&D, &__A, sizeof(__A));
+  return D;
+}
+
+/** Cast a 64-bit float value to a 64-bit unsigned integer value
+ *
+ *  \headerfile <x86intrin.h>
+ *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
+ *  and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
+ *
+ *  \param __A
+ *     A 64-bit float value.
+ *  \returns a 64-bit unsigned integer containing the converted value.
+ */
+static __inline__ unsigned long long __attribute__((__always_inline__))
+_castf64_u64(double __A) {
+  unsigned long long D;
+  __builtin_memcpy(&D, &__A, sizeof(__A));
+  return D;
+}
+
+/** Cast a 32-bit unsigned integer value to a 32-bit float value
+ *
+ *  \headerfile <x86intrin.h>
+ *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
+ *  and corresponds to the <c> FLDS </c> instruction in ia32.
+ *
+ *  \param __A
+ *     A 32-bit unsigned integer value.
+ *  \returns a 32-bit float value containing the converted value.
+ */
+static __inline__ float __attribute__((__always_inline__))
+_castu32_f32(unsigned int __A) {
+  float D;
+  __builtin_memcpy(&D, &__A, sizeof(__A));
+  return D;
+}
+
+/** Cast a 64-bit unsigned integer value to a 64-bit float value
+ *
+ *  \headerfile <x86intrin.h>
+ *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
+ *  and corresponds to the <c> FLDL </c> instruction in ia32.
+ *
+ *  \param __A
+ *     A 64-bit unsigned integer value.
+ *  \returns a 64-bit float value containing the converted value.
+ */
+static __inline__ double __attribute__((__always_inline__))
+_castu64_f64(unsigned long long __A) {
+  double D;
+  __builtin_memcpy(&D, &__A, sizeof(__A));
+  return D;
+}
+
 /** Adds the unsigned integer operand to the CRC-32C checksum of the
  *  unsigned char operand.
  *
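The four `_cast*` intrinsics added above are bit-pattern casts implemented with `__builtin_memcpy`, so they are well defined even under strict aliasing. A small illustrative sketch (assumes an x86 target; the variable names are made up):

#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  unsigned int bits = _castf32_u32(1.0f);             /* IEEE-754 pattern 0x3f800000 */
  float negated = _castu32_f32(bits ^ 0x80000000u);   /* flip the sign bit: -1.0f */
  printf("%#x %f\n", bits, negated);
  return 0;
}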
9
lib/include/immintrin.h
vendored
@ -64,9 +64,8 @@
 #include <vpclmulqdqintrin.h>
 #endif
 
-#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
+/* No feature check desired due to internal checks */
 #include <bmiintrin.h>
-#endif
 
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
 #include <bmi2intrin.h>
@ -302,7 +301,7 @@ _loadbe_i16(void const * __P) {
   struct __loadu_i16 {
     short __v;
   } __attribute__((__packed__, __may_alias__));
-  return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);
+  return __builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
 }
 
 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
@ -318,7 +317,7 @@ _loadbe_i32(void const * __P) {
   struct __loadu_i32 {
     int __v;
   } __attribute__((__packed__, __may_alias__));
-  return __builtin_bswap32(((struct __loadu_i32*)__P)->__v);
+  return __builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
 }
 
 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
@ -335,7 +334,7 @@ _loadbe_i64(void const * __P) {
   struct __loadu_i64 {
     long long __v;
   } __attribute__((__packed__, __may_alias__));
-  return __builtin_bswap64(((struct __loadu_i64*)__P)->__v);
+  return __builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
 }
 
 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
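The `_loadbe_*` helpers wrap a packed, may_alias struct load plus a byte swap; with the const-qualified casts above they no longer discard qualifiers when given a pointer to const data. Illustrative sketch (assumes a little-endian x86 target built with -mmovbe; `read_be32` is a made-up name):

#include <immintrin.h>
#include <stdint.h>

/* Read a big-endian 32-bit value from an unaligned, read-only buffer. */
static uint32_t read_be32(const void *p) {
  return (uint32_t)_loadbe_i32(p);
}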
26
lib/include/intrin.h
vendored
@ -36,6 +36,12 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
 
+#if __x86_64__
+#define __LPTRINT_TYPE__ __int64
+#else
+#define __LPTRINT_TYPE__ long
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@ -94,8 +100,7 @@ void __outword(unsigned short, unsigned short);
 void __outwordstring(unsigned short, unsigned short *, unsigned long);
 unsigned long __readcr0(void);
 unsigned long __readcr2(void);
-static __inline__
-unsigned long __readcr3(void);
+unsigned __LPTRINT_TYPE__ __readcr3(void);
 unsigned long __readcr4(void);
 unsigned long __readcr8(void);
 unsigned int __readdr(unsigned int);
@ -132,7 +137,7 @@ void __vmx_vmptrst(unsigned __int64 *);
 void __wbinvd(void);
 void __writecr0(unsigned int);
 static __inline__
-void __writecr3(unsigned int);
+void __writecr3(unsigned __INTPTR_TYPE__);
 void __writecr4(unsigned int);
 void __writecr8(unsigned int);
 void __writedr(unsigned int, unsigned int);
@ -164,7 +169,6 @@ long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);
 long _InterlockedExchangeAdd_HLERelease(long volatile *, long);
 __int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64);
 __int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64);
-void __cdecl _invpcid(unsigned int, void *);
 static __inline__ void
 __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
 _ReadBarrier(void);
@ -565,24 +569,26 @@ __readmsr(unsigned long __register) {
   __asm__ ("rdmsr" : "=d"(__edx), "=a"(__eax) : "c"(__register));
   return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;
 }
+#endif
 
-static __inline__ unsigned long __DEFAULT_FN_ATTRS
+static __inline__ unsigned __LPTRINT_TYPE__ __DEFAULT_FN_ATTRS
 __readcr3(void) {
-  unsigned long __cr3_val;
-  __asm__ __volatile__ ("mov %%cr3, %0" : "=q"(__cr3_val) : : "memory");
+  unsigned __LPTRINT_TYPE__ __cr3_val;
+  __asm__ __volatile__ ("mov %%cr3, %0" : "=r"(__cr3_val) : : "memory");
   return __cr3_val;
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-__writecr3(unsigned int __cr3_val) {
-  __asm__ ("mov %0, %%cr3" : : "q"(__cr3_val) : "memory");
+__writecr3(unsigned __INTPTR_TYPE__ __cr3_val) {
+  __asm__ ("mov %0, %%cr3" : : "r"(__cr3_val) : "memory");
 }
-#endif
 
 #ifdef __cplusplus
 }
 #endif
 
+#undef __LPTRINT_TYPE__
+
 #undef __DEFAULT_FN_ATTRS
 
 #endif /* __INTRIN_H */
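`__LPTRINT_TYPE__` makes __readcr3/__writecr3 traffic in a pointer-width integer (`__int64` on x86_64, `long` on 32-bit x86), so a 64-bit CR3 value is no longer truncated through `unsigned long` on LLP64 targets. CR3 is a privileged register, so the sketch below is ring-0 only and purely illustrative (`current_cr3` is a made-up name):

#include <intrin.h>

/* Illustrative, kernel-mode only: fetch the current page-table base from CR3. */
static unsigned __int64 current_cr3(void) {
  return __readcr3();   /* 64 bits wide when targeting x86_64 with clang's intrin.h */
}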
4
lib/include/mwaitxintrin.h
vendored
@ -17,9 +17,9 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mwaitx")))
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_monitorx(void const * __p, unsigned __extensions, unsigned __hints)
+_mm_monitorx(void * __p, unsigned __extensions, unsigned __hints)
 {
-  __builtin_ia32_monitorx((void *)__p, __extensions, __hints);
+  __builtin_ia32_monitorx(__p, __extensions, __hints);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
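_mm_monitorx now takes a plain `void *`, matching `__builtin_ia32_monitorx` directly instead of casting the const away. Illustrative busy-wait sketch (assumes an AMD target built with -mmwaitx; `wait_for_flag` is a made-up helper):

#include <x86intrin.h>

static void wait_for_flag(volatile int *flag) {
  while (*flag == 0) {
    _mm_monitorx((void *)flag, 0, 0);  /* arm the address monitor on this line */
    if (*flag == 0)
      _mm_mwaitx(0, 0, 0);             /* sleep until the monitored line is written */
  }
}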
2
lib/include/opencl-c-base.h
vendored
@ -406,7 +406,7 @@ typedef enum memory_order
 #define CLK_OUT_OF_RESOURCES                  -5
 
 #define CLK_NULL_QUEUE                        0
-#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t))
+#define CLK_NULL_EVENT (__builtin_astype(((__SIZE_MAX__)), clk_event_t))
 
 // execution model related definitions
 #define CLK_ENQUEUE_FLAGS_NO_WAIT             0x0
2
lib/include/pmmintrin.h
vendored
@ -263,7 +263,7 @@ _mm_movedup_pd(__m128d __a)
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
 {
-  __builtin_ia32_monitor((void *)__p, __extensions, __hints);
+  __builtin_ia32_monitor(__p, __extensions, __hints);
 }
 
 /// Used with the MONITOR instruction to wait while the processor is in
@ -35,6 +35,8 @@
 #ifndef EMMINTRIN_H_
 #define EMMINTRIN_H_
 
+#if defined(__linux__) && defined(__ppc64__)
+
 #include <altivec.h>
 
 /* We need definitions from the SSE header files. */
@ -1747,7 +1749,7 @@ _mm_sll_epi64 (__m128i __A, __m128i __B)
   lshift = vec_splat ((__v2du) __B, 0);
   shmask = vec_cmplt (lshift, shmax);
   result = vec_sl ((__v2du) __A, lshift);
-  result = vec_sel ((__v2du) shmask, result, shmask);
+  result = (__v2du)vec_sel ((__v2df) shmask, (__v2df)result, shmask);
 
   return (__m128i) result;
 }
@ -1841,7 +1843,7 @@ _mm_srl_epi64 (__m128i __A, __m128i __B)
   rshift = vec_splat ((__v2du) __B, 0);
   shmask = vec_cmplt (rshift, shmax);
   result = vec_sr ((__v2du) __A, rshift);
-  result = vec_sel ((__v2du) shmask, result, shmask);
+  result = (__v2du)vec_sel ((__v2df) shmask, (__v2df)result, shmask);
 
   return (__m128i) result;
 }
@ -2315,4 +2317,8 @@ _mm_castsi128_pd(__m128i __A)
   return (__m128d) __A;
 }
 
+#else
+#include_next <emmintrin.h>
+#endif /* defined(__linux__) && defined(__ppc64__) */
+
 #endif /* EMMINTRIN_H_ */
@ -10,6 +10,8 @@
 #ifndef _MM_MALLOC_H_INCLUDED
 #define _MM_MALLOC_H_INCLUDED
 
+#if defined(__linux__) && defined(__ppc64__)
+
 #include <stdlib.h>
 
 /* We can't depend on <stdlib.h> since the prototype of posix_memalign
@ -41,4 +43,8 @@ _mm_free (void * ptr)
   free (ptr);
 }
 
+#else
+#include_next <mm_malloc.h>
+#endif
+
 #endif /* _MM_MALLOC_H_INCLUDED */
@ -35,6 +35,8 @@
 #ifndef _MMINTRIN_H_INCLUDED
 #define _MMINTRIN_H_INCLUDED
 
+#if defined(__linux__) && defined(__ppc64__)
+
 #include <altivec.h>
 /* The Intel API is flexible enough that we must allow aliasing with other
    vector types, and their scalar components. */
@ -1440,4 +1442,9 @@ extern __inline __m64
   return (res.as_m64);
 #endif
 }
+
+#else
+#include_next <mmintrin.h>
+#endif /* defined(__linux__) && defined(__ppc64__) */
+
 #endif /* _MMINTRIN_H_INCLUDED */
150
lib/include/ppc_wrappers/pmmintrin.h
Normal file
@ -0,0 +1,150 @@
/*===---- pmmintrin.h - Implementation of SSE3 intrinsics on PowerPC -------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0. */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.
   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.
   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.

   In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA
   is a good match for most SIMD operations.  However the Horizontal
   add/sub requires the data pairs be permuted into a separate
   registers with vertical even/odd alignment for the operation.
   And the addsub operation requires the sign of only the even numbered
   elements be flipped (xored with -0.0).
   For larger blocks of code using these intrinsic implementations,
   the compiler be should be able to schedule instructions to avoid
   additional latency.

   In the specific case of the monitor and mwait instructions there are
   no direct equivalent in the PowerISA at this time.  So those
   intrinsics are not implemented.  */
#error "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this warning."
#endif

#ifndef PMMINTRIN_H_
#define PMMINTRIN_H_

#if defined(__linux__) && defined(__ppc64__)

/* We need definitions from the SSE2 and SSE header files*/
#include <emmintrin.h>

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_ps (__m128 __X, __m128 __Y)
{
  const __v4sf even_n0 = {-0.0, 0.0, -0.0, 0.0};
  __v4sf even_neg_Y = vec_xor(__Y, even_n0);
  return (__m128) vec_add (__X, even_neg_Y);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_pd (__m128d __X, __m128d __Y)
{
  const __v2df even_n0 = {-0.0, 0.0};
  __v2df even_neg_Y = vec_xor(__Y, even_n0);
  return (__m128d) vec_add (__X, even_neg_Y);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_ps (__m128 __X, __m128 __Y)
{
  __vector unsigned char xform2 = {
      0x00, 0x01, 0x02, 0x03,
      0x08, 0x09, 0x0A, 0x0B,
      0x10, 0x11, 0x12, 0x13,
      0x18, 0x19, 0x1A, 0x1B
    };
  __vector unsigned char xform1 = {
      0x04, 0x05, 0x06, 0x07,
      0x0C, 0x0D, 0x0E, 0x0F,
      0x14, 0x15, 0x16, 0x17,
      0x1C, 0x1D, 0x1E, 0x1F
    };
  return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
                           vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_ps (__m128 __X, __m128 __Y)
{
  __vector unsigned char xform2 = {
      0x00, 0x01, 0x02, 0x03,
      0x08, 0x09, 0x0A, 0x0B,
      0x10, 0x11, 0x12, 0x13,
      0x18, 0x19, 0x1A, 0x1B
    };
  __vector unsigned char xform1 = {
      0x04, 0x05, 0x06, 0x07,
      0x0C, 0x0D, 0x0E, 0x0F,
      0x14, 0x15, 0x16, 0x17,
      0x1C, 0x1D, 0x1E, 0x1F
    };
  return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
                           vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pd (__m128d __X, __m128d __Y)
{
  return (__m128d) vec_add (vec_mergeh ((__v2df) __X, (__v2df)__Y),
                            vec_mergel ((__v2df) __X, (__v2df)__Y));
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pd (__m128d __X, __m128d __Y)
{
  return (__m128d) vec_sub (vec_mergeh ((__v2df) __X, (__v2df)__Y),
                            vec_mergel ((__v2df) __X, (__v2df)__Y));
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehdup_ps (__m128 __X)
{
  return (__m128)vec_mergeo ((__v4su)__X, (__v4su)__X);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_moveldup_ps (__m128 __X)
{
  return (__m128)vec_mergee ((__v4su)__X, (__v4su)__X);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loaddup_pd (double const *__P)
{
  return (__m128d) vec_splats (*__P);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movedup_pd (__m128d __X)
{
  return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lddqu_si128 (__m128i const *__P)
{
  return (__m128i) (vec_vsx_ld(0, (signed int const *)__P));
}

/* POWER8 / POWER9 have no equivalent for _mm_monitor nor _mm_wait. */

#else
#include_next <pmmintrin.h>
#endif /* defined(__linux__) && defined(__ppc64__) */

#endif /* PMMINTRIN_H_ */
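The new wrapper maps SSE3 onto VMX/VSX: horizontal add/sub becomes vec_perm into even/odd lanes followed by a vertical vec_add/vec_sub, as the header comment describes. Illustrative porting sketch (assumes a powerpc64le-linux build with -maltivec -mvsx -DNO_WARN_X86_INTRINSICS; `horizontal_sum4` is a made-up helper):

#include <pmmintrin.h>

static float horizontal_sum4(__m128 v) {
  __m128 t = _mm_hadd_ps(v, v);   /* pairwise sums */
  t = _mm_hadd_ps(t, t);          /* total replicated across lanes */
  float out[4];
  _mm_storeu_ps(out, t);
  return out[0];
}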
85
lib/include/ppc_wrappers/smmintrin.h
Normal file
@ -0,0 +1,85 @@
/*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.

   NOTE: This is NOT a complete implementation of the SSE4 intrinsics! */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerp64/powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets. */
#error \
    "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif

#ifndef SMMINTRIN_H_
#define SMMINTRIN_H_

#if defined(__linux__) && defined(__ppc64__)

#include <altivec.h>
#include <emmintrin.h>

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi8(__m128i __X, const int __N) {
  return (unsigned char)((__v16qi)__X)[__N & 15];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi32(__m128i __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi64(__m128i __X, const int __N) {
  return ((__v2di)__X)[__N & 1];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_ps(__m128 __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) {
  __v16qi __charmask = vec_splats((signed char)__imm8);
  __charmask = vec_gb(__charmask);
  __v8hu __shortmask = (__v8hu)vec_unpackh(__charmask);
#ifdef __BIG_ENDIAN__
  __shortmask = vec_reve(__shortmask);
#endif
  return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) {
  const __v16qu __seven = vec_splats((unsigned char)0x07);
  __v16qu __lmask = vec_sra((__v16qu)__mask, __seven);
  return (__m128i)vec_sel((__v16qu)__A, (__v16qu)__B, __lmask);
}

#else
#include_next <smmintrin.h>
#endif /* defined(__linux__) && defined(__ppc64__) */

#endif /* _SMMINTRIN_H_ */
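Here _mm_blendv_epi8 reduces to a vec_sra that smears each mask byte's sign bit plus a vec_sel. A small illustrative sketch of how it composes with the SSE2 wrapper it includes (same powerpc64le / -DNO_WARN_X86_INTRINSICS assumptions as above; `max_i8` is a made-up name):

#include <smmintrin.h>

/* Per-byte maximum via compare + blend (signed compare, fine for a demo). */
static __m128i max_i8(__m128i a, __m128i b) {
  __m128i gt = _mm_cmpgt_epi8(b, a);
  return _mm_blendv_epi8(a, b, gt);
}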
495
lib/include/ppc_wrappers/tmmintrin.h
Normal file
@ -0,0 +1,495 @@
/*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0. */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets. */
#endif

#ifndef TMMINTRIN_H_
#define TMMINTRIN_H_

#if defined(__linux__) && defined(__ppc64__)

#include <altivec.h>

/* We need definitions from the SSE header files. */
#include <pmmintrin.h>

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi16 (__m128i __A)
{
  return (__m128i) vec_abs ((__v8hi) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi32 (__m128i __A)
{
  return (__m128i) vec_abs ((__v4si) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi8 (__m128i __A)
{
  return (__m128i) vec_abs ((__v16qi) __A);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi16 (__m64 __A)
{
  __v8hi __B = (__v8hi) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi32 (__m64 __A)
{
  __v4si __B = (__v4si) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi8 (__m64 __A)
{
  __v16qi __B = (__v16qi) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8 (__m128i __A, __m128i __B, const unsigned int __count)
{
  if (__builtin_constant_p (__count) && __count < 16)
    {
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_reve ((__v16qu) __A);
      __B = (__m128i) vec_reve ((__v16qu) __B);
#endif
      __A = (__m128i) vec_sld ((__v16qu) __B, (__v16qu) __A, __count);
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_reve ((__v16qu) __A);
#endif
      return __A;
    }

  if (__count == 0)
    return __B;

  if (__count >= 16)
    {
      if (__count >= 32)
        {
          const __v16qu zero = { 0 };
          return (__m128i) zero;
        }
      else
        {
          const __v16qu __shift =
            vec_splats ((unsigned char) ((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
          return (__m128i) vec_sro ((__v16qu) __A, __shift);
#else
          return (__m128i) vec_slo ((__v16qu) __A, __shift);
#endif
        }
    }
  else
    {
      const __v16qu __shiftA =
        vec_splats ((unsigned char) ((16 - __count) * 8));
      const __v16qu __shiftB = vec_splats ((unsigned char) (__count * 8));
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_slo ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_sro ((__v16qu) __B, __shiftB);
#else
      __A = (__m128i) vec_sro ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_slo ((__v16qu) __B, __shiftB);
#endif
      return (__m128i) vec_or ((__v16qu) __A, (__v16qu) __B);
    }
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8 (__m64 __A, __m64 __B, unsigned int __count)
{
  if (__count < 16)
    {
      __v2du __C = { __B, __A };
#ifdef __LITTLE_ENDIAN__
      const __v4su __shift = { __count << 3, 0, 0, 0 };
      __C = (__v2du) vec_sro ((__v16qu) __C, (__v16qu) __shift);
#else
      const __v4su __shift = { 0, 0, 0, __count << 3 };
      __C = (__v2du) vec_slo ((__v16qu) __C, (__v16qu) __shift);
#endif
      return (__m64) __C[0];
    }
  else
    {
      const __m64 __zero = { 0 };
      return __zero;
    }
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_add (__C, __D);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi32 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
  __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
  __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
  return (__m128i) vec_add (__C, __D);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi16 (__m64 __A, __m64 __B)
{
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_add (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi32 (__m64 __A, __m64 __B)
{
  __v4si __C = (__v4si) (__v2du) { __A, __B };
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 };
  __v4si __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_add (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_epi16 (__m128i __A, __m128i __B)
{
  __v4si __C = { 0 }, __D = { 0 };
  __C = vec_sum4s ((__v8hi) __A, __C);
  __D = vec_sum4s ((__v8hi) __B, __D);
  __C = (__v4si) vec_packs (__C, __D);
  return (__m128i) __C;
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_pi16 (__m64 __A, __m64 __B)
{
  const __v4si __zero = { 0 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v4si __D = vec_sum4s (__C, __zero);
  __C = vec_packs (__D, __D);
  return (__m64) ((__v2du) __C)[1];
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_sub (__C, __D);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi32 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
  __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
  __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
  return (__m128i) vec_sub (__C, __D);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi16 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v8hi __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_sub (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi32 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 };
  __v4si __C = (__v4si) (__v2du) { __A, __B };
  __v4si __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_sub (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_subs (__C, __D);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_pi16 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v8hi __D = vec_perm (__C, __C, __P);
  __v8hi __E = vec_perm (__C, __C, __Q);
  __C = vec_subs (__D, __E);
  return (__m64) ((__v2du) __C)[1];
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8 (__m128i __A, __m128i __B)
{
  const __v16qi __zero = { 0 };
  __vector __bool char __select = vec_cmplt ((__v16qi) __B, __zero);
  __v16qi __C = vec_perm ((__v16qi) __A, (__v16qi) __A, (__v16qu) __B);
  return (__m128i) vec_sel (__C, __zero, __select);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi8 (__m64 __A, __m64 __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __C = (__v16qi) (__v2du) { __A, __A };
  __v16qi __D = (__v16qi) (__v2du) { __B, __B };
  __vector __bool char __select = vec_cmplt ((__v16qi) __D, __zero);
  __C = vec_perm ((__v16qi) __C, (__v16qi) __C, (__v16qu) __D);
  __C = vec_sel (__C, __zero, __select);
  return (__m64) ((__v2du) (__C))[0];
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi8 (__m128i __A, __m128i __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __selectneg = (__v16qi) vec_cmplt ((__v16qi) __B, __zero);
  __v16qi __selectpos =
    (__v16qi) vec_neg ((__v16qi) vec_cmpgt ((__v16qi) __B, __zero));
  __v16qi __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v16qi) __A, (__v16qi) __conv);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi16 (__m128i __A, __m128i __B)
{
  const __v8hi __zero = { 0 };
  __v8hi __selectneg = (__v8hi) vec_cmplt ((__v8hi) __B, __zero);
  __v8hi __selectpos =
    (__v8hi) vec_neg ((__v8hi) vec_cmpgt ((__v8hi) __B, __zero));
  __v8hi __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v8hi) __A, (__v8hi) __conv);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi32 (__m128i __A, __m128i __B)
{
  const __v4si __zero = { 0 };
  __v4si __selectneg = (__v4si) vec_cmplt ((__v4si) __B, __zero);
  __v4si __selectpos =
    (__v4si) vec_neg ((__v4si) vec_cmpgt ((__v4si) __B, __zero));
  __v4si __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v4si) __A, (__v4si) __conv);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi8 (__m64 __A, __m64 __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __C = (__v16qi) (__v2du) { __A, __A };
  __v16qi __D = (__v16qi) (__v2du) { __B, __B };
  __C = (__v16qi) _mm_sign_epi8 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi16 (__m64 __A, __m64 __B)
{
  const __v8hi __zero = { 0 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __A };
  __v8hi __D = (__v8hi) (__v2du) { __B, __B };
  __C = (__v8hi) _mm_sign_epi16 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi32 (__m64 __A, __m64 __B)
{
  const __v4si __zero = { 0 };
  __v4si __C = (__v4si) (__v2du) { __A, __A };
  __v4si __D = (__v4si) (__v2du) { __B, __B };
  __C = (__v4si) _mm_sign_epi32 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16 (__m128i __A, __m128i __B)
{
  __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __v8hi __C = vec_and (vec_unpackh ((__v16qi) __A), __unsigned);
  __v8hi __D = vec_and (vec_unpackl ((__v16qi) __A), __unsigned);
  __v8hi __E = vec_unpackh ((__v16qi) __B);
  __v8hi __F = vec_unpackl ((__v16qi) __B);
  __C = vec_mul (__C, __E);
  __D = vec_mul (__D, __F);
  const __v16qu __odds =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __E = vec_perm (__C, __D, __odds);
  __F = vec_perm (__C, __D, __evens);
  return (__m128i) vec_adds (__E, __F);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16 (__m64 __A, __m64 __B)
{
  __v8hi __C = (__v8hi) (__v2du) { __A, __A };
  __C = vec_unpackl ((__v16qi) __C);
  const __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __C = vec_and (__C, __unsigned);
  __v8hi __D = (__v8hi) (__v2du) { __B, __B };
  __D = vec_unpackl ((__v16qi) __D);
  __D = vec_mul (__C, __D);
  const __v16qu __odds =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __C = vec_perm (__D, __D, __odds);
  __D = vec_perm (__D, __D, __evens);
  __C = vec_adds (__C, __D);
  return (__m64) ((__v2du) (__C))[0];
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16 (__m128i __A, __m128i __B)
{
  __v4si __C = vec_unpackh ((__v8hi) __A);
  __v4si __D = vec_unpackh ((__v8hi) __B);
  __C = vec_mul (__C, __D);
  __D = vec_unpackl ((__v8hi) __A);
  __v4si __E = vec_unpackl ((__v8hi) __B);
  __D = vec_mul (__D, __E);
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  __D = vec_sr (__D, __shift);
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  __D = vec_add (__D, __ones);
  __D = vec_sr (__D, (__v4su) __ones);
  return (__m128i) vec_pack (__C, __D);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16 (__m64 __A, __m64 __B)
{
  __v4si __C = (__v4si) (__v2du) { __A, __A };
  __C = vec_unpackh ((__v8hi) __C);
  __v4si __D = (__v4si) (__v2du) { __B, __B };
  __D = vec_unpackh ((__v8hi) __D);
  __C = vec_mul (__C, __D);
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  __v8hi __E = vec_pack (__C, __D);
  return (__m64) ((__v2du) (__E))[0];
}

#else
#include_next <tmmintrin.h>
#endif /* defined(__linux__) && defined(__ppc64__) */

#endif /* TMMINTRIN_H_ */
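In this wrapper _mm_shuffle_epi8 is a vec_perm followed by a vec_sel that zeroes lanes whose control byte has its high bit set, mirroring PSHUFB. Illustrative sketch (same powerpc64le / -DNO_WARN_X86_INTRINSICS assumptions as above; `bswap32x4` is a made-up helper):

#include <tmmintrin.h>

/* Byte-swap each 32-bit lane with a PSHUFB-style control vector. */
static __m128i bswap32x4(__m128i v) {
  const __m128i ctl = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
                                   4, 5, 6, 7, 0, 1, 2, 3);
  return _mm_shuffle_epi8(v, ctl);
}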
@ -34,6 +34,8 @@
 #ifndef _XMMINTRIN_H_INCLUDED
 #define _XMMINTRIN_H_INCLUDED
 
+#if defined(__linux__) && defined(__ppc64__)
+
 /* Define four value permute mask */
 #define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))
 
@ -1835,4 +1837,8 @@ do { \
 /* For backward source compatibility. */
 //# include <emmintrin.h>
 
+#else
+#include_next <xmmintrin.h>
+#endif /* defined(__linux__) && defined(__ppc64__) */
+
 #endif /* _XMMINTRIN_H_INCLUDED */
14
lib/include/xmmintrin.h
vendored
@ -1627,7 +1627,7 @@ _mm_loadh_pi(__m128 __a, const __m64 *__p)
   struct __mm_loadh_pi_struct {
     __mm_loadh_pi_v2f32 __u;
   } __attribute__((__packed__, __may_alias__));
-  __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;
+  __mm_loadh_pi_v2f32 __b = ((const struct __mm_loadh_pi_struct*)__p)->__u;
   __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
   return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
 }
@ -1654,7 +1654,7 @@ _mm_loadl_pi(__m128 __a, const __m64 *__p)
   struct __mm_loadl_pi_struct {
     __mm_loadl_pi_v2f32 __u;
   } __attribute__((__packed__, __may_alias__));
-  __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;
+  __mm_loadl_pi_v2f32 __b = ((const struct __mm_loadl_pi_struct*)__p)->__u;
   __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
   return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
 }
@ -1680,7 +1680,7 @@ _mm_load_ss(const float *__p)
   struct __mm_load_ss_struct {
     float __u;
   } __attribute__((__packed__, __may_alias__));
-  float __u = ((struct __mm_load_ss_struct*)__p)->__u;
+  float __u = ((const struct __mm_load_ss_struct*)__p)->__u;
   return __extension__ (__m128){ __u, 0, 0, 0 };
 }
 
@ -1702,7 +1702,7 @@ _mm_load1_ps(const float *__p)
   struct __mm_load1_ps_struct {
     float __u;
   } __attribute__((__packed__, __may_alias__));
-  float __u = ((struct __mm_load1_ps_struct*)__p)->__u;
+  float __u = ((const struct __mm_load1_ps_struct*)__p)->__u;
   return __extension__ (__m128){ __u, __u, __u, __u };
 }
 
@ -1722,7 +1722,7 @@ _mm_load1_ps(const float *__p)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_load_ps(const float *__p)
 {
-  return *(__m128*)__p;
+  return *(const __m128*)__p;
 }
 
 /// Loads a 128-bit floating-point vector of [4 x float] from an
@ -1742,7 +1742,7 @@ _mm_loadu_ps(const float *__p)
   struct __loadu_ps {
     __m128_u __v;
   } __attribute__((__packed__, __may_alias__));
-  return ((struct __loadu_ps*)__p)->__v;
+  return ((const struct __loadu_ps*)__p)->__v;
 }
 
 /// Loads four packed float values, in reverse order, from an aligned
@ -2100,7 +2100,7 @@ _mm_storer_ps(float *__p, __m128 __a)
 ///    be generated. \n
 ///    _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will
 ///    be generated.
-#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \
+#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), \
                               ((sel) >> 2) & 1, (sel) & 0x3))
 #endif
 
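With the `(const void *)` cast, _mm_prefetch can be handed a pointer to const data without casting qualifiers away. Illustrative sketch (a dot product that issues a software prefetch a little way ahead; `dot` is a made-up helper and the prefetch distance is arbitrary):

#include <xmmintrin.h>

static float dot(const float *a, const float *b, int n) {
  float acc = 0.0f;
  for (int i = 0; i < n; ++i) {
    if ((i & 15) == 0)
      _mm_prefetch(a + i + 64, _MM_HINT_T0);  /* fine: a points to const floats */
    acc += a[i] * b[i];
  }
  return acc;
}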