From 74872263cc83bb7cbd8d548e83b441f4bf2f2249 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 22 Jan 2020 17:42:02 -0500 Subject: [PATCH] lib/headers: update to clang 10.x C headers upstream revision: 3cce3790072249cbe51b96cea26bc78019c11fd0 --- lib/include/__clang_cuda_intrinsics.h | 10 +- lib/include/altivec.h | 145 +- lib/include/arm_acle.h | 50 +- lib/include/arm_cmse.h | 217 + lib/include/arm_fp16.h | 983 +- lib/include/arm_mve.h | 12563 ++++++++++++++++++ lib/include/arm_neon.h | 16829 +++++------------------- lib/include/avx512bwintrin.h | 12 +- lib/include/avx512fintrin.h | 55 +- lib/include/avx512vlbwintrin.h | 24 +- lib/include/avx512vlintrin.h | 112 +- lib/include/avxintrin.h | 10 +- lib/include/bmiintrin.h | 175 +- lib/include/cpuid.h | 4 +- lib/include/emmintrin.h | 28 +- lib/include/ia32intrin.h | 68 + lib/include/immintrin.h | 9 +- lib/include/intrin.h | 26 +- lib/include/mwaitxintrin.h | 4 +- lib/include/opencl-c-base.h | 2 +- lib/include/pmmintrin.h | 2 +- lib/include/ppc_wrappers/emmintrin.h | 10 +- lib/include/ppc_wrappers/mm_malloc.h | 6 + lib/include/ppc_wrappers/mmintrin.h | 7 + lib/include/ppc_wrappers/pmmintrin.h | 150 + lib/include/ppc_wrappers/smmintrin.h | 85 + lib/include/ppc_wrappers/tmmintrin.h | 495 + lib/include/ppc_wrappers/xmmintrin.h | 6 + lib/include/xmmintrin.h | 14 +- 29 files changed, 17144 insertions(+), 14957 deletions(-) create mode 100644 lib/include/arm_cmse.h create mode 100644 lib/include/arm_mve.h create mode 100644 lib/include/ppc_wrappers/pmmintrin.h create mode 100644 lib/include/ppc_wrappers/smmintrin.h create mode 100644 lib/include/ppc_wrappers/tmmintrin.h diff --git a/lib/include/__clang_cuda_intrinsics.h b/lib/include/__clang_cuda_intrinsics.h index 2970d17f89..b67461a146 100644 --- a/lib/include/__clang_cuda_intrinsics.h +++ b/lib/include/__clang_cuda_intrinsics.h @@ -211,7 +211,15 @@ inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) { return __nvvm_vote_ballot_sync(mask, pred); } -inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); } +inline __device__ unsigned int __activemask() { +#if CUDA_VERSION < 9020 + return __nvvm_vote_ballot(1); +#else + unsigned int mask; + asm volatile("activemask.b32 %0;" : "=r"(mask)); + return mask; +#endif +} inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) { return __nvvm_fns(mask, base, offset); diff --git a/lib/include/altivec.h b/lib/include/altivec.h index 4008440b2b..7e231a2a42 100644 --- a/lib/include/altivec.h +++ b/lib/include/altivec.h @@ -2761,8 +2761,8 @@ static __inline__ vector double __ATTRS_o_ai vec_xl_len(double *__a, return (vector double)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector double __ATTRS_o_ai vec_xl_len_r(unsigned char *__a, - size_t __b) { +static __inline__ vector unsigned char __ATTRS_o_ai +vec_xl_len_r(unsigned char *__a, size_t __b) { vector unsigned char __res = (vector unsigned char)__builtin_vsx_lxvll(__a, (__b << 56)); #ifdef __LITTLE_ENDIAN__ @@ -2876,9 +2876,10 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a, #ifdef __VSX__ #define vec_ctf(__a, __b) \ _Generic((__a), vector int \ - : (vector float)__builtin_altivec_vcfsx((__a), (__b)), \ + : (vector float)__builtin_altivec_vcfsx((vector int)(__a), (__b)), \ vector unsigned int \ - : (vector float)__builtin_altivec_vcfux((vector int)(__a), (__b)), \ + : (vector float)__builtin_altivec_vcfux((vector unsigned int)(__a), \ + (__b)), \ vector unsigned long long \ : 
(__builtin_convertvector((vector unsigned long long)(__a), \ vector double) * \ @@ -2892,9 +2893,10 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a, #else #define vec_ctf(__a, __b) \ _Generic((__a), vector int \ - : (vector float)__builtin_altivec_vcfsx((__a), (__b)), \ + : (vector float)__builtin_altivec_vcfsx((vector int)(__a), (__b)), \ vector unsigned int \ - : (vector float)__builtin_altivec_vcfux((vector int)(__a), (__b))) + : (vector float)__builtin_altivec_vcfux((vector unsigned int)(__a), \ + (__b))) #endif /* vec_vcfsx */ @@ -2910,10 +2912,11 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a, #ifdef __VSX__ #define vec_cts(__a, __b) \ _Generic((__a), vector float \ - : __builtin_altivec_vctsxs((__a), (__b)), vector double \ + : __builtin_altivec_vctsxs((vector float)(__a), (__b)), \ + vector double \ : __extension__({ \ vector double __ret = \ - (__a) * \ + (vector double)(__a) * \ (vector double)(vector unsigned long long)((0x3ffULL + (__b)) \ << 52); \ __builtin_convertvector(__ret, vector signed long long); \ @@ -2931,10 +2934,11 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a, #ifdef __VSX__ #define vec_ctu(__a, __b) \ _Generic((__a), vector float \ - : __builtin_altivec_vctuxs((__a), (__b)), vector double \ + : __builtin_altivec_vctuxs((vector float)(__a), (__b)), \ + vector double \ : __extension__({ \ vector double __ret = \ - (__a) * \ + (vector double)(__a) * \ (vector double)(vector unsigned long long)((0x3ffULL + __b) \ << 52); \ __builtin_convertvector(__ret, vector unsigned long long); \ @@ -3286,9 +3290,7 @@ static __inline__ vector double __ATTRS_o_ai vec_div(vector double __a, /* vec_dss */ -static __inline__ void __attribute__((__always_inline__)) vec_dss(int __a) { - __builtin_altivec_dss(__a); -} +#define vec_dss __builtin_altivec_dss /* vec_dssall */ @@ -6301,19 +6303,20 @@ static __inline__ vector float __ATTRS_o_ai vec_or(vector float __a, #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_or(vector bool long long __a, vector double __b) { - return (vector unsigned long long)__a | (vector unsigned long long)__b; + return (vector double)((vector unsigned long long)__a | + (vector unsigned long long)__b); } static __inline__ vector double __ATTRS_o_ai vec_or(vector double __a, vector bool long long __b) { - return (vector unsigned long long)__a | (vector unsigned long long)__b; + return (vector double)((vector unsigned long long)__a | + (vector unsigned long long)__b); } static __inline__ vector double __ATTRS_o_ai vec_or(vector double __a, vector double __b) { - vector unsigned long long __res = - (vector unsigned long long)__a | (vector unsigned long long)__b; - return (vector double)__res; + return (vector double)((vector unsigned long long)__a | + (vector unsigned long long)__b); } static __inline__ vector signed long long __ATTRS_o_ai @@ -14781,7 +14784,7 @@ static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool long long __a, static __inline__ int __ATTRS_o_ai vec_all_ne(vector float __a, vector float __b) { #ifdef __VSX__ - return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ, __a, __b); + return __builtin_vsx_xvcmpeqsp_p(__CR6_EQ, __a, __b); #else return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, __a, __b); #endif @@ -16361,27 +16364,32 @@ vec_xl(signed long long __offset, unsigned char *__ptr) { static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset, signed short *__ptr) { - return *(unaligned_vec_sshort *)(__ptr + __offset); + signed char 
*__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_sshort *)__addr; } static inline __ATTRS_o_ai vector unsigned short vec_xl(signed long long __offset, unsigned short *__ptr) { - return *(unaligned_vec_ushort *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_ushort *)__addr; } static inline __ATTRS_o_ai vector signed int vec_xl(signed long long __offset, signed int *__ptr) { - return *(unaligned_vec_sint *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_sint *)__addr; } static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset, unsigned int *__ptr) { - return *(unaligned_vec_uint *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_uint *)__addr; } static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset, float *__ptr) { - return *(unaligned_vec_float *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_float *)__addr; } #ifdef __VSX__ @@ -16391,17 +16399,20 @@ typedef vector double unaligned_vec_double __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed long long vec_xl(signed long long __offset, signed long long *__ptr) { - return *(unaligned_vec_sll *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_sll *)__addr; } static inline __ATTRS_o_ai vector unsigned long long vec_xl(signed long long __offset, unsigned long long *__ptr) { - return *(unaligned_vec_ull *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_ull *)__addr; } static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset, double *__ptr) { - return *(unaligned_vec_double *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_double *)__addr; } #endif @@ -16411,12 +16422,14 @@ typedef vector unsigned __int128 unaligned_vec_ui128 __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed __int128 vec_xl(signed long long __offset, signed __int128 *__ptr) { - return *(unaligned_vec_si128 *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_si128 *)__addr; } static inline __ATTRS_o_ai vector unsigned __int128 vec_xl(signed long long __offset, unsigned __int128 *__ptr) { - return *(unaligned_vec_ui128 *)(__ptr + __offset); + signed char *__addr = (signed char *)__ptr + __offset; + return *(unaligned_vec_ui128 *)__addr; } #endif @@ -16425,27 +16438,27 @@ vec_xl(signed long long __offset, unsigned __int128 *__ptr) { #ifdef __LITTLE_ENDIAN__ static __inline__ vector signed char __ATTRS_o_ai vec_xl_be(signed long long __offset, signed char *__ptr) { - vector signed char __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr); + vector signed char __vec = (vector signed char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector unsigned char __ATTRS_o_ai vec_xl_be(signed long long __offset, unsigned char *__ptr) { - vector unsigned char __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr); + vector unsigned char __vec = (vector unsigned char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector signed short 
__ATTRS_o_ai vec_xl_be(signed long long __offset, signed short *__ptr) { - vector signed short __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr); + vector signed short __vec = (vector signed short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } static __inline__ vector unsigned short __ATTRS_o_ai vec_xl_be(signed long long __offset, unsigned short *__ptr) { - vector unsigned short __vec = __builtin_vsx_lxvd2x_be(__offset, __ptr); + vector unsigned short __vec = (vector unsigned short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } @@ -16513,50 +16526,58 @@ static inline __ATTRS_o_ai void vec_xst(vector unsigned char __vec, static inline __ATTRS_o_ai void vec_xst(vector signed short __vec, signed long long __offset, signed short *__ptr) { - *(unaligned_vec_sshort *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_sshort *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned short __vec, signed long long __offset, unsigned short *__ptr) { - *(unaligned_vec_ushort *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_ushort *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector signed int __vec, signed long long __offset, signed int *__ptr) { - *(unaligned_vec_sint *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_sint *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned int __vec, signed long long __offset, unsigned int *__ptr) { - *(unaligned_vec_uint *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_uint *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector float __vec, signed long long __offset, float *__ptr) { - *(unaligned_vec_float *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_float *)__addr = __vec; } #ifdef __VSX__ static inline __ATTRS_o_ai void vec_xst(vector signed long long __vec, signed long long __offset, signed long long *__ptr) { - *(unaligned_vec_sll *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_sll *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned long long __vec, signed long long __offset, unsigned long long *__ptr) { - *(unaligned_vec_ull *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_ull *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector double __vec, signed long long __offset, double *__ptr) { - *(unaligned_vec_double *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_double *)__addr = __vec; } #endif @@ -16564,13 +16585,15 @@ static inline __ATTRS_o_ai void vec_xst(vector double __vec, static inline __ATTRS_o_ai void vec_xst(vector signed __int128 __vec, signed long long __offset, signed __int128 *__ptr) { - *(unaligned_vec_si128 *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + *(unaligned_vec_si128 *)__addr = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec, signed long long __offset, unsigned __int128 *__ptr) { - *(unaligned_vec_ui128 *)(__ptr + __offset) = __vec; + signed char *__addr = (signed char *)__ptr + __offset; + 
*(unaligned_vec_ui128 *)__addr = __vec; } #endif @@ -16583,7 +16606,8 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed char __vec, vector signed char __tmp = __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr); + typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double; + __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned char __vec, @@ -16592,7 +16616,8 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned char __vec, vector unsigned char __tmp = __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr); + typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double; + __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed short __vec, @@ -16600,7 +16625,8 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed short __vec, signed short *__ptr) { vector signed short __tmp = __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); - __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr); + typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double; + __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned short __vec, @@ -16608,7 +16634,8 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned short __vec, unsigned short *__ptr) { vector unsigned short __tmp = __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); - __builtin_vsx_stxvd2x_be(__tmp, __offset, __ptr); + typedef __attribute__((vector_size(sizeof(__tmp)))) double __vector_double; + __builtin_vsx_stxvd2x_be((__vector_double)__tmp, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed int __vec, @@ -16620,32 +16647,32 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed int __vec, static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned int __vec, signed long long __offset, unsigned int *__ptr) { - __builtin_vsx_stxvw4x_be(__vec, __offset, __ptr); + __builtin_vsx_stxvw4x_be((vector int)__vec, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector float __vec, signed long long __offset, float *__ptr) { - __builtin_vsx_stxvw4x_be(__vec, __offset, __ptr); + __builtin_vsx_stxvw4x_be((vector int)__vec, __offset, __ptr); } #ifdef __VSX__ static __inline__ void __ATTRS_o_ai vec_xst_be(vector signed long long __vec, signed long long __offset, signed long long *__ptr) { - __builtin_vsx_stxvd2x_be(__vec, __offset, __ptr); + __builtin_vsx_stxvd2x_be((vector double)__vec, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned long long __vec, signed long long __offset, unsigned long long *__ptr) { - __builtin_vsx_stxvd2x_be(__vec, __offset, __ptr); + __builtin_vsx_stxvd2x_be((vector double)__vec, __offset, __ptr); } static __inline__ void __ATTRS_o_ai vec_xst_be(vector double __vec, signed long long __offset, double *__ptr) { - __builtin_vsx_stxvd2x_be(__vec, __offset, __ptr); + __builtin_vsx_stxvd2x_be((vector double)__vec, __offset, __ptr); } #endif @@ -16667,13 +16694,13 @@ static __inline__ void __ATTRS_o_ai vec_xst_be(vector unsigned __int128 __vec, #endif #ifdef __POWER9_VECTOR__ -#define vec_test_data_class(__a, __b) \ - _Generic((__a), \ - 
vector float: \ - (vector bool int)__builtin_vsx_xvtstdcsp((__a), (__b)), \ - vector double: \ - (vector bool long long)__builtin_vsx_xvtstdcdp((__a), (__b)) \ - ) +#define vec_test_data_class(__a, __b) \ + _Generic( \ + (__a), vector float \ + : (vector bool int)__builtin_vsx_xvtstdcsp((vector float)(__a), (__b)), \ + vector double \ + : (vector bool long long)__builtin_vsx_xvtstdcdp((vector double)(__a), \ + (__b))) #endif /* #ifdef __POWER9_VECTOR__ */ diff --git a/lib/include/arm_acle.h b/lib/include/arm_acle.h index 096cc261af..596ea03cff 100644 --- a/lib/include/arm_acle.h +++ b/lib/include/arm_acle.h @@ -90,9 +90,11 @@ __swp(uint32_t __x, volatile uint32_t *__p) { #endif /* 8.7 NOP */ +#if !defined(_MSC_VER) || !defined(__aarch64__) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) { __builtin_arm_nop(); } +#endif /* 9 DATA-PROCESSING INTRINSICS */ /* 9.2 Miscellaneous data-processing intrinsics */ @@ -139,6 +141,26 @@ __clzll(uint64_t __t) { return __builtin_clzll(__t); } +/* CLS */ +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__cls(uint32_t __t) { + return __builtin_arm_cls(__t); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__clsl(unsigned long __t) { +#if __SIZEOF_LONG__ == 4 + return __builtin_arm_cls(__t); +#else + return __builtin_arm_cls64(__t); +#endif +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__clsll(uint64_t __t) { + return __builtin_arm_cls64(__t); +} + /* REV */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __rev(uint32_t __t) { @@ -609,11 +631,15 @@ __jcvt(double __a) { #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg) #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg) #define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg) +#define __arm_rsrf(sysreg) __builtin_bit_cast(float, __arm_rsr(sysreg)) +#define __arm_rsrf64(sysreg) __builtin_bit_cast(double, __arm_rsr64(sysreg)) #define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v) #define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v) #define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v) +#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v)) +#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v)) -// Memory Tagging Extensions (MTE) Intrinsics +/* Memory Tagging Extensions (MTE) Intrinsics */ #if __ARM_FEATURE_MEMORY_TAGGING #define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask) #define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset) @@ -623,6 +649,28 @@ __jcvt(double __a) { #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb) #endif +/* Transactional Memory Extension (TME) Intrinsics */ +#if __ARM_FEATURE_TME + +#define _TMFAILURE_REASON 0x00007fffu +#define _TMFAILURE_RTRY 0x00008000u +#define _TMFAILURE_CNCL 0x00010000u +#define _TMFAILURE_MEM 0x00020000u +#define _TMFAILURE_IMP 0x00040000u +#define _TMFAILURE_ERR 0x00080000u +#define _TMFAILURE_SIZE 0x00100000u +#define _TMFAILURE_NEST 0x00200000u +#define _TMFAILURE_DBG 0x00400000u +#define _TMFAILURE_INT 0x00800000u +#define _TMFAILURE_TRIVIAL 0x01000000u + +#define __tstart() __builtin_arm_tstart() +#define __tcommit() __builtin_arm_tcommit() +#define __tcancel(__arg) __builtin_arm_tcancel(__arg) +#define __ttest() __builtin_arm_ttest() + +#endif /* __ARM_FEATURE_TME */ + #if defined(__cplusplus) } #endif diff --git 
a/lib/include/arm_cmse.h b/lib/include/arm_cmse.h new file mode 100644 index 0000000000..ecf50ecc5c --- /dev/null +++ b/lib/include/arm_cmse.h @@ -0,0 +1,217 @@ +//===---- arm_cmse.h - Arm CMSE support -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __ARM_CMSE_H +#define __ARM_CMSE_H + +#if (__ARM_FEATURE_CMSE & 0x1) +#include <stddef.h> +#include <stdint.h> + +#define __ARM_CMSE_SECURE_MODE (__ARM_FEATURE_CMSE & 0x2) +#define CMSE_MPU_READWRITE 1 /* checks if readwrite_ok field is set */ +#define CMSE_AU_NONSECURE 2 /* checks if permissions have secure field unset */ +#define CMSE_MPU_UNPRIV 4 /* sets T flag on TT instruction */ +#define CMSE_MPU_READ 8 /* checks if read_ok field is set */ +#define CMSE_MPU_NONSECURE 16 /* sets A flag, checks if secure field unset */ +#define CMSE_NONSECURE (CMSE_AU_NONSECURE | CMSE_MPU_NONSECURE) + +#define cmse_check_pointed_object(p, f) \ + cmse_check_address_range((p), sizeof(*(p)), (f)) + +#if defined(__cplusplus) +extern "C" { +#endif + +typedef union { + struct cmse_address_info { +#ifdef __ARM_BIG_ENDIAN + /* __ARM_BIG_ENDIAN */ +#if (__ARM_CMSE_SECURE_MODE) + unsigned idau_region : 8; + unsigned idau_region_valid : 1; + unsigned secure : 1; + unsigned nonsecure_readwrite_ok : 1; + unsigned nonsecure_read_ok : 1; +#else + unsigned : 12; +#endif + unsigned readwrite_ok : 1; + unsigned read_ok : 1; +#if (__ARM_CMSE_SECURE_MODE) + unsigned sau_region_valid : 1; +#else + unsigned : 1; +#endif + unsigned mpu_region_valid : 1; +#if (__ARM_CMSE_SECURE_MODE) + unsigned sau_region : 8; +#else + unsigned : 8; +#endif + unsigned mpu_region : 8; + +#else /* __ARM_LITTLE_ENDIAN */ + unsigned mpu_region : 8; +#if (__ARM_CMSE_SECURE_MODE) + unsigned sau_region : 8; +#else + unsigned : 8; +#endif + unsigned mpu_region_valid : 1; +#if (__ARM_CMSE_SECURE_MODE) + unsigned sau_region_valid : 1; +#else + unsigned : 1; +#endif + unsigned read_ok : 1; + unsigned readwrite_ok : 1; +#if (__ARM_CMSE_SECURE_MODE) + unsigned nonsecure_read_ok : 1; + unsigned nonsecure_readwrite_ok : 1; + unsigned secure : 1; + unsigned idau_region_valid : 1; + unsigned idau_region : 8; +#else + unsigned : 12; +#endif +#endif /*__ARM_LITTLE_ENDIAN */ + } flags; + unsigned value; +} cmse_address_info_t; + +static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) +cmse_TT(void *__p) { + cmse_address_info_t __u; + __u.value = __builtin_arm_cmse_TT(__p); + return __u; +} +static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) +cmse_TTT(void *__p) { + cmse_address_info_t __u; + __u.value = __builtin_arm_cmse_TTT(__p); + return __u; +} + +#if __ARM_CMSE_SECURE_MODE +static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) +cmse_TTA(void *__p) { + cmse_address_info_t __u; + __u.value = __builtin_arm_cmse_TTA(__p); + return __u; +} +static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) +cmse_TTAT(void *__p) { + cmse_address_info_t __u; + __u.value = __builtin_arm_cmse_TTAT(__p); + return __u; +} +#endif + +#define cmse_TT_fptr(p) cmse_TT(__builtin_bit_cast(void *, (p))) +#define cmse_TTT_fptr(p) cmse_TTT(__builtin_bit_cast(void *, (p))) + +#if __ARM_CMSE_SECURE_MODE +#define cmse_TTA_fptr(p) cmse_TTA(__builtin_bit_cast(void *, (p))) +#define 
cmse_TTAT_fptr(p) cmse_TTAT(__builtin_bit_cast(void *, (p))) +#endif + +static void *__attribute__((__always_inline__)) +cmse_check_address_range(void *__pb, size_t __s, int __flags) { + uintptr_t __begin = (uintptr_t)__pb; + uintptr_t __end = __begin + __s - 1; + + if (__end < __begin) + return NULL; /* wrap around check */ + + /* Check whether the range crosses a 32-byte aligned address */ + const int __single_check = (__begin ^ __end) < 0x20u; + + /* execute the right variant of the TT instructions */ + void *__pe = (void *)__end; + cmse_address_info_t __permb, __perme; + switch (__flags & (CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE)) { + case 0: + __permb = cmse_TT(__pb); + __perme = __single_check ? __permb : cmse_TT(__pe); + break; + case CMSE_MPU_UNPRIV: + __permb = cmse_TTT(__pb); + __perme = __single_check ? __permb : cmse_TTT(__pe); + break; +#if __ARM_CMSE_SECURE_MODE + case CMSE_MPU_NONSECURE: + __permb = cmse_TTA(__pb); + __perme = __single_check ? __permb : cmse_TTA(__pe); + break; + case CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE: + __permb = cmse_TTAT(__pb); + __perme = __single_check ? __permb : cmse_TTAT(__pe); + break; +#endif + /* if CMSE_NONSECURE is specified w/o __ARM_CMSE_SECURE_MODE */ + default: + return NULL; + } + + /* check that the range does not cross MPU, SAU, or IDAU region boundaries */ + if (__permb.value != __perme.value) + return NULL; +#if !(__ARM_CMSE_SECURE_MODE) + /* CMSE_AU_NONSECURE is only supported when __ARM_FEATURE_CMSE & 0x2 */ + if (__flags & CMSE_AU_NONSECURE) + return NULL; +#endif + + /* check the permission on the range */ + switch (__flags & ~(CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE)) { +#if (__ARM_CMSE_SECURE_MODE) + case CMSE_MPU_READ | CMSE_MPU_READWRITE | CMSE_AU_NONSECURE: + case CMSE_MPU_READWRITE | CMSE_AU_NONSECURE: + return __permb.flags.nonsecure_readwrite_ok ? __pb : NULL; + + case CMSE_MPU_READ | CMSE_AU_NONSECURE: + return __permb.flags.nonsecure_read_ok ? __pb : NULL; + + case CMSE_AU_NONSECURE: + return __permb.flags.secure ? NULL : __pb; +#endif + case CMSE_MPU_READ | CMSE_MPU_READWRITE: + case CMSE_MPU_READWRITE: + return __permb.flags.readwrite_ok ? __pb : NULL; + + case CMSE_MPU_READ: + return __permb.flags.read_ok ? 
__pb : NULL; + + default: + return NULL; + } +} + +#if __ARM_CMSE_SECURE_MODE +static int __attribute__((__always_inline__, __nodebug__)) +cmse_nonsecure_caller(void) { + return !((uintptr_t)__builtin_return_address(0) & 1); +} + +#define cmse_nsfptr_create(p) \ + __builtin_bit_cast(__typeof__(p), \ + (__builtin_bit_cast(uintptr_t, p) & ~(uintptr_t)1)) + +#define cmse_is_nsfptr(p) ((__builtin_bit_cast(uintptr_t, p) & 1) == 0) + +#endif /* __ARM_CMSE_SECURE_MODE */ + +void __attribute__((__noreturn__)) cmse_abort(void); +#if defined(__cplusplus) +} +#endif + +#endif /* (__ARM_FEATURE_CMSE & 0x1) */ + +#endif /* __ARM_CMSE_H */ diff --git a/lib/include/arm_fp16.h b/lib/include/arm_fp16.h index de5446508b..ce993ce3c8 100644 --- a/lib/include/arm_fp16.h +++ b/lib/include/arm_fp16.h @@ -30,7 +30,6 @@ typedef __fp16 float16_t; #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__) -#ifdef __LITTLE_ENDIAN__ #define vabdh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -38,33 +37,12 @@ typedef __fp16 float16_t; __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vabdh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vabsh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \ __ret; \ }) -#else -#define vabsh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vaddh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -72,17 +50,6 @@ typedef __fp16 float16_t; __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vaddh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcageh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -90,17 +57,6 @@ typedef __fp16 float16_t; __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vcageh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcagth_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -108,17 +64,6 @@ typedef __fp16 float16_t; __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \ __ret; \ }) -#else -#define vcagth_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcaleh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -126,17 +71,6 @@ typedef __fp16 float16_t; __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vcaleh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - 
uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcalth_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -144,17 +78,6 @@ typedef __fp16 float16_t; __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \ __ret; \ }) -#else -#define vcalth_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vceqh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -162,33 +85,12 @@ typedef __fp16 float16_t; __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vceqh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vceqzh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint16_t __ret; \ __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \ __ret; \ }) -#else -#define vceqzh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcgeh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -196,33 +98,12 @@ typedef __fp16 float16_t; __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vcgeh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcgezh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint16_t __ret; \ __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \ __ret; \ }) -#else -#define vcgezh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcgth_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -230,33 +111,12 @@ typedef __fp16 float16_t; __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \ __ret; \ }) -#else -#define vcgth_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcgtzh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint16_t __ret; \ __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \ __ret; \ }) -#else -#define vcgtzh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcleh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -264,33 +124,12 @@ typedef __fp16 float16_t; __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vcleh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vclezh_f16(__p0) __extension__ ({ 
\ float16_t __s0 = __p0; \ uint16_t __ret; \ __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \ __ret; \ }) -#else -#define vclezh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vclth_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -298,789 +137,300 @@ typedef __fp16 float16_t; __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \ __ret; \ }) -#else -#define vclth_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcltzh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint16_t __ret; \ __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \ __ret; \ }) -#else -#define vcltzh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ int16_t __ret; \ __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \ __ret; \ }) -#else -#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ int32_t __ret; \ __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \ __ret; \ }) -#else -#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ int64_t __ret; \ __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \ __ret; \ }) -#else -#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ uint16_t __ret; \ __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \ __ret; \ }) -#else -#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ uint32_t __ret; \ __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \ __ret; \ }) -#else -#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ uint64_t __ret; \ __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \ __ret; \ }) -#else -#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define 
vcvth_s16_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int16_t __ret; \ __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \ __ret; \ }) -#else -#define vcvth_s16_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_s32_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int32_t __ret; \ __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \ __ret; \ }) -#else -#define vcvth_s32_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_s64_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int64_t __ret; \ __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \ __ret; \ }) -#else -#define vcvth_s64_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_u16_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint16_t __ret; \ __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \ __ret; \ }) -#else -#define vcvth_u16_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_u32_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint32_t __ret; \ __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \ __ret; \ }) -#else -#define vcvth_u32_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_u64_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint64_t __ret; \ __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \ __ret; \ }) -#else -#define vcvth_u64_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtah_s16_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int16_t __ret; \ __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \ __ret; \ }) -#else -#define vcvtah_s16_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtah_s32_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int32_t __ret; \ __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \ __ret; \ }) -#else -#define vcvtah_s32_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtah_s64_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int64_t __ret; \ __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \ __ret; \ }) -#else -#define vcvtah_s64_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtah_u16_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint16_t __ret; \ __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \ __ret; \ }) -#else -#define vcvtah_u16_f16(__p0) __extension__ ({ \ - float16_t __s0 = 
__p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtah_u32_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint32_t __ret; \ __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \ __ret; \ }) -#else -#define vcvtah_u32_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtah_u64_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint64_t __ret; \ __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \ __ret; \ }) -#else -#define vcvtah_u64_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \ +#define vcvth_f16_u16(__p0) __extension__ ({ \ + uint16_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__s0); \ + __ret; \ +}) +#define vcvth_f16_s16(__p0) __extension__ ({ \ + int16_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__s0); \ + __ret; \ +}) +#define vcvth_f16_u32(__p0) __extension__ ({ \ + uint32_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__s0); \ + __ret; \ +}) +#define vcvth_f16_s32(__p0) __extension__ ({ \ + int32_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__s0); \ + __ret; \ +}) +#define vcvth_f16_u64(__p0) __extension__ ({ \ + uint64_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__s0); \ + __ret; \ +}) +#define vcvth_f16_s64(__p0) __extension__ ({ \ + int64_t __s0 = __p0; \ + float16_t __ret; \ + __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__s0); \ __ret; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float16_t vcvth_f16_u32(uint32_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0); - return __ret; -} -#else -__ai float16_t vcvth_f16_u32(uint32_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float16_t vcvth_f16_u64(uint64_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0); - return __ret; -} -#else -__ai float16_t vcvth_f16_u64(uint64_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float16_t vcvth_f16_u16(uint16_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0); - return __ret; -} -#else -__ai float16_t vcvth_f16_u16(uint16_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float16_t vcvth_f16_s32(int32_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0); - return __ret; -} -#else -__ai float16_t vcvth_f16_s32(int32_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float16_t vcvth_f16_s64(int64_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0); - return __ret; -} -#else -__ai float16_t vcvth_f16_s64(int64_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float16_t 
vcvth_f16_s16(int16_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0); - return __ret; -} -#else -__ai float16_t vcvth_f16_s16(int16_t __p0) { - float16_t __ret; - __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \ uint32_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \ __ret; \ }) -#else -#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \ - __ret; \ -}) -#else -#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \ - __ret; \ -}) -#else -#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \ int32_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \ __ret; \ }) -#else -#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ +#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \ + __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \ __ret; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \ int64_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \ __ret; \ }) -#else -#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ +#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \ + uint16_t __s0 = __p0; \ float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \ + __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \ __ret; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \ int16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \ __ret; \ }) -#else -#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtmh_s16_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int16_t __ret; \ __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \ __ret; \ }) -#else -#define vcvtmh_s16_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtmh_s32_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int32_t __ret; \ __ret = (int32_t) 
__builtin_neon_vcvtmh_s32_f16(__s0); \ __ret; \ }) -#else -#define vcvtmh_s32_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtmh_s64_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int64_t __ret; \ __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \ __ret; \ }) -#else -#define vcvtmh_s64_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtmh_u16_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint16_t __ret; \ __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \ __ret; \ }) -#else -#define vcvtmh_u16_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtmh_u32_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint32_t __ret; \ __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \ __ret; \ }) -#else -#define vcvtmh_u32_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtmh_u64_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint64_t __ret; \ __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \ __ret; \ }) -#else -#define vcvtmh_u64_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtnh_s16_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int16_t __ret; \ __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \ __ret; \ }) -#else -#define vcvtnh_s16_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtnh_s32_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int32_t __ret; \ __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \ __ret; \ }) -#else -#define vcvtnh_s32_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtnh_s64_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int64_t __ret; \ __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \ __ret; \ }) -#else -#define vcvtnh_s64_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtnh_u16_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint16_t __ret; \ __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \ __ret; \ }) -#else -#define vcvtnh_u16_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtnh_u32_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint32_t __ret; \ __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \ __ret; \ }) -#else -#define vcvtnh_u32_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) 
__builtin_neon_vcvtnh_u32_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtnh_u64_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint64_t __ret; \ __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \ __ret; \ }) -#else -#define vcvtnh_u64_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtph_s16_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int16_t __ret; \ __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \ __ret; \ }) -#else -#define vcvtph_s16_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtph_s32_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int32_t __ret; \ __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \ __ret; \ }) -#else -#define vcvtph_s32_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtph_s64_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ int64_t __ret; \ __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \ __ret; \ }) -#else -#define vcvtph_s64_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtph_u16_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint16_t __ret; \ __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \ __ret; \ }) -#else -#define vcvtph_u16_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtph_u32_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint32_t __ret; \ __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \ __ret; \ }) -#else -#define vcvtph_u32_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtph_u64_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ uint64_t __ret; \ __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \ __ret; \ }) -#else -#define vcvtph_u64_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vdivh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1088,17 +438,6 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vdivh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1107,18 +446,6 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \ __ret; \ }) -#else -#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - 
float16_t __s2 = __p2; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1127,18 +454,6 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \ __ret; \ }) -#else -#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __s2 = __p2; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vmaxh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1146,17 +461,6 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vmaxh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vmaxnmh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1164,17 +468,6 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vminh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1182,17 +475,6 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vminh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vminnmh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1200,17 +482,6 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vminnmh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vmulh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1218,17 +489,6 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vmulh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vmulxh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1236,49 +496,18 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vmulxh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) 
__builtin_neon_vmulxh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vnegh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \ __ret; \ }) -#else -#define vnegh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrecpeh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \ __ret; \ }) -#else -#define vrecpeh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrecpsh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1286,161 +515,60 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vrecpsh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrecpxh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \ __ret; \ }) -#else -#define vrecpxh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrndh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \ __ret; \ }) -#else -#define vrndh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrndah_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \ __ret; \ }) -#else -#define vrndah_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrndih_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \ __ret; \ }) -#else -#define vrndih_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrndmh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \ __ret; \ }) -#else -#define vrndmh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrndnh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \ __ret; \ }) -#else -#define vrndnh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrndph_f16(__p0) __extension__ ({ \ float16_t __s0 = 
__p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \ __ret; \ }) -#else -#define vrndph_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrndxh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \ __ret; \ }) -#else -#define vrndxh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrsqrteh_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \ __ret; \ }) -#else -#define vrsqrteh_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1448,33 +576,12 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vsqrth_f16(__p0) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __ret; \ __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \ __ret; \ }) -#else -#define vsqrth_f16(__p0) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vsubh_f16(__p0, __p1) __extension__ ({ \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ @@ -1482,16 +589,6 @@ __ai float16_t vcvth_f16_s16(int16_t __p0) { __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \ __ret; \ }) -#else -#define vsubh_f16(__p0, __p1) __extension__ ({ \ - float16_t __s0 = __p0; \ - float16_t __s1 = __p1; \ - float16_t __ret; \ - __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \ - __ret; \ -}) -#endif - #endif #undef __ai diff --git a/lib/include/arm_mve.h b/lib/include/arm_mve.h new file mode 100644 index 0000000000..df6cab4151 --- /dev/null +++ b/lib/include/arm_mve.h @@ -0,0 +1,12563 @@ +/*===---- arm_mve.h - ARM MVE intrinsics -----------------------------------=== + * + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __ARM_MVE_H
+#define __ARM_MVE_H
+
+#if !__ARM_FEATURE_MVE
+#error "MVE support not enabled"
+#endif
+
+#include <stdint.h>
+
+typedef uint16_t mve_pred16_t;
+typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
+typedef struct { int16x8_t val[2]; } int16x8x2_t;
+typedef struct { int16x8_t val[4]; } int16x8x4_t;
+typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
+typedef struct { int32x4_t val[2]; } int32x4x2_t;
+typedef struct { int32x4_t val[4]; } int32x4x4_t;
+typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
+typedef struct { int64x2_t val[2]; } int64x2x2_t;
+typedef struct { int64x2_t val[4]; } int64x2x4_t;
+typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
+typedef struct { int8x16_t val[2]; } int8x16x2_t;
+typedef struct { int8x16_t val[4]; } int8x16x4_t;
+typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
+typedef struct { uint16x8_t val[2]; } uint16x8x2_t;
+typedef struct { uint16x8_t val[4]; } uint16x8x4_t;
+typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
+typedef struct { uint32x4_t val[2]; } uint32x4x2_t;
+typedef struct { uint32x4_t val[4]; } uint32x4x4_t;
+typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
+typedef struct { uint64x2_t val[2]; } uint64x2x2_t;
+typedef struct { uint64x2_t val[4]; } uint64x2x4_t;
+typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
+typedef struct { uint8x16_t val[2]; } uint8x16x2_t;
+typedef struct { uint8x16_t val[4]; } uint8x16x4_t;
+
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_asrl)))
+int64_t __arm_asrl(int64_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_lsll)))
+uint64_t __arm_lsll(uint64_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_sqrshr)))
+int32_t __arm_sqrshr(int32_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_sqrshrl)))
+int64_t __arm_sqrshrl(int64_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_sqrshrl_sat48)))
+int64_t __arm_sqrshrl_sat48(int64_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_sqshl)))
+int32_t __arm_sqshl(int32_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_sqshll)))
+int64_t __arm_sqshll(int64_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_srshr)))
+int32_t __arm_srshr(int32_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_srshrl)))
+int64_t __arm_srshrl(int64_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_uqrshl)))
+uint32_t __arm_uqrshl(uint32_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_uqrshll)))
+uint64_t __arm_uqrshll(uint64_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_uqrshll_sat48)))
+uint64_t __arm_uqrshll_sat48(uint64_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_uqshl)))
+uint32_t __arm_uqshl(uint32_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_uqshll)))
+uint64_t __arm_uqshll(uint64_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_urshr)))
+uint32_t __arm_urshr(uint32_t, int);
+static
__inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_urshrl))) +uint64_t __arm_urshrl(uint64_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s16))) +uint32_t __arm_vabavq_p_s16(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s16))) +uint32_t __arm_vabavq_p(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s32))) +uint32_t __arm_vabavq_p_s32(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s32))) +uint32_t __arm_vabavq_p(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s8))) +uint32_t __arm_vabavq_p_s8(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s8))) +uint32_t __arm_vabavq_p(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u16))) +uint32_t __arm_vabavq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u16))) +uint32_t __arm_vabavq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u32))) +uint32_t __arm_vabavq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u32))) +uint32_t __arm_vabavq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u8))) +uint32_t __arm_vabavq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u8))) +uint32_t __arm_vabavq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_s16))) +uint32_t __arm_vabavq_s16(uint32_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_s16))) +uint32_t __arm_vabavq(uint32_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_s32))) +uint32_t __arm_vabavq_s32(uint32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_s32))) +uint32_t __arm_vabavq(uint32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_s8))) +uint32_t __arm_vabavq_s8(uint32_t, int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_s8))) +uint32_t __arm_vabavq(uint32_t, int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_u16))) +uint32_t __arm_vabavq_u16(uint32_t, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_u16))) +uint32_t __arm_vabavq(uint32_t, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_u32))) +uint32_t __arm_vabavq_u32(uint32_t, uint32x4_t, uint32x4_t); 
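+/* A minimal usage sketch, not part of the generated declarations above:
+ * vabavq accumulates absolute differences of the lanes of its two vector
+ * arguments into a scalar, and the _p forms predicate that accumulation
+ * per lane. The vectors a and b here are hypothetical uint8x16_t values,
+ * and the predicate constant is an arbitrary example:
+ *
+ *   uint32_t sad = __arm_vabavq(0u, a, b);                  // all lanes
+ *   sad = __arm_vabavq_p(sad, a, b, (mve_pred16_t)0x00ffu); // low 8 bytes
+ */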
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_u32))) +uint32_t __arm_vabavq(uint32_t, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_u8))) +uint32_t __arm_vabavq_u8(uint32_t, uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_u8))) +uint32_t __arm_vabavq(uint32_t, uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s16))) +int16x8_t __arm_vabdq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s16))) +int16x8_t __arm_vabdq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s32))) +int32x4_t __arm_vabdq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s32))) +int32x4_t __arm_vabdq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s8))) +int8x16_t __arm_vabdq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s8))) +int8x16_t __arm_vabdq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u16))) +uint16x8_t __arm_vabdq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u16))) +uint16x8_t __arm_vabdq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u32))) +uint32x4_t __arm_vabdq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u32))) +uint32x4_t __arm_vabdq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u8))) +uint8x16_t __arm_vabdq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u8))) +uint8x16_t __arm_vabdq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_s16))) +int16x8_t __arm_vabdq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_s16))) +int16x8_t __arm_vabdq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_s32))) +int32x4_t __arm_vabdq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_s32))) +int32x4_t __arm_vabdq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_s8))) +int8x16_t __arm_vabdq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_s8))) +int8x16_t __arm_vabdq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_u16))) +uint16x8_t __arm_vabdq_u16(uint16x8_t, uint16x8_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_u16))) +uint16x8_t __arm_vabdq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_u32))) +uint32x4_t __arm_vabdq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_u32))) +uint32x4_t __arm_vabdq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_u8))) +uint8x16_t __arm_vabdq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_u8))) +uint8x16_t __arm_vabdq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s16))) +int16x8_t __arm_vabdq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s16))) +int16x8_t __arm_vabdq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s32))) +int32x4_t __arm_vabdq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s32))) +int32x4_t __arm_vabdq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s8))) +int8x16_t __arm_vabdq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s8))) +int8x16_t __arm_vabdq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u16))) +uint16x8_t __arm_vabdq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u16))) +uint16x8_t __arm_vabdq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u32))) +uint32x4_t __arm_vabdq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u32))) +uint32x4_t __arm_vabdq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u8))) +uint8x16_t __arm_vabdq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u8))) +uint8x16_t __arm_vabdq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadciq_m_s32))) +int32x4_t __arm_vadciq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadciq_m_s32))) +int32x4_t __arm_vadciq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadciq_m_u32))) +uint32x4_t __arm_vadciq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadciq_m_u32))) +uint32x4_t __arm_vadciq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadciq_s32))) +int32x4_t __arm_vadciq_s32(int32x4_t, int32x4_t, unsigned *); +static 
__inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadciq_s32))) +int32x4_t __arm_vadciq(int32x4_t, int32x4_t, unsigned *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadciq_u32))) +uint32x4_t __arm_vadciq_u32(uint32x4_t, uint32x4_t, unsigned *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadciq_u32))) +uint32x4_t __arm_vadciq(uint32x4_t, uint32x4_t, unsigned *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadcq_m_s32))) +int32x4_t __arm_vadcq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadcq_m_s32))) +int32x4_t __arm_vadcq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadcq_m_u32))) +uint32x4_t __arm_vadcq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadcq_m_u32))) +uint32x4_t __arm_vadcq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadcq_s32))) +int32x4_t __arm_vadcq_s32(int32x4_t, int32x4_t, unsigned *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadcq_s32))) +int32x4_t __arm_vadcq(int32x4_t, int32x4_t, unsigned *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadcq_u32))) +uint32x4_t __arm_vadcq_u32(uint32x4_t, uint32x4_t, unsigned *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadcq_u32))) +uint32x4_t __arm_vadcq(uint32x4_t, uint32x4_t, unsigned *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s16))) +int16x8_t __arm_vaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s16))) +int16x8_t __arm_vaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s32))) +int32x4_t __arm_vaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s32))) +int32x4_t __arm_vaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s8))) +int8x16_t __arm_vaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s8))) +int8x16_t __arm_vaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u16))) +uint16x8_t __arm_vaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u16))) +uint16x8_t __arm_vaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u32))) +uint32x4_t __arm_vaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u32))) +uint32x4_t __arm_vaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, 
mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u8))) +uint8x16_t __arm_vaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u8))) +uint8x16_t __arm_vaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_s16))) +int16x8_t __arm_vaddq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_s16))) +int16x8_t __arm_vaddq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_s32))) +int32x4_t __arm_vaddq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_s32))) +int32x4_t __arm_vaddq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_s8))) +int8x16_t __arm_vaddq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_s8))) +int8x16_t __arm_vaddq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_u16))) +uint16x8_t __arm_vaddq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_u16))) +uint16x8_t __arm_vaddq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_u32))) +uint32x4_t __arm_vaddq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_u32))) +uint32x4_t __arm_vaddq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_u8))) +uint8x16_t __arm_vaddq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_u8))) +uint8x16_t __arm_vaddq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s16))) +int16x8_t __arm_vaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s16))) +int16x8_t __arm_vaddq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s32))) +int32x4_t __arm_vaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s32))) +int32x4_t __arm_vaddq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s8))) +int8x16_t __arm_vaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s8))) +int8x16_t __arm_vaddq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u16))) +uint16x8_t __arm_vaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u16))) +uint16x8_t __arm_vaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u32))) +uint32x4_t __arm_vaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u32))) +uint32x4_t __arm_vaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u8))) +uint8x16_t __arm_vaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u8))) +uint8x16_t __arm_vaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s16))) +int16x8_t __arm_vandq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s16))) +int16x8_t __arm_vandq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s32))) +int32x4_t __arm_vandq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s32))) +int32x4_t __arm_vandq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s8))) +int8x16_t __arm_vandq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s8))) +int8x16_t __arm_vandq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u16))) +uint16x8_t __arm_vandq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u16))) +uint16x8_t __arm_vandq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u32))) +uint32x4_t __arm_vandq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u32))) +uint32x4_t __arm_vandq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u8))) +uint8x16_t __arm_vandq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u8))) +uint8x16_t __arm_vandq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_s16))) +int16x8_t __arm_vandq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_s16))) +int16x8_t __arm_vandq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_s32))) +int32x4_t __arm_vandq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_s32))) +int32x4_t __arm_vandq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_s8))) +int8x16_t __arm_vandq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_s8))) +int8x16_t __arm_vandq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_u16))) +uint16x8_t __arm_vandq_u16(uint16x8_t, uint16x8_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_u16))) +uint16x8_t __arm_vandq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_u32))) +uint32x4_t __arm_vandq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_u32))) +uint32x4_t __arm_vandq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_u8))) +uint8x16_t __arm_vandq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_u8))) +uint8x16_t __arm_vandq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s16))) +int16x8_t __arm_vandq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s16))) +int16x8_t __arm_vandq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s32))) +int32x4_t __arm_vandq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s32))) +int32x4_t __arm_vandq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s8))) +int8x16_t __arm_vandq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s8))) +int8x16_t __arm_vandq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u16))) +uint16x8_t __arm_vandq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u16))) +uint16x8_t __arm_vandq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u32))) +uint32x4_t __arm_vandq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u32))) +uint32x4_t __arm_vandq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u8))) +uint8x16_t __arm_vandq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u8))) +uint8x16_t __arm_vandq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s16))) +int16x8_t __arm_vbicq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s16))) +int16x8_t __arm_vbicq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s32))) +int32x4_t __arm_vbicq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s32))) +int32x4_t __arm_vbicq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s8))) +int8x16_t __arm_vbicq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s8))) +int8x16_t __arm_vbicq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u16))) +uint16x8_t __arm_vbicq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u16))) +uint16x8_t __arm_vbicq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u32))) +uint32x4_t __arm_vbicq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u32))) +uint32x4_t __arm_vbicq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u8))) +uint8x16_t __arm_vbicq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u8))) +uint8x16_t __arm_vbicq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_s16))) +int16x8_t __arm_vbicq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_s16))) +int16x8_t __arm_vbicq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_s32))) +int32x4_t __arm_vbicq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_s32))) +int32x4_t __arm_vbicq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_s8))) +int8x16_t __arm_vbicq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_s8))) +int8x16_t __arm_vbicq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_u16))) +uint16x8_t __arm_vbicq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_u16))) +uint16x8_t __arm_vbicq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_u32))) +uint32x4_t __arm_vbicq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_u32))) +uint32x4_t __arm_vbicq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_u8))) +uint8x16_t __arm_vbicq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_u8))) +uint8x16_t __arm_vbicq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s16))) +int16x8_t __arm_vbicq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s16))) +int16x8_t __arm_vbicq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s32))) +int32x4_t __arm_vbicq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s32))) +int32x4_t __arm_vbicq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s8))) +int8x16_t __arm_vbicq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s8))) +int8x16_t __arm_vbicq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u16))) +uint16x8_t __arm_vbicq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u16))) +uint16x8_t __arm_vbicq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u32))) +uint32x4_t __arm_vbicq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u32))) +uint32x4_t __arm_vbicq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u8))) +uint8x16_t __arm_vbicq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u8))) +uint8x16_t __arm_vbicq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s16))) +int16x8_t __arm_vcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s16))) +int16x8_t __arm_vcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s32))) +int32x4_t __arm_vcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s32))) +int32x4_t __arm_vcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s8))) +int8x16_t __arm_vcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s8))) +int8x16_t __arm_vcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u16))) +uint16x8_t __arm_vcaddq_rot270_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u16))) +uint16x8_t __arm_vcaddq_rot270_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u32))) +uint32x4_t __arm_vcaddq_rot270_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u32))) +uint32x4_t __arm_vcaddq_rot270_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u8))) +uint8x16_t __arm_vcaddq_rot270_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u8))) +uint8x16_t __arm_vcaddq_rot270_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s16))) +int16x8_t __arm_vcaddq_rot270_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s16))) +int16x8_t __arm_vcaddq_rot270(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s32))) +int32x4_t __arm_vcaddq_rot270_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s32))) +int32x4_t __arm_vcaddq_rot270(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s8))) +int8x16_t __arm_vcaddq_rot270_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s8))) +int8x16_t __arm_vcaddq_rot270(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u16))) +uint16x8_t __arm_vcaddq_rot270_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u16))) +uint16x8_t __arm_vcaddq_rot270(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u32))) +uint32x4_t __arm_vcaddq_rot270_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u32))) +uint32x4_t __arm_vcaddq_rot270(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u8))) +uint8x16_t __arm_vcaddq_rot270_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u8))) +uint8x16_t __arm_vcaddq_rot270(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s16))) +int16x8_t __arm_vcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s16))) +int16x8_t __arm_vcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s32))) +int32x4_t __arm_vcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s32))) +int32x4_t __arm_vcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s8))) +int8x16_t __arm_vcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s8))) +int8x16_t __arm_vcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u16))) +uint16x8_t __arm_vcaddq_rot270_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u16))) +uint16x8_t __arm_vcaddq_rot270_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u32))) +uint32x4_t __arm_vcaddq_rot270_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u32))) +uint32x4_t __arm_vcaddq_rot270_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u8))) +uint8x16_t __arm_vcaddq_rot270_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u8))) +uint8x16_t __arm_vcaddq_rot270_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s16))) +int16x8_t __arm_vcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s16))) +int16x8_t __arm_vcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s32))) +int32x4_t __arm_vcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s32))) +int32x4_t __arm_vcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s8))) +int8x16_t __arm_vcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s8))) +int8x16_t __arm_vcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u16))) +uint16x8_t __arm_vcaddq_rot90_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u16))) +uint16x8_t __arm_vcaddq_rot90_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u32))) +uint32x4_t __arm_vcaddq_rot90_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u32))) +uint32x4_t __arm_vcaddq_rot90_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u8))) +uint8x16_t __arm_vcaddq_rot90_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u8))) +uint8x16_t __arm_vcaddq_rot90_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s16))) +int16x8_t __arm_vcaddq_rot90_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s16))) +int16x8_t __arm_vcaddq_rot90(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s32))) +int32x4_t __arm_vcaddq_rot90_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s32))) +int32x4_t __arm_vcaddq_rot90(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s8))) +int8x16_t __arm_vcaddq_rot90_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s8))) +int8x16_t __arm_vcaddq_rot90(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u16))) +uint16x8_t __arm_vcaddq_rot90_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u16))) +uint16x8_t __arm_vcaddq_rot90(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u32))) +uint32x4_t __arm_vcaddq_rot90_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u32))) +uint32x4_t __arm_vcaddq_rot90(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u8))) +uint8x16_t __arm_vcaddq_rot90_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u8))) +uint8x16_t __arm_vcaddq_rot90(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s16))) +int16x8_t __arm_vcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s16))) +int16x8_t __arm_vcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s32))) +int32x4_t __arm_vcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s32))) +int32x4_t __arm_vcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s8))) +int8x16_t __arm_vcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s8))) +int8x16_t __arm_vcaddq_rot90_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u16))) +uint16x8_t __arm_vcaddq_rot90_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u16))) +uint16x8_t __arm_vcaddq_rot90_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u32))) +uint32x4_t __arm_vcaddq_rot90_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u32))) +uint32x4_t __arm_vcaddq_rot90_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u8))) +uint8x16_t __arm_vcaddq_rot90_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u8))) +uint8x16_t __arm_vcaddq_rot90_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u16))) +mve_pred16_t __arm_vcmpcsq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u16))) +mve_pred16_t __arm_vcmpcsq_m(uint16x8_t, uint16_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u32)))
+mve_pred16_t __arm_vcmpcsq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u32)))
+mve_pred16_t __arm_vcmpcsq_m(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u8)))
+mve_pred16_t __arm_vcmpcsq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u8)))
+mve_pred16_t __arm_vcmpcsq_m(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u16)))
+mve_pred16_t __arm_vcmpcsq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u16)))
+mve_pred16_t __arm_vcmpcsq_m(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u32)))
+mve_pred16_t __arm_vcmpcsq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u32)))
+mve_pred16_t __arm_vcmpcsq_m(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u8)))
+mve_pred16_t __arm_vcmpcsq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u8)))
+mve_pred16_t __arm_vcmpcsq_m(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u16)))
+mve_pred16_t __arm_vcmpcsq_n_u16(uint16x8_t, uint16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u16)))
+mve_pred16_t __arm_vcmpcsq(uint16x8_t, uint16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u32)))
+mve_pred16_t __arm_vcmpcsq_n_u32(uint32x4_t, uint32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u32)))
+mve_pred16_t __arm_vcmpcsq(uint32x4_t, uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u8)))
+mve_pred16_t __arm_vcmpcsq_n_u8(uint8x16_t, uint8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u8)))
+mve_pred16_t __arm_vcmpcsq(uint8x16_t, uint8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u16)))
+mve_pred16_t __arm_vcmpcsq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u16)))
+mve_pred16_t __arm_vcmpcsq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u32)))
+mve_pred16_t __arm_vcmpcsq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u32)))
+mve_pred16_t __arm_vcmpcsq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u8)))
+mve_pred16_t __arm_vcmpcsq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u8)))
+mve_pred16_t __arm_vcmpcsq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s16)))
+mve_pred16_t __arm_vcmpeqq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s16)))
+mve_pred16_t __arm_vcmpeqq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s32)))
+mve_pred16_t __arm_vcmpeqq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s32)))
+mve_pred16_t __arm_vcmpeqq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s8)))
+mve_pred16_t __arm_vcmpeqq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s8)))
+mve_pred16_t __arm_vcmpeqq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u16)))
+mve_pred16_t __arm_vcmpeqq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u16)))
+mve_pred16_t __arm_vcmpeqq_m(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u32)))
+mve_pred16_t __arm_vcmpeqq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u32)))
+mve_pred16_t __arm_vcmpeqq_m(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u8)))
+mve_pred16_t __arm_vcmpeqq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u8)))
+mve_pred16_t __arm_vcmpeqq_m(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s16)))
+mve_pred16_t __arm_vcmpeqq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s16)))
+mve_pred16_t __arm_vcmpeqq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s32)))
+mve_pred16_t __arm_vcmpeqq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s32)))
+mve_pred16_t __arm_vcmpeqq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s8)))
+mve_pred16_t __arm_vcmpeqq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s8)))
+mve_pred16_t __arm_vcmpeqq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u16)))
+mve_pred16_t __arm_vcmpeqq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u16)))
+mve_pred16_t __arm_vcmpeqq_m(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u32)))
+mve_pred16_t __arm_vcmpeqq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u32)))
+mve_pred16_t __arm_vcmpeqq_m(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u8)))
+mve_pred16_t __arm_vcmpeqq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u8)))
+mve_pred16_t __arm_vcmpeqq_m(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s16)))
+mve_pred16_t __arm_vcmpeqq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s16)))
+mve_pred16_t __arm_vcmpeqq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s32)))
+mve_pred16_t __arm_vcmpeqq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s32)))
+mve_pred16_t __arm_vcmpeqq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s8)))
+mve_pred16_t __arm_vcmpeqq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s8)))
+mve_pred16_t __arm_vcmpeqq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u16)))
+mve_pred16_t __arm_vcmpeqq_n_u16(uint16x8_t, uint16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u16)))
+mve_pred16_t __arm_vcmpeqq(uint16x8_t, uint16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u32)))
+mve_pred16_t __arm_vcmpeqq_n_u32(uint32x4_t, uint32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u32)))
+mve_pred16_t __arm_vcmpeqq(uint32x4_t, uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u8)))
+mve_pred16_t __arm_vcmpeqq_n_u8(uint8x16_t, uint8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u8)))
+mve_pred16_t __arm_vcmpeqq(uint8x16_t, uint8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s16)))
+mve_pred16_t __arm_vcmpeqq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s16)))
+mve_pred16_t __arm_vcmpeqq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s32)))
+mve_pred16_t __arm_vcmpeqq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s32)))
+mve_pred16_t __arm_vcmpeqq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s8)))
+mve_pred16_t __arm_vcmpeqq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s8)))
+mve_pred16_t __arm_vcmpeqq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u16)))
+mve_pred16_t __arm_vcmpeqq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u16)))
+mve_pred16_t __arm_vcmpeqq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u32)))
+mve_pred16_t __arm_vcmpeqq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u32)))
+mve_pred16_t __arm_vcmpeqq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u8)))
+mve_pred16_t __arm_vcmpeqq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u8)))
+mve_pred16_t __arm_vcmpeqq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s16)))
+mve_pred16_t __arm_vcmpgeq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s16)))
+mve_pred16_t __arm_vcmpgeq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s32)))
+mve_pred16_t __arm_vcmpgeq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s32)))
+mve_pred16_t __arm_vcmpgeq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s8)))
+mve_pred16_t __arm_vcmpgeq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s8)))
+mve_pred16_t __arm_vcmpgeq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s16)))
+mve_pred16_t __arm_vcmpgeq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s16)))
+mve_pred16_t __arm_vcmpgeq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s32)))
+mve_pred16_t __arm_vcmpgeq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s32)))
+mve_pred16_t __arm_vcmpgeq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s8)))
+mve_pred16_t __arm_vcmpgeq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s8)))
+mve_pred16_t __arm_vcmpgeq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s16)))
+mve_pred16_t __arm_vcmpgeq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s16)))
+mve_pred16_t __arm_vcmpgeq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s32)))
+mve_pred16_t __arm_vcmpgeq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s32)))
+mve_pred16_t __arm_vcmpgeq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s8)))
+mve_pred16_t __arm_vcmpgeq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s8)))
+mve_pred16_t __arm_vcmpgeq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s16)))
+mve_pred16_t __arm_vcmpgeq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s16)))
+mve_pred16_t __arm_vcmpgeq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s32)))
+mve_pred16_t __arm_vcmpgeq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s32)))
+mve_pred16_t __arm_vcmpgeq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s8)))
+mve_pred16_t __arm_vcmpgeq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s8)))
+mve_pred16_t __arm_vcmpgeq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s16)))
+mve_pred16_t __arm_vcmpgtq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s16)))
+mve_pred16_t __arm_vcmpgtq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s32)))
+mve_pred16_t __arm_vcmpgtq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s32)))
+mve_pred16_t __arm_vcmpgtq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s8)))
+mve_pred16_t __arm_vcmpgtq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s8)))
+mve_pred16_t __arm_vcmpgtq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s16)))
+mve_pred16_t __arm_vcmpgtq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s16)))
+mve_pred16_t __arm_vcmpgtq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s32)))
+mve_pred16_t __arm_vcmpgtq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s32)))
+mve_pred16_t __arm_vcmpgtq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s8)))
+mve_pred16_t __arm_vcmpgtq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s8)))
+mve_pred16_t __arm_vcmpgtq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s16)))
+mve_pred16_t __arm_vcmpgtq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s16)))
+mve_pred16_t __arm_vcmpgtq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s32)))
+mve_pred16_t __arm_vcmpgtq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s32)))
+mve_pred16_t __arm_vcmpgtq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s8)))
+mve_pred16_t __arm_vcmpgtq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s8)))
+mve_pred16_t __arm_vcmpgtq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s16)))
+mve_pred16_t __arm_vcmpgtq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s16)))
+mve_pred16_t __arm_vcmpgtq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s32)))
+mve_pred16_t __arm_vcmpgtq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s32)))
+mve_pred16_t __arm_vcmpgtq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s8)))
+mve_pred16_t __arm_vcmpgtq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s8)))
+mve_pred16_t __arm_vcmpgtq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u16)))
+mve_pred16_t __arm_vcmphiq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u16)))
+mve_pred16_t __arm_vcmphiq_m(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u32)))
+mve_pred16_t __arm_vcmphiq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u32)))
+mve_pred16_t __arm_vcmphiq_m(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u8)))
+mve_pred16_t __arm_vcmphiq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u8)))
+mve_pred16_t __arm_vcmphiq_m(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u16)))
+mve_pred16_t __arm_vcmphiq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u16)))
+mve_pred16_t __arm_vcmphiq_m(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u32)))
+mve_pred16_t __arm_vcmphiq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u32)))
+mve_pred16_t __arm_vcmphiq_m(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u8)))
+mve_pred16_t __arm_vcmphiq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u8)))
+mve_pred16_t __arm_vcmphiq_m(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u16)))
+mve_pred16_t __arm_vcmphiq_n_u16(uint16x8_t, uint16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u16)))
+mve_pred16_t __arm_vcmphiq(uint16x8_t, uint16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u32)))
+mve_pred16_t __arm_vcmphiq_n_u32(uint32x4_t, uint32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u32)))
+mve_pred16_t __arm_vcmphiq(uint32x4_t, uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u8)))
+mve_pred16_t __arm_vcmphiq_n_u8(uint8x16_t, uint8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u8)))
+mve_pred16_t __arm_vcmphiq(uint8x16_t, uint8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u16)))
+mve_pred16_t __arm_vcmphiq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u16)))
+mve_pred16_t __arm_vcmphiq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u32)))
+mve_pred16_t __arm_vcmphiq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u32)))
+mve_pred16_t __arm_vcmphiq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u8)))
+mve_pred16_t __arm_vcmphiq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u8)))
+mve_pred16_t __arm_vcmphiq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s16)))
+mve_pred16_t __arm_vcmpleq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s16)))
+mve_pred16_t __arm_vcmpleq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s32)))
+mve_pred16_t __arm_vcmpleq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s32)))
+mve_pred16_t __arm_vcmpleq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s8)))
+mve_pred16_t __arm_vcmpleq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s8)))
+mve_pred16_t __arm_vcmpleq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s16)))
+mve_pred16_t __arm_vcmpleq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s16)))
+mve_pred16_t __arm_vcmpleq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s32)))
+mve_pred16_t __arm_vcmpleq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s32)))
+mve_pred16_t __arm_vcmpleq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s8)))
+mve_pred16_t __arm_vcmpleq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s8)))
+mve_pred16_t __arm_vcmpleq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s16)))
+mve_pred16_t __arm_vcmpleq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s16)))
+mve_pred16_t __arm_vcmpleq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s32)))
+mve_pred16_t __arm_vcmpleq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s32)))
+mve_pred16_t __arm_vcmpleq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s8)))
+mve_pred16_t __arm_vcmpleq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s8)))
+mve_pred16_t __arm_vcmpleq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s16)))
+mve_pred16_t __arm_vcmpleq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s16)))
+mve_pred16_t __arm_vcmpleq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s32)))
+mve_pred16_t __arm_vcmpleq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s32)))
+mve_pred16_t __arm_vcmpleq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s8)))
+mve_pred16_t __arm_vcmpleq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s8)))
+mve_pred16_t __arm_vcmpleq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s16)))
+mve_pred16_t __arm_vcmpltq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s16)))
+mve_pred16_t __arm_vcmpltq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s32)))
+mve_pred16_t __arm_vcmpltq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s32)))
+mve_pred16_t __arm_vcmpltq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s8)))
+mve_pred16_t __arm_vcmpltq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s8)))
+mve_pred16_t __arm_vcmpltq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s16)))
+mve_pred16_t __arm_vcmpltq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s16)))
+mve_pred16_t __arm_vcmpltq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s32)))
+mve_pred16_t __arm_vcmpltq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s32)))
+mve_pred16_t __arm_vcmpltq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s8)))
+mve_pred16_t __arm_vcmpltq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s8)))
+mve_pred16_t __arm_vcmpltq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s16)))
+mve_pred16_t __arm_vcmpltq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s16)))
+mve_pred16_t __arm_vcmpltq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s32)))
+mve_pred16_t __arm_vcmpltq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s32)))
+mve_pred16_t __arm_vcmpltq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s8)))
+mve_pred16_t __arm_vcmpltq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s8)))
+mve_pred16_t __arm_vcmpltq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s16)))
+mve_pred16_t __arm_vcmpltq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s16)))
+mve_pred16_t __arm_vcmpltq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s32)))
+mve_pred16_t __arm_vcmpltq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s32)))
+mve_pred16_t __arm_vcmpltq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s8)))
+mve_pred16_t __arm_vcmpltq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s8)))
+mve_pred16_t __arm_vcmpltq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s16)))
+mve_pred16_t __arm_vcmpneq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s16)))
+mve_pred16_t __arm_vcmpneq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s32)))
+mve_pred16_t __arm_vcmpneq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s32)))
+mve_pred16_t __arm_vcmpneq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s8)))
+mve_pred16_t __arm_vcmpneq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s8)))
+mve_pred16_t __arm_vcmpneq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u16)))
+mve_pred16_t __arm_vcmpneq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u16)))
+mve_pred16_t __arm_vcmpneq_m(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u32)))
+mve_pred16_t __arm_vcmpneq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u32)))
+mve_pred16_t __arm_vcmpneq_m(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u8)))
+mve_pred16_t __arm_vcmpneq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u8)))
+mve_pred16_t __arm_vcmpneq_m(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s16)))
+mve_pred16_t __arm_vcmpneq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s16)))
+mve_pred16_t __arm_vcmpneq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s32)))
+mve_pred16_t __arm_vcmpneq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s32)))
+mve_pred16_t __arm_vcmpneq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s8)))
+mve_pred16_t __arm_vcmpneq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s8)))
+mve_pred16_t __arm_vcmpneq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u16)))
+mve_pred16_t __arm_vcmpneq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u16)))
+mve_pred16_t __arm_vcmpneq_m(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u32)))
+mve_pred16_t __arm_vcmpneq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u32)))
+mve_pred16_t __arm_vcmpneq_m(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u8)))
+mve_pred16_t __arm_vcmpneq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u8)))
+mve_pred16_t __arm_vcmpneq_m(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s16)))
+mve_pred16_t __arm_vcmpneq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s16)))
+mve_pred16_t __arm_vcmpneq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s32)))
+mve_pred16_t __arm_vcmpneq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s32)))
+mve_pred16_t __arm_vcmpneq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s8)))
+mve_pred16_t __arm_vcmpneq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s8)))
+mve_pred16_t __arm_vcmpneq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u16)))
+mve_pred16_t __arm_vcmpneq_n_u16(uint16x8_t, uint16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u16)))
+mve_pred16_t __arm_vcmpneq(uint16x8_t, uint16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u32)))
+mve_pred16_t __arm_vcmpneq_n_u32(uint32x4_t, uint32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u32)))
+mve_pred16_t __arm_vcmpneq(uint32x4_t, uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u8)))
+mve_pred16_t __arm_vcmpneq_n_u8(uint8x16_t, uint8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u8)))
+mve_pred16_t __arm_vcmpneq(uint8x16_t, uint8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s16)))
+mve_pred16_t __arm_vcmpneq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s16)))
+mve_pred16_t __arm_vcmpneq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s32)))
+mve_pred16_t __arm_vcmpneq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s32)))
+mve_pred16_t __arm_vcmpneq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s8)))
+mve_pred16_t __arm_vcmpneq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s8)))
+mve_pred16_t __arm_vcmpneq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u16)))
+mve_pred16_t __arm_vcmpneq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u16)))
+mve_pred16_t __arm_vcmpneq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u32)))
+mve_pred16_t __arm_vcmpneq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u32)))
+mve_pred16_t __arm_vcmpneq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u8)))
+mve_pred16_t __arm_vcmpneq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u8)))
+mve_pred16_t __arm_vcmpneq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_s16)))
+int16x8_t __arm_vcreateq_s16(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_s32)))
+int32x4_t __arm_vcreateq_s32(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_s64)))
+int64x2_t __arm_vcreateq_s64(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_s8)))
+int8x16_t __arm_vcreateq_s8(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_u16)))
+uint16x8_t __arm_vcreateq_u16(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_u32)))
+uint32x4_t __arm_vcreateq_u32(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_u64)))
+uint64x2_t __arm_vcreateq_u64(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_u8)))
+uint8x16_t __arm_vcreateq_u8(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp16q)))
+mve_pred16_t __arm_vctp16q(uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp16q_m)))
+mve_pred16_t __arm_vctp16q_m(uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp32q)))
+mve_pred16_t __arm_vctp32q(uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp32q_m)))
+mve_pred16_t __arm_vctp32q_m(uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp64q)))
+mve_pred16_t __arm_vctp64q(uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp64q_m)))
+mve_pred16_t __arm_vctp64q_m(uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp8q)))
+mve_pred16_t __arm_vctp8q(uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp8q_m)))
+mve_pred16_t __arm_vctp8q_m(uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s16)))
+int16x8_t __arm_veorq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s16)))
+int16x8_t __arm_veorq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s32)))
+int32x4_t __arm_veorq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s32)))
+int32x4_t __arm_veorq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s8)))
+int8x16_t __arm_veorq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s8)))
+int8x16_t __arm_veorq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u16)))
+uint16x8_t __arm_veorq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u16)))
+uint16x8_t __arm_veorq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u32)))
+uint32x4_t __arm_veorq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u32)))
+uint32x4_t __arm_veorq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u8)))
+uint8x16_t __arm_veorq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u8)))
+uint8x16_t __arm_veorq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_s16)))
+int16x8_t __arm_veorq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_s16)))
+int16x8_t __arm_veorq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_s32)))
+int32x4_t __arm_veorq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_s32)))
+int32x4_t __arm_veorq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_s8)))
+int8x16_t __arm_veorq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_s8)))
+int8x16_t __arm_veorq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_u16)))
+uint16x8_t __arm_veorq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_u16)))
+uint16x8_t __arm_veorq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_u32)))
+uint32x4_t __arm_veorq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_u32)))
+uint32x4_t __arm_veorq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_u8)))
+uint8x16_t __arm_veorq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_u8)))
+uint8x16_t __arm_veorq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s16)))
+int16x8_t __arm_veorq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s16)))
+int16x8_t __arm_veorq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s32)))
+int32x4_t __arm_veorq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s32)))
+int32x4_t __arm_veorq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s8)))
+int8x16_t __arm_veorq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s8)))
+int8x16_t __arm_veorq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u16)))
+uint16x8_t __arm_veorq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u16)))
+uint16x8_t __arm_veorq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u32)))
+uint32x4_t __arm_veorq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u32)))
+uint32x4_t __arm_veorq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u8)))
+uint8x16_t __arm_veorq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u8)))
+uint8x16_t __arm_veorq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s16)))
+int16_t __arm_vgetq_lane_s16(int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s16)))
+int16_t __arm_vgetq_lane(int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s32)))
+int32_t __arm_vgetq_lane_s32(int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s32)))
+int32_t __arm_vgetq_lane(int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s64)))
+int64_t __arm_vgetq_lane_s64(int64x2_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s64)))
+int64_t __arm_vgetq_lane(int64x2_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s8)))
+int8_t __arm_vgetq_lane_s8(int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s8)))
+int8_t __arm_vgetq_lane(int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u16)))
+uint16_t __arm_vgetq_lane_u16(uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u16)))
+uint16_t __arm_vgetq_lane(uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u32)))
+uint32_t __arm_vgetq_lane_u32(uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u32)))
+uint32_t __arm_vgetq_lane(uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u64)))
+uint64_t __arm_vgetq_lane_u64(uint64x2_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u64)))
+uint64_t __arm_vgetq_lane(uint64x2_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u8)))
+uint8_t __arm_vgetq_lane_u8(uint8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u8)))
+uint8_t __arm_vgetq_lane(uint8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s16)))
+int16x8_t __arm_vhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s16)))
+int16x8_t __arm_vhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s32)))
+int32x4_t __arm_vhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s32)))
+int32x4_t __arm_vhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s8)))
+int8x16_t __arm_vhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s8)))
+int8x16_t __arm_vhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u16)))
+uint16x8_t __arm_vhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u16)))
+uint16x8_t __arm_vhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u32)))
+uint32x4_t __arm_vhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u32)))
+uint32x4_t __arm_vhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u8)))
+uint8x16_t __arm_vhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u8)))
+uint8x16_t __arm_vhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s16)))
+int16x8_t __arm_vhaddq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s16)))
+int16x8_t __arm_vhaddq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s32)))
+int32x4_t __arm_vhaddq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s32)))
+int32x4_t __arm_vhaddq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s8)))
+int8x16_t __arm_vhaddq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s8)))
+int8x16_t __arm_vhaddq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u16)))
+uint16x8_t __arm_vhaddq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u16)))
+uint16x8_t __arm_vhaddq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u32)))
+uint32x4_t __arm_vhaddq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u32)))
+uint32x4_t __arm_vhaddq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u8)))
+uint8x16_t __arm_vhaddq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u8)))
+uint8x16_t __arm_vhaddq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s16)))
+int16x8_t __arm_vhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s16)))
+int16x8_t __arm_vhaddq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s32)))
+int32x4_t __arm_vhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s32)))
+int32x4_t __arm_vhaddq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s8)))
+int8x16_t __arm_vhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s8)))
+int8x16_t __arm_vhaddq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u16)))
+uint16x8_t __arm_vhaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u16)))
+uint16x8_t __arm_vhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u32)))
+uint32x4_t __arm_vhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u32)))
+uint32x4_t __arm_vhaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u8)))
+uint8x16_t __arm_vhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u8)))
+uint8x16_t __arm_vhaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16)))
+int16x8_t __arm_vhcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16)))
+int16x8_t __arm_vhcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32)))
+int32x4_t __arm_vhcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32)))
+int32x4_t __arm_vhcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8)))
+int8x16_t __arm_vhcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8)))
+int8x16_t __arm_vhcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s16)))
+int16x8_t __arm_vhcaddq_rot270_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s16)))
+int16x8_t __arm_vhcaddq_rot270(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s32)))
+int32x4_t __arm_vhcaddq_rot270_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s32)))
+int32x4_t __arm_vhcaddq_rot270(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s8)))
+int8x16_t __arm_vhcaddq_rot270_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s8)))
+int8x16_t __arm_vhcaddq_rot270(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16)))
+int16x8_t __arm_vhcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16)))
+int16x8_t __arm_vhcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32)))
+int32x4_t __arm_vhcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32)))
+int32x4_t __arm_vhcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8)))
+int8x16_t __arm_vhcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8)))
+int8x16_t __arm_vhcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16)))
+int16x8_t __arm_vhcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16)))
+int16x8_t __arm_vhcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32)))
+int32x4_t __arm_vhcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32)))
+int32x4_t __arm_vhcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8)))
+int8x16_t __arm_vhcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8)))
+int8x16_t __arm_vhcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s16)))
+int16x8_t __arm_vhcaddq_rot90_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s16)))
+int16x8_t __arm_vhcaddq_rot90(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s32)))
+int32x4_t __arm_vhcaddq_rot90_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s32)))
+int32x4_t __arm_vhcaddq_rot90(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s8)))
+int8x16_t __arm_vhcaddq_rot90_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s8)))
+int8x16_t __arm_vhcaddq_rot90(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16)))
+int16x8_t __arm_vhcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16)))
+int16x8_t __arm_vhcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32)))
+int32x4_t __arm_vhcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32)))
+int32x4_t __arm_vhcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8)))
+int8x16_t __arm_vhcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8)))
+int8x16_t __arm_vhcaddq_rot90_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s16)))
+int16x8_t __arm_vhsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s16)))
+int16x8_t __arm_vhsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s32)))
+int32x4_t __arm_vhsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s32)))
+int32x4_t __arm_vhsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s8)))
+int8x16_t __arm_vhsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s8)))
+int8x16_t __arm_vhsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u16)))
+uint16x8_t __arm_vhsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u16)))
+uint16x8_t __arm_vhsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u32)))
+uint32x4_t __arm_vhsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u32)))
+uint32x4_t __arm_vhsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u8)))
+uint8x16_t __arm_vhsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u8)))
+uint8x16_t __arm_vhsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s16)))
+int16x8_t __arm_vhsubq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s16)))
+int16x8_t __arm_vhsubq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s32)))
+int32x4_t __arm_vhsubq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s32)))
+int32x4_t __arm_vhsubq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s8)))
+int8x16_t __arm_vhsubq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s8)))
+int8x16_t __arm_vhsubq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u16)))
+uint16x8_t __arm_vhsubq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u16)))
+uint16x8_t __arm_vhsubq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u32)))
+uint32x4_t __arm_vhsubq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u32)))
+uint32x4_t __arm_vhsubq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u8)))
+uint8x16_t __arm_vhsubq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u8)))
+uint8x16_t __arm_vhsubq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s16)))
+int16x8_t __arm_vhsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s16)))
+int16x8_t __arm_vhsubq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s32)))
+int32x4_t __arm_vhsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s32)))
+int32x4_t __arm_vhsubq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s8)))
+int8x16_t __arm_vhsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s8)))
+int8x16_t __arm_vhsubq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u16)))
+uint16x8_t __arm_vhsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u16)))
+uint16x8_t __arm_vhsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u32)))
+uint32x4_t __arm_vhsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u32)))
+uint32x4_t __arm_vhsubq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u8)))
+uint8x16_t __arm_vhsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u8)))
+uint8x16_t __arm_vhsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_s16)))
+int16x8_t __arm_vld1q_s16(const int16_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_s16)))
+int16x8_t __arm_vld1q(const int16_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_s32)))
+int32x4_t __arm_vld1q_s32(const int32_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_s32)))
+int32x4_t __arm_vld1q(const int32_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_s8)))
+int8x16_t __arm_vld1q_s8(const int8_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_s8)))
+int8x16_t __arm_vld1q(const int8_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_u16)))
+uint16x8_t __arm_vld1q_u16(const uint16_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_u16)))
+uint16x8_t __arm_vld1q(const uint16_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_u32)))
+uint32x4_t __arm_vld1q_u32(const uint32_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_u32)))
+uint32x4_t __arm_vld1q(const uint32_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_u8)))
+uint8x16_t __arm_vld1q_u8(const uint8_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_u8)))
+uint8x16_t __arm_vld1q(const uint8_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s16)))
+int16x8_t __arm_vld1q_z_s16(const int16_t *, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s16)))
+int16x8_t __arm_vld1q_z(const int16_t *, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s32)))
+int32x4_t __arm_vld1q_z_s32(const int32_t *, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s32)))
+int32x4_t __arm_vld1q_z(const int32_t *, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s8)))
+int8x16_t __arm_vld1q_z_s8(const int8_t *, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s8)))
+int8x16_t __arm_vld1q_z(const int8_t *, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u16)))
+uint16x8_t __arm_vld1q_z_u16(const uint16_t *, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u16)))
+uint16x8_t __arm_vld1q_z(const uint16_t *, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u32)))
+uint32x4_t __arm_vld1q_z_u32(const uint32_t *, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u32)))
+uint32x4_t __arm_vld1q_z(const uint32_t *, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u8)))
+uint8x16_t __arm_vld1q_z_u8(const uint8_t *, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u8)))
+uint8x16_t __arm_vld1q_z(const uint8_t *, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_s16)))
+int16x8x2_t __arm_vld2q_s16(const int16_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_s16)))
+int16x8x2_t __arm_vld2q(const int16_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_s32)))
+int32x4x2_t __arm_vld2q_s32(const int32_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_s32)))
+int32x4x2_t __arm_vld2q(const int32_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_s8)))
+int8x16x2_t __arm_vld2q_s8(const int8_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_s8)))
+int8x16x2_t __arm_vld2q(const int8_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_u16)))
+uint16x8x2_t __arm_vld2q_u16(const uint16_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_u16)))
+uint16x8x2_t __arm_vld2q(const uint16_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_u32)))
+uint32x4x2_t __arm_vld2q_u32(const uint32_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_u32)))
+uint32x4x2_t __arm_vld2q(const uint32_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_u8)))
+uint8x16x2_t __arm_vld2q_u8(const uint8_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_u8)))
+uint8x16x2_t __arm_vld2q(const uint8_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_s16)))
+int16x8x4_t __arm_vld4q_s16(const int16_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_s16)))
+int16x8x4_t __arm_vld4q(const int16_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_s32)))
+int32x4x4_t __arm_vld4q_s32(const int32_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_s32)))
+int32x4x4_t __arm_vld4q(const int32_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_s8)))
+int8x16x4_t __arm_vld4q_s8(const int8_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_s8)))
+int8x16x4_t __arm_vld4q(const int8_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_u16)))
+uint16x8x4_t __arm_vld4q_u16(const uint16_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_u16)))
+uint16x8x4_t __arm_vld4q(const uint16_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_u32)))
+uint32x4x4_t __arm_vld4q_u32(const uint32_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_u32)))
+uint32x4x4_t __arm_vld4q(const uint32_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_u8)))
+uint8x16x4_t __arm_vld4q_u8(const uint8_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_u8)))
+uint8x16x4_t __arm_vld4q(const uint8_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s16)))
+int16x8_t __arm_vldrbq_gather_offset_s16(const int8_t *, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s16)))
+int16x8_t __arm_vldrbq_gather_offset(const int8_t *, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s32)))
+int32x4_t __arm_vldrbq_gather_offset_s32(const int8_t *, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s32)))
+int32x4_t __arm_vldrbq_gather_offset(const int8_t *, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s8)))
+int8x16_t __arm_vldrbq_gather_offset_s8(const int8_t *, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s8)))
+int8x16_t __arm_vldrbq_gather_offset(const int8_t *, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u16)))
+uint16x8_t __arm_vldrbq_gather_offset_u16(const uint8_t *, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u16)))
+uint16x8_t __arm_vldrbq_gather_offset(const uint8_t *, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u32)))
+uint32x4_t __arm_vldrbq_gather_offset_u32(const uint8_t *, uint32x4_t);
+static __inline__
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u32))) +uint32x4_t __arm_vldrbq_gather_offset(const uint8_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) +uint8x16_t __arm_vldrbq_gather_offset_u8(const uint8_t *, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) +uint8x16_t __arm_vldrbq_gather_offset(const uint8_t *, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) +int16x8_t __arm_vldrbq_gather_offset_z_s16(const int8_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) +int16x8_t __arm_vldrbq_gather_offset_z(const int8_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) +int32x4_t __arm_vldrbq_gather_offset_z_s32(const int8_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) +int32x4_t __arm_vldrbq_gather_offset_z(const int8_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) +int8x16_t __arm_vldrbq_gather_offset_z_s8(const int8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) +int8x16_t __arm_vldrbq_gather_offset_z(const int8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) +uint16x8_t __arm_vldrbq_gather_offset_z_u16(const uint8_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) +uint16x8_t __arm_vldrbq_gather_offset_z(const uint8_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) +uint32x4_t __arm_vldrbq_gather_offset_z_u32(const uint8_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) +uint32x4_t __arm_vldrbq_gather_offset_z(const uint8_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) +uint8x16_t __arm_vldrbq_gather_offset_z_u8(const uint8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) +uint8x16_t __arm_vldrbq_gather_offset_z(const uint8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_s16))) +int16x8_t __arm_vldrbq_s16(const int8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_s32))) +int32x4_t __arm_vldrbq_s32(const int8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_s8))) +int8x16_t __arm_vldrbq_s8(const int8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_u16))) +uint16x8_t __arm_vldrbq_u16(const uint8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_u32))) +uint32x4_t __arm_vldrbq_u32(const uint8_t *); 
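/*
 * Usage sketch (editorial illustration, not part of the upstream header;
 * assumes an MVE-enabled target such as -march=armv8.1-m.main+mve).  The
 * suffix-free names declared above carry __attribute__((overloadable)), so
 * the overload is chosen from the argument types and each call below
 * resolves to the _s16 variant.  The _z/_m/_x forms take an mve_pred16_t
 * lane mask: _z zeroes inactive lanes, _m merges them from an extra first
 * operand, and _x leaves them unspecified.
 *
 *   #include <arm_mve.h>
 *
 *   int16x8_t halving_diff(const int16_t *a, const int8_t *table,
 *                          uint16x8_t idx, mve_pred16_t p)
 *   {
 *       int16x8_t va = __arm_vld1q_z(a, p);                    // __arm_vld1q_z_s16
 *       int16x8_t vb = __arm_vldrbq_gather_offset(table, idx); // widening byte gather
 *       return __arm_vhsubq_x(va, vb, p);                      // lanewise (va - vb) >> 1
 *   }
 */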
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_u8))) +uint8x16_t __arm_vldrbq_u8(const uint8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_s16))) +int16x8_t __arm_vldrbq_z_s16(const int8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_s32))) +int32x4_t __arm_vldrbq_z_s32(const int8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_s8))) +int8x16_t __arm_vldrbq_z_s8(const int8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_u16))) +uint16x8_t __arm_vldrbq_z_u16(const uint8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_u32))) +uint32x4_t __arm_vldrbq_z_u32(const uint8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_u8))) +uint8x16_t __arm_vldrbq_z_u8(const uint8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_s64))) +int64x2_t __arm_vldrdq_gather_base_s64(uint64x2_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_u64))) +uint64x2_t __arm_vldrdq_gather_base_u64(uint64x2_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_wb_s64))) +int64x2_t __arm_vldrdq_gather_base_wb_s64(uint64x2_t *, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_wb_u64))) +uint64x2_t __arm_vldrdq_gather_base_wb_u64(uint64x2_t *, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_s64))) +int64x2_t __arm_vldrdq_gather_base_wb_z_s64(uint64x2_t *, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_u64))) +uint64x2_t __arm_vldrdq_gather_base_wb_z_u64(uint64x2_t *, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_z_s64))) +int64x2_t __arm_vldrdq_gather_base_z_s64(uint64x2_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_z_u64))) +uint64x2_t __arm_vldrdq_gather_base_z_u64(uint64x2_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) +int64x2_t __arm_vldrdq_gather_offset_s64(const int64_t *, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) +int64x2_t __arm_vldrdq_gather_offset(const int64_t *, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) +uint64x2_t __arm_vldrdq_gather_offset_u64(const uint64_t *, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) +uint64x2_t __arm_vldrdq_gather_offset(const uint64_t *, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) +int64x2_t __arm_vldrdq_gather_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) +int64x2_t __arm_vldrdq_gather_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) +uint64x2_t __arm_vldrdq_gather_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) +uint64x2_t __arm_vldrdq_gather_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) +int64x2_t __arm_vldrdq_gather_shifted_offset_s64(const int64_t *, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) +int64x2_t __arm_vldrdq_gather_shifted_offset(const int64_t *, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) +uint64x2_t __arm_vldrdq_gather_shifted_offset_u64(const uint64_t *, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) +uint64x2_t __arm_vldrdq_gather_shifted_offset(const uint64_t *, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) +int64x2_t __arm_vldrdq_gather_shifted_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) +int64x2_t __arm_vldrdq_gather_shifted_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) +uint64x2_t __arm_vldrdq_gather_shifted_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) +uint64x2_t __arm_vldrdq_gather_shifted_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) +int16x8_t __arm_vldrhq_gather_offset_s16(const int16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) +int16x8_t __arm_vldrhq_gather_offset(const int16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) +int32x4_t __arm_vldrhq_gather_offset_s32(const int16_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) +int32x4_t __arm_vldrhq_gather_offset(const int16_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) +uint16x8_t __arm_vldrhq_gather_offset_u16(const uint16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) +uint16x8_t __arm_vldrhq_gather_offset(const uint16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) +uint32x4_t __arm_vldrhq_gather_offset_u32(const uint16_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) +uint32x4_t __arm_vldrhq_gather_offset(const uint16_t *, uint32x4_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) +int16x8_t __arm_vldrhq_gather_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) +int16x8_t __arm_vldrhq_gather_offset_z(const int16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) +int32x4_t __arm_vldrhq_gather_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) +int32x4_t __arm_vldrhq_gather_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) +uint16x8_t __arm_vldrhq_gather_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) +uint16x8_t __arm_vldrhq_gather_offset_z(const uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) +uint32x4_t __arm_vldrhq_gather_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) +uint32x4_t __arm_vldrhq_gather_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) +int16x8_t __arm_vldrhq_gather_shifted_offset_s16(const int16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) +int16x8_t __arm_vldrhq_gather_shifted_offset(const int16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) +int32x4_t __arm_vldrhq_gather_shifted_offset_s32(const int16_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) +int32x4_t __arm_vldrhq_gather_shifted_offset(const int16_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) +uint16x8_t __arm_vldrhq_gather_shifted_offset_u16(const uint16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) +uint16x8_t __arm_vldrhq_gather_shifted_offset(const uint16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) +uint32x4_t __arm_vldrhq_gather_shifted_offset_u32(const uint16_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) +uint32x4_t __arm_vldrhq_gather_shifted_offset(const uint16_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) +int16x8_t __arm_vldrhq_gather_shifted_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) +int16x8_t __arm_vldrhq_gather_shifted_offset_z(const int16_t *, uint16x8_t, 
mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) +int32x4_t __arm_vldrhq_gather_shifted_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) +int32x4_t __arm_vldrhq_gather_shifted_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) +uint16x8_t __arm_vldrhq_gather_shifted_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) +uint16x8_t __arm_vldrhq_gather_shifted_offset_z(const uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) +uint32x4_t __arm_vldrhq_gather_shifted_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) +uint32x4_t __arm_vldrhq_gather_shifted_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_s16))) +int16x8_t __arm_vldrhq_s16(const int16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_s32))) +int32x4_t __arm_vldrhq_s32(const int16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_u16))) +uint16x8_t __arm_vldrhq_u16(const uint16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_u32))) +uint32x4_t __arm_vldrhq_u32(const uint16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_z_s16))) +int16x8_t __arm_vldrhq_z_s16(const int16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_z_s32))) +int32x4_t __arm_vldrhq_z_s32(const int16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_z_u16))) +uint16x8_t __arm_vldrhq_z_u16(const uint16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_z_u32))) +uint32x4_t __arm_vldrhq_z_u32(const uint16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_s32))) +int32x4_t __arm_vldrwq_gather_base_s32(uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_u32))) +uint32x4_t __arm_vldrwq_gather_base_u32(uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_s32))) +int32x4_t __arm_vldrwq_gather_base_wb_s32(uint32x4_t *, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_u32))) +uint32x4_t __arm_vldrwq_gather_base_wb_u32(uint32x4_t *, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_s32))) +int32x4_t __arm_vldrwq_gather_base_wb_z_s32(uint32x4_t *, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_u32))) +uint32x4_t __arm_vldrwq_gather_base_wb_z_u32(uint32x4_t *, int, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_z_s32))) +int32x4_t __arm_vldrwq_gather_base_z_s32(uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_z_u32))) +uint32x4_t __arm_vldrwq_gather_base_z_u32(uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) +int32x4_t __arm_vldrwq_gather_offset_s32(const int32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) +int32x4_t __arm_vldrwq_gather_offset(const int32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) +uint32x4_t __arm_vldrwq_gather_offset_u32(const uint32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) +uint32x4_t __arm_vldrwq_gather_offset(const uint32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) +int32x4_t __arm_vldrwq_gather_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) +int32x4_t __arm_vldrwq_gather_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) +uint32x4_t __arm_vldrwq_gather_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) +uint32x4_t __arm_vldrwq_gather_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) +int32x4_t __arm_vldrwq_gather_shifted_offset_s32(const int32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) +int32x4_t __arm_vldrwq_gather_shifted_offset(const int32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) +uint32x4_t __arm_vldrwq_gather_shifted_offset_u32(const uint32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) +uint32x4_t __arm_vldrwq_gather_shifted_offset(const uint32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) +int32x4_t __arm_vldrwq_gather_shifted_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) +int32x4_t __arm_vldrwq_gather_shifted_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) +uint32x4_t __arm_vldrwq_gather_shifted_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) +uint32x4_t __arm_vldrwq_gather_shifted_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_s32))) +int32x4_t __arm_vldrwq_s32(const int32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_u32))) +uint32x4_t __arm_vldrwq_u32(const uint32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_z_s32))) +int32x4_t __arm_vldrwq_z_s32(const int32_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_z_u32))) +uint32x4_t __arm_vldrwq_z_u32(const uint32_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s16))) +int16x8_t __arm_vmaxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s16))) +int16x8_t __arm_vmaxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s32))) +int32x4_t __arm_vmaxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s32))) +int32x4_t __arm_vmaxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s8))) +int8x16_t __arm_vmaxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s8))) +int8x16_t __arm_vmaxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u16))) +uint16x8_t __arm_vmaxq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u16))) +uint16x8_t __arm_vmaxq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u32))) +uint32x4_t __arm_vmaxq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u32))) +uint32x4_t __arm_vmaxq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u8))) +uint8x16_t __arm_vmaxq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u8))) +uint8x16_t __arm_vmaxq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s16))) +int16x8_t __arm_vmaxq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s16))) +int16x8_t __arm_vmaxq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s32))) +int32x4_t __arm_vmaxq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s32))) +int32x4_t __arm_vmaxq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s8))) +int8x16_t __arm_vmaxq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s8))) +int8x16_t __arm_vmaxq(int8x16_t, int8x16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u16))) +uint16x8_t __arm_vmaxq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u16))) +uint16x8_t __arm_vmaxq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u32))) +uint32x4_t __arm_vmaxq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u32))) +uint32x4_t __arm_vmaxq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u8))) +uint8x16_t __arm_vmaxq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u8))) +uint8x16_t __arm_vmaxq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s16))) +int16x8_t __arm_vmaxq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s16))) +int16x8_t __arm_vmaxq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s32))) +int32x4_t __arm_vmaxq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s32))) +int32x4_t __arm_vmaxq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s8))) +int8x16_t __arm_vmaxq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s8))) +int8x16_t __arm_vmaxq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u16))) +uint16x8_t __arm_vmaxq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u16))) +uint16x8_t __arm_vmaxq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u32))) +uint32x4_t __arm_vmaxq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u32))) +uint32x4_t __arm_vmaxq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u8))) +uint8x16_t __arm_vmaxq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u8))) +uint8x16_t __arm_vmaxq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s16))) +int16_t __arm_vmaxvq_s16(int16_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s16))) +int16_t __arm_vmaxvq(int16_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s32))) +int32_t __arm_vmaxvq_s32(int32_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s32))) +int32_t __arm_vmaxvq(int32_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s8))) +int8_t __arm_vmaxvq_s8(int8_t, int8x16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s8))) +int8_t __arm_vmaxvq(int8_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u16))) +uint16_t __arm_vmaxvq_u16(uint16_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u16))) +uint16_t __arm_vmaxvq(uint16_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u32))) +uint32_t __arm_vmaxvq_u32(uint32_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u32))) +uint32_t __arm_vmaxvq(uint32_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u8))) +uint8_t __arm_vmaxvq_u8(uint8_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u8))) +uint8_t __arm_vmaxvq(uint8_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s16))) +int16x8_t __arm_vminq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s16))) +int16x8_t __arm_vminq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s32))) +int32x4_t __arm_vminq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s32))) +int32x4_t __arm_vminq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s8))) +int8x16_t __arm_vminq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s8))) +int8x16_t __arm_vminq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u16))) +uint16x8_t __arm_vminq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u16))) +uint16x8_t __arm_vminq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u32))) +uint32x4_t __arm_vminq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u32))) +uint32x4_t __arm_vminq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u8))) +uint8x16_t __arm_vminq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u8))) +uint8x16_t __arm_vminq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_s16))) +int16x8_t __arm_vminq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_s16))) +int16x8_t __arm_vminq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_s32))) +int32x4_t __arm_vminq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_s32))) +int32x4_t 
__arm_vminq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_s8))) +int8x16_t __arm_vminq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_s8))) +int8x16_t __arm_vminq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_u16))) +uint16x8_t __arm_vminq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_u16))) +uint16x8_t __arm_vminq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_u32))) +uint32x4_t __arm_vminq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_u32))) +uint32x4_t __arm_vminq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_u8))) +uint8x16_t __arm_vminq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_u8))) +uint8x16_t __arm_vminq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s16))) +int16x8_t __arm_vminq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s16))) +int16x8_t __arm_vminq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s32))) +int32x4_t __arm_vminq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s32))) +int32x4_t __arm_vminq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s8))) +int8x16_t __arm_vminq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s8))) +int8x16_t __arm_vminq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u16))) +uint16x8_t __arm_vminq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u16))) +uint16x8_t __arm_vminq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u32))) +uint32x4_t __arm_vminq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u32))) +uint32x4_t __arm_vminq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u8))) +uint8x16_t __arm_vminq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u8))) +uint8x16_t __arm_vminq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_s16))) +int16_t __arm_vminvq_s16(int16_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_s16))) +int16_t __arm_vminvq(int16_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_s32))) +int32_t __arm_vminvq_s32(int32_t, int32x4_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_s32))) +int32_t __arm_vminvq(int32_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_s8))) +int8_t __arm_vminvq_s8(int8_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_s8))) +int8_t __arm_vminvq(int8_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_u16))) +uint16_t __arm_vminvq_u16(uint16_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_u16))) +uint16_t __arm_vminvq(uint16_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_u32))) +uint32_t __arm_vminvq_u32(uint32_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_u32))) +uint32_t __arm_vminvq(uint32_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_u8))) +uint8_t __arm_vminvq_u8(uint8_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_u8))) +uint8_t __arm_vminvq(uint8_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s16))) +int32_t __arm_vmladavaq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s16))) +int32_t __arm_vmladavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s32))) +int32_t __arm_vmladavaq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s32))) +int32_t __arm_vmladavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s8))) +int32_t __arm_vmladavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s8))) +int32_t __arm_vmladavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u16))) +uint32_t __arm_vmladavaq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u16))) +uint32_t __arm_vmladavaq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u32))) +uint32_t __arm_vmladavaq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u32))) +uint32_t __arm_vmladavaq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u8))) +uint32_t __arm_vmladavaq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u8))) +uint32_t __arm_vmladavaq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s16))) +int32_t __arm_vmladavaq_s16(int32_t, int16x8_t, int16x8_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s16))) +int32_t __arm_vmladavaq(int32_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s32))) +int32_t __arm_vmladavaq_s32(int32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s32))) +int32_t __arm_vmladavaq(int32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s8))) +int32_t __arm_vmladavaq_s8(int32_t, int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s8))) +int32_t __arm_vmladavaq(int32_t, int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u16))) +uint32_t __arm_vmladavaq_u16(uint32_t, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u16))) +uint32_t __arm_vmladavaq(uint32_t, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u32))) +uint32_t __arm_vmladavaq_u32(uint32_t, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u32))) +uint32_t __arm_vmladavaq(uint32_t, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u8))) +uint32_t __arm_vmladavaq_u8(uint32_t, uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u8))) +uint32_t __arm_vmladavaq(uint32_t, uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s16))) +int32_t __arm_vmladavaxq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s16))) +int32_t __arm_vmladavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s32))) +int32_t __arm_vmladavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s32))) +int32_t __arm_vmladavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s8))) +int32_t __arm_vmladavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s8))) +int32_t __arm_vmladavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s16))) +int32_t __arm_vmladavaxq_s16(int32_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s16))) +int32_t __arm_vmladavaxq(int32_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s32))) +int32_t __arm_vmladavaxq_s32(int32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s32))) +int32_t __arm_vmladavaxq(int32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s8))) +int32_t __arm_vmladavaxq_s8(int32_t, 
int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s8))) +int32_t __arm_vmladavaxq(int32_t, int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s16))) +int32_t __arm_vmladavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s16))) +int32_t __arm_vmladavq_p(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s32))) +int32_t __arm_vmladavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s32))) +int32_t __arm_vmladavq_p(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s8))) +int32_t __arm_vmladavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s8))) +int32_t __arm_vmladavq_p(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u16))) +uint32_t __arm_vmladavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u16))) +uint32_t __arm_vmladavq_p(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u32))) +uint32_t __arm_vmladavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u32))) +uint32_t __arm_vmladavq_p(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u8))) +uint32_t __arm_vmladavq_p_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u8))) +uint32_t __arm_vmladavq_p(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s16))) +int32_t __arm_vmladavq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s16))) +int32_t __arm_vmladavq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s32))) +int32_t __arm_vmladavq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s32))) +int32_t __arm_vmladavq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s8))) +int32_t __arm_vmladavq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s8))) +int32_t __arm_vmladavq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u16))) +uint32_t __arm_vmladavq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u16))) +uint32_t __arm_vmladavq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u32))) +uint32_t __arm_vmladavq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u32))) +uint32_t __arm_vmladavq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u8))) +uint32_t __arm_vmladavq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u8))) +uint32_t __arm_vmladavq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s16))) +int32_t __arm_vmladavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s16))) +int32_t __arm_vmladavxq_p(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s32))) +int32_t __arm_vmladavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s32))) +int32_t __arm_vmladavxq_p(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s8))) +int32_t __arm_vmladavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s8))) +int32_t __arm_vmladavxq_p(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s16))) +int32_t __arm_vmladavxq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s16))) +int32_t __arm_vmladavxq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s32))) +int32_t __arm_vmladavxq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s32))) +int32_t __arm_vmladavxq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s8))) +int32_t __arm_vmladavxq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s8))) +int32_t __arm_vmladavxq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_s16))) +int64_t __arm_vmlaldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_s16))) +int64_t __arm_vmlaldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_s32))) +int64_t __arm_vmlaldavaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_s32))) +int64_t __arm_vmlaldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_u16))) +uint64_t __arm_vmlaldavaq_p_u16(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_u16))) +uint64_t __arm_vmlaldavaq_p(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_u32))) +uint64_t __arm_vmlaldavaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_u32))) +uint64_t __arm_vmlaldavaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_s16))) +int64_t __arm_vmlaldavaq_s16(int64_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_s16))) +int64_t __arm_vmlaldavaq(int64_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_s32))) +int64_t __arm_vmlaldavaq_s32(int64_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_s32))) +int64_t __arm_vmlaldavaq(int64_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_u16))) +uint64_t __arm_vmlaldavaq_u16(uint64_t, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_u16))) +uint64_t __arm_vmlaldavaq(uint64_t, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_u32))) +uint64_t __arm_vmlaldavaq_u32(uint64_t, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_u32))) +uint64_t __arm_vmlaldavaq(uint64_t, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_p_s16))) +int64_t __arm_vmlaldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_p_s16))) +int64_t __arm_vmlaldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_p_s32))) +int64_t __arm_vmlaldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_p_s32))) +int64_t __arm_vmlaldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_s16))) +int64_t __arm_vmlaldavaxq_s16(int64_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_s16))) +int64_t __arm_vmlaldavaxq(int64_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_s32))) +int64_t __arm_vmlaldavaxq_s32(int64_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_s32))) +int64_t __arm_vmlaldavaxq(int64_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_s16))) +int64_t __arm_vmlaldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_s16))) +int64_t __arm_vmlaldavq_p(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_s32))) +int64_t __arm_vmlaldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_s32))) +int64_t __arm_vmlaldavq_p(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_u16))) +uint64_t __arm_vmlaldavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_u16))) +uint64_t __arm_vmlaldavq_p(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_u32))) +uint64_t __arm_vmlaldavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_u32))) +uint64_t __arm_vmlaldavq_p(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_s16))) +int64_t __arm_vmlaldavq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_s16))) +int64_t __arm_vmlaldavq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_s32))) +int64_t __arm_vmlaldavq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_s32))) +int64_t __arm_vmlaldavq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_u16))) +uint64_t __arm_vmlaldavq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_u16))) +uint64_t __arm_vmlaldavq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_u32))) +uint64_t __arm_vmlaldavq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_u32))) +uint64_t __arm_vmlaldavq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_p_s16))) +int64_t __arm_vmlaldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_p_s16))) +int64_t __arm_vmlaldavxq_p(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_p_s32))) +int64_t __arm_vmlaldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_p_s32))) +int64_t __arm_vmlaldavxq_p(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_s16))) +int64_t __arm_vmlaldavxq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_s16))) +int64_t __arm_vmlaldavxq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_s32))) +int64_t __arm_vmlaldavxq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_s32))) +int64_t __arm_vmlaldavxq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s16))) +int32_t __arm_vmlsdavaq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s16))) +int32_t __arm_vmlsdavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s32))) +int32_t __arm_vmlsdavaq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s32))) +int32_t __arm_vmlsdavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s8))) +int32_t __arm_vmlsdavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s8))) +int32_t __arm_vmlsdavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s16))) +int32_t __arm_vmlsdavaq_s16(int32_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s16))) +int32_t __arm_vmlsdavaq(int32_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s32))) +int32_t __arm_vmlsdavaq_s32(int32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s32))) +int32_t __arm_vmlsdavaq(int32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s8))) +int32_t __arm_vmlsdavaq_s8(int32_t, int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s8))) +int32_t __arm_vmlsdavaq(int32_t, int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s16))) +int32_t __arm_vmlsdavaxq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s16))) +int32_t __arm_vmlsdavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s32))) +int32_t __arm_vmlsdavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s32))) +int32_t __arm_vmlsdavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s8))) +int32_t __arm_vmlsdavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s8))) +int32_t __arm_vmlsdavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s16))) +int32_t __arm_vmlsdavaxq_s16(int32_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s16))) +int32_t __arm_vmlsdavaxq(int32_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s32))) +int32_t __arm_vmlsdavaxq_s32(int32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s32))) +int32_t __arm_vmlsdavaxq(int32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s8))) +int32_t __arm_vmlsdavaxq_s8(int32_t, int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s8))) +int32_t __arm_vmlsdavaxq(int32_t, int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s16))) +int32_t __arm_vmlsdavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s16))) +int32_t __arm_vmlsdavq_p(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s32))) +int32_t __arm_vmlsdavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s32))) +int32_t __arm_vmlsdavq_p(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s8))) +int32_t __arm_vmlsdavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s8))) +int32_t __arm_vmlsdavq_p(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s16))) +int32_t __arm_vmlsdavq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s16))) +int32_t __arm_vmlsdavq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s32))) +int32_t __arm_vmlsdavq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s32))) +int32_t __arm_vmlsdavq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s8))) +int32_t __arm_vmlsdavq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s8))) +int32_t __arm_vmlsdavq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s16))) +int32_t __arm_vmlsdavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s16))) +int32_t __arm_vmlsdavxq_p(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s32))) +int32_t __arm_vmlsdavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s32))) +int32_t __arm_vmlsdavxq_p(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s8))) +int32_t __arm_vmlsdavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s8))) +int32_t __arm_vmlsdavxq_p(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s16))) +int32_t __arm_vmlsdavxq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s16))) +int32_t __arm_vmlsdavxq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s32))) +int32_t __arm_vmlsdavxq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s32))) +int32_t __arm_vmlsdavxq(int32x4_t, int32x4_t); 
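(The vmladavq/vmlaldavq/vmlsdavq declarations above are the MVE "multiply-accumulate across vector" reductions: each multiplies two vectors lane-wise and sums the products into a single scalar, with the vmlsdav forms subtracting, `l` variants widening the accumulator to 64 bits, `a` variants adding to a running scalar, `x` variants exchanging odd/even lane pairs, and `_p` variants taking a lane predicate. A minimal usage sketch, assuming a Clang targeting `-march=armv8.1-m.main+mve` and that the unprefixed names from arm_mve.h are in scope; the helper name `dot_q15` is hypothetical, and `vmladavaq_p_s16`, `vctp16q`, and `vldrhq_z_s16` are the accumulating, tail-predication, and predicated-load intrinsics declared elsewhere in this header:

#include <arm_mve.h>

/* Dot product of two int16_t buffers using the vmladavaq_p reduction.
   vctp16q(n) builds a predicate covering min(n, 8) lanes, so the same
   loop body handles the tail without a scalar cleanup loop. */
int32_t dot_q15(const int16_t *a, const int16_t *b, int n) {
  int32_t acc = 0;
  while (n > 0) {
    mve_pred16_t p = vctp16q((uint32_t)n);  /* active-lane predicate   */
    int16x8_t va = vldrhq_z_s16(a, p);      /* zeroing predicated load */
    int16x8_t vb = vldrhq_z_s16(b, p);
    acc = vmladavaq_p_s16(acc, va, vb, p);  /* acc += sum(va[i]*vb[i]) */
    a += 8; b += 8; n -= 8;
  }
  return acc;
}

Tail predication like this is the pattern the `_p` reduction forms exist to support: the final partial block reuses the vector loop body.)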
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s8))) +int32_t __arm_vmlsdavxq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s8))) +int32_t __arm_vmlsdavxq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_p_s16))) +int64_t __arm_vmlsldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_p_s16))) +int64_t __arm_vmlsldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_p_s32))) +int64_t __arm_vmlsldavaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_p_s32))) +int64_t __arm_vmlsldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_s16))) +int64_t __arm_vmlsldavaq_s16(int64_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_s16))) +int64_t __arm_vmlsldavaq(int64_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_s32))) +int64_t __arm_vmlsldavaq_s32(int64_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_s32))) +int64_t __arm_vmlsldavaq(int64_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_p_s16))) +int64_t __arm_vmlsldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_p_s16))) +int64_t __arm_vmlsldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_p_s32))) +int64_t __arm_vmlsldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_p_s32))) +int64_t __arm_vmlsldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_s16))) +int64_t __arm_vmlsldavaxq_s16(int64_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_s16))) +int64_t __arm_vmlsldavaxq(int64_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_s32))) +int64_t __arm_vmlsldavaxq_s32(int64_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_s32))) +int64_t __arm_vmlsldavaxq(int64_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_p_s16))) +int64_t __arm_vmlsldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_p_s16))) +int64_t __arm_vmlsldavq_p(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_p_s32))) +int64_t __arm_vmlsldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_p_s32))) +int64_t __arm_vmlsldavq_p(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_s16))) +int64_t __arm_vmlsldavq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_s16))) +int64_t __arm_vmlsldavq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_s32))) +int64_t __arm_vmlsldavq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_s32))) +int64_t __arm_vmlsldavq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_p_s16))) +int64_t __arm_vmlsldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_p_s16))) +int64_t __arm_vmlsldavxq_p(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_p_s32))) +int64_t __arm_vmlsldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_p_s32))) +int64_t __arm_vmlsldavxq_p(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_s16))) +int64_t __arm_vmlsldavxq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_s16))) +int64_t __arm_vmlsldavxq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_s32))) +int64_t __arm_vmlsldavxq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_s32))) +int64_t __arm_vmlsldavxq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s16))) +int16x8_t __arm_vmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s16))) +int16x8_t __arm_vmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s32))) +int32x4_t __arm_vmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s32))) +int32x4_t __arm_vmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s8))) +int8x16_t __arm_vmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s8))) +int8x16_t __arm_vmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u16))) +uint16x8_t __arm_vmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u16))) +uint16x8_t __arm_vmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u32))) +uint32x4_t __arm_vmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); 
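(As throughout this header, every intrinsic is declared twice: once under its fully suffixed name and once with `__attribute__((overloadable))` under the polymorphic name, both bound to the same builtin via `__clang_arm_mve_alias`, so C callers get overload resolution on the vector argument types. Among the predication suffixes, the `_m` ("merging") forms take a leading `inactive` vector that supplies the result lanes where the predicate is false, while the `_x` forms leave those lanes undefined. A small illustration with the high-half multiply declared above; `mulh_even_lanes` is a hypothetical name, and the literal predicate assumes the standard MVE encoding of one predicate bit per byte:

#include <arm_mve.h>

/* Merging predication on the high-half multiply. 0x3333 sets predicate
   bits 0-1, 4-5, 8-9, 12-13, i.e. 16-bit lanes 0, 2, 4, 6. With _m, the
   remaining lanes are copied from `inactive` instead of being computed. */
int16x8_t mulh_even_lanes(int16x8_t inactive, int16x8_t a, int16x8_t b) {
  mve_pred16_t p = 0x3333;
  int16x8_t r1 = vmulhq_m_s16(inactive, a, b, p); /* suffixed name      */
  int16x8_t r2 = vmulhq_m(inactive, a, b, p);     /* polymorphic alias  */
  (void)r2;  /* both resolve to the same builtin */
  return r1;
})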
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u32))) +uint32x4_t __arm_vmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u8))) +uint8x16_t __arm_vmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u8))) +uint8x16_t __arm_vmulhq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s16))) +int16x8_t __arm_vmulhq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s16))) +int16x8_t __arm_vmulhq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s32))) +int32x4_t __arm_vmulhq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s32))) +int32x4_t __arm_vmulhq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s8))) +int8x16_t __arm_vmulhq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s8))) +int8x16_t __arm_vmulhq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u16))) +uint16x8_t __arm_vmulhq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u16))) +uint16x8_t __arm_vmulhq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u32))) +uint32x4_t __arm_vmulhq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u32))) +uint32x4_t __arm_vmulhq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u8))) +uint8x16_t __arm_vmulhq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u8))) +uint8x16_t __arm_vmulhq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s16))) +int16x8_t __arm_vmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s16))) +int16x8_t __arm_vmulhq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s32))) +int32x4_t __arm_vmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s32))) +int32x4_t __arm_vmulhq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s8))) +int8x16_t __arm_vmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s8))) +int8x16_t __arm_vmulhq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u16))) +uint16x8_t __arm_vmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u16))) +uint16x8_t __arm_vmulhq_x(uint16x8_t, 
uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u32))) +uint32x4_t __arm_vmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u32))) +uint32x4_t __arm_vmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u8))) +uint8x16_t __arm_vmulhq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u8))) +uint8x16_t __arm_vmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s16))) +int32x4_t __arm_vmullbq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s16))) +int32x4_t __arm_vmullbq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s32))) +int64x2_t __arm_vmullbq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s32))) +int64x2_t __arm_vmullbq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s8))) +int16x8_t __arm_vmullbq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s8))) +int16x8_t __arm_vmullbq_int_m(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u16))) +uint32x4_t __arm_vmullbq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u16))) +uint32x4_t __arm_vmullbq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u32))) +uint64x2_t __arm_vmullbq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u32))) +uint64x2_t __arm_vmullbq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u8))) +uint16x8_t __arm_vmullbq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u8))) +uint16x8_t __arm_vmullbq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s16))) +int32x4_t __arm_vmullbq_int_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s16))) +int32x4_t __arm_vmullbq_int(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s32))) +int64x2_t __arm_vmullbq_int_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s32))) +int64x2_t __arm_vmullbq_int(int32x4_t, int32x4_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s8))) +int16x8_t __arm_vmullbq_int_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s8))) +int16x8_t __arm_vmullbq_int(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u16))) +uint32x4_t __arm_vmullbq_int_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u16))) +uint32x4_t __arm_vmullbq_int(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u32))) +uint64x2_t __arm_vmullbq_int_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u32))) +uint64x2_t __arm_vmullbq_int(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u8))) +uint16x8_t __arm_vmullbq_int_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u8))) +uint16x8_t __arm_vmullbq_int(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s16))) +int32x4_t __arm_vmullbq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s16))) +int32x4_t __arm_vmullbq_int_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s32))) +int64x2_t __arm_vmullbq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s32))) +int64x2_t __arm_vmullbq_int_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s8))) +int16x8_t __arm_vmullbq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s8))) +int16x8_t __arm_vmullbq_int_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u16))) +uint32x4_t __arm_vmullbq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u16))) +uint32x4_t __arm_vmullbq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u32))) +uint64x2_t __arm_vmullbq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u32))) +uint64x2_t __arm_vmullbq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u8))) +uint16x8_t __arm_vmullbq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u8))) +uint16x8_t __arm_vmullbq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_m_p16))) +uint32x4_t __arm_vmullbq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_m_p16))) +uint32x4_t __arm_vmullbq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_m_p8))) +uint16x8_t __arm_vmullbq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_m_p8))) +uint16x8_t __arm_vmullbq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_p16))) +uint32x4_t __arm_vmullbq_poly_p16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_p16))) +uint32x4_t __arm_vmullbq_poly(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_p8))) +uint16x8_t __arm_vmullbq_poly_p8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_p8))) +uint16x8_t __arm_vmullbq_poly(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_x_p16))) +uint32x4_t __arm_vmullbq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_x_p16))) +uint32x4_t __arm_vmullbq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_x_p8))) +uint16x8_t __arm_vmullbq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_x_p8))) +uint16x8_t __arm_vmullbq_poly_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s16))) +int32x4_t __arm_vmulltq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s16))) +int32x4_t __arm_vmulltq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s32))) +int64x2_t __arm_vmulltq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s32))) +int64x2_t __arm_vmulltq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s8))) +int16x8_t __arm_vmulltq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s8))) +int16x8_t __arm_vmulltq_int_m(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u16))) +uint32x4_t __arm_vmulltq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u16))) +uint32x4_t __arm_vmulltq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u32))) +uint64x2_t __arm_vmulltq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u32))) +uint64x2_t __arm_vmulltq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u8))) +uint16x8_t __arm_vmulltq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u8))) +uint16x8_t __arm_vmulltq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s16))) +int32x4_t __arm_vmulltq_int_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s16))) +int32x4_t __arm_vmulltq_int(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s32))) +int64x2_t __arm_vmulltq_int_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s32))) +int64x2_t __arm_vmulltq_int(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s8))) +int16x8_t __arm_vmulltq_int_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s8))) +int16x8_t __arm_vmulltq_int(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u16))) +uint32x4_t __arm_vmulltq_int_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u16))) +uint32x4_t __arm_vmulltq_int(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u32))) +uint64x2_t __arm_vmulltq_int_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u32))) +uint64x2_t __arm_vmulltq_int(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u8))) +uint16x8_t __arm_vmulltq_int_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u8))) +uint16x8_t __arm_vmulltq_int(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s16))) +int32x4_t __arm_vmulltq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s16))) +int32x4_t __arm_vmulltq_int_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s32))) +int64x2_t __arm_vmulltq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s32))) +int64x2_t __arm_vmulltq_int_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s8))) +int16x8_t __arm_vmulltq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s8))) +int16x8_t __arm_vmulltq_int_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u16))) +uint32x4_t 
__arm_vmulltq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u16))) +uint32x4_t __arm_vmulltq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u32))) +uint64x2_t __arm_vmulltq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u32))) +uint64x2_t __arm_vmulltq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u8))) +uint16x8_t __arm_vmulltq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u8))) +uint16x8_t __arm_vmulltq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_m_p16))) +uint32x4_t __arm_vmulltq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_m_p16))) +uint32x4_t __arm_vmulltq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_m_p8))) +uint16x8_t __arm_vmulltq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_m_p8))) +uint16x8_t __arm_vmulltq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_p16))) +uint32x4_t __arm_vmulltq_poly_p16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_p16))) +uint32x4_t __arm_vmulltq_poly(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_p8))) +uint16x8_t __arm_vmulltq_poly_p8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_p8))) +uint16x8_t __arm_vmulltq_poly(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_x_p16))) +uint32x4_t __arm_vmulltq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_x_p16))) +uint32x4_t __arm_vmulltq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_x_p8))) +uint16x8_t __arm_vmulltq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_x_p8))) +uint16x8_t __arm_vmulltq_poly_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s16))) +int16x8_t __arm_vmulq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s16))) +int16x8_t __arm_vmulq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s32))) +int32x4_t __arm_vmulq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static 
__inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s32))) +int32x4_t __arm_vmulq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s8))) +int8x16_t __arm_vmulq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s8))) +int8x16_t __arm_vmulq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u16))) +uint16x8_t __arm_vmulq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u16))) +uint16x8_t __arm_vmulq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u32))) +uint32x4_t __arm_vmulq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u32))) +uint32x4_t __arm_vmulq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u8))) +uint8x16_t __arm_vmulq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u8))) +uint8x16_t __arm_vmulq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_s16))) +int16x8_t __arm_vmulq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_s16))) +int16x8_t __arm_vmulq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_s32))) +int32x4_t __arm_vmulq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_s32))) +int32x4_t __arm_vmulq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_s8))) +int8x16_t __arm_vmulq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_s8))) +int8x16_t __arm_vmulq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_u16))) +uint16x8_t __arm_vmulq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_u16))) +uint16x8_t __arm_vmulq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_u32))) +uint32x4_t __arm_vmulq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_u32))) +uint32x4_t __arm_vmulq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_u8))) +uint8x16_t __arm_vmulq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_u8))) +uint8x16_t __arm_vmulq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s16))) +int16x8_t __arm_vmulq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s16))) +int16x8_t 
__arm_vmulq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s32))) +int32x4_t __arm_vmulq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s32))) +int32x4_t __arm_vmulq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s8))) +int8x16_t __arm_vmulq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s8))) +int8x16_t __arm_vmulq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u16))) +uint16x8_t __arm_vmulq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u16))) +uint16x8_t __arm_vmulq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u32))) +uint32x4_t __arm_vmulq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u32))) +uint32x4_t __arm_vmulq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u8))) +uint8x16_t __arm_vmulq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u8))) +uint8x16_t __arm_vmulq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s16))) +int16x8_t __arm_vornq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s16))) +int16x8_t __arm_vornq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s32))) +int32x4_t __arm_vornq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s32))) +int32x4_t __arm_vornq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s8))) +int8x16_t __arm_vornq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s8))) +int8x16_t __arm_vornq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u16))) +uint16x8_t __arm_vornq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u16))) +uint16x8_t __arm_vornq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u32))) +uint32x4_t __arm_vornq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u32))) +uint32x4_t __arm_vornq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u8))) +uint8x16_t __arm_vornq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); 
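(`vornq` computes a lane-wise OR of the first operand with the complement of the second, a | ~b; the `_m_*` block above is its merging form, the plain and `_x` variants follow, and `vorrq` (plain OR) mirrors the same set further down. A one-line sketch, with `or_not` as a hypothetical wrapper name:

#include <arm_mve.h>

/* vornq computes a | ~mask per lane: every bit that is clear in `mask`
   is forced to 1; bits set in `mask` pass through from `v` unchanged. */
uint32x4_t or_not(uint32x4_t v, uint32x4_t mask) {
  return vornq_u32(v, mask);   /* equivalently: vornq(v, mask) */
})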
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u8))) +uint8x16_t __arm_vornq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_s16))) +int16x8_t __arm_vornq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_s16))) +int16x8_t __arm_vornq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_s32))) +int32x4_t __arm_vornq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_s32))) +int32x4_t __arm_vornq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_s8))) +int8x16_t __arm_vornq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_s8))) +int8x16_t __arm_vornq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_u16))) +uint16x8_t __arm_vornq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_u16))) +uint16x8_t __arm_vornq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_u32))) +uint32x4_t __arm_vornq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_u32))) +uint32x4_t __arm_vornq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_u8))) +uint8x16_t __arm_vornq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_u8))) +uint8x16_t __arm_vornq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s16))) +int16x8_t __arm_vornq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s16))) +int16x8_t __arm_vornq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s32))) +int32x4_t __arm_vornq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s32))) +int32x4_t __arm_vornq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s8))) +int8x16_t __arm_vornq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s8))) +int8x16_t __arm_vornq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u16))) +uint16x8_t __arm_vornq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u16))) +uint16x8_t __arm_vornq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u32))) +uint32x4_t __arm_vornq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u32))) +uint32x4_t __arm_vornq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u8))) +uint8x16_t __arm_vornq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u8))) +uint8x16_t __arm_vornq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s16))) +int16x8_t __arm_vorrq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s16))) +int16x8_t __arm_vorrq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s32))) +int32x4_t __arm_vorrq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s32))) +int32x4_t __arm_vorrq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s8))) +int8x16_t __arm_vorrq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s8))) +int8x16_t __arm_vorrq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u16))) +uint16x8_t __arm_vorrq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u16))) +uint16x8_t __arm_vorrq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u32))) +uint32x4_t __arm_vorrq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u32))) +uint32x4_t __arm_vorrq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u8))) +uint8x16_t __arm_vorrq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u8))) +uint8x16_t __arm_vorrq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_s16))) +int16x8_t __arm_vorrq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_s16))) +int16x8_t __arm_vorrq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_s32))) +int32x4_t __arm_vorrq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_s32))) +int32x4_t __arm_vorrq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_s8))) +int8x16_t __arm_vorrq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_s8))) +int8x16_t __arm_vorrq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_u16))) +uint16x8_t __arm_vorrq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_u16))) +uint16x8_t __arm_vorrq(uint16x8_t, uint16x8_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_u32)))
+uint32x4_t __arm_vorrq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_u32)))
+uint32x4_t __arm_vorrq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_u8)))
+uint8x16_t __arm_vorrq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_u8)))
+uint8x16_t __arm_vorrq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s16)))
+int16x8_t __arm_vorrq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s16)))
+int16x8_t __arm_vorrq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s32)))
+int32x4_t __arm_vorrq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s32)))
+int32x4_t __arm_vorrq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s8)))
+int8x16_t __arm_vorrq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s8)))
+int8x16_t __arm_vorrq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u16)))
+uint16x8_t __arm_vorrq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u16)))
+uint16x8_t __arm_vorrq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u32)))
+uint32x4_t __arm_vorrq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u32)))
+uint32x4_t __arm_vorrq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u8)))
+uint8x16_t __arm_vorrq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u8)))
+uint8x16_t __arm_vorrq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpnot)))
+mve_pred16_t __arm_vpnot(mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_s16)))
+int16x8_t __arm_vpselq_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_s16)))
+int16x8_t __arm_vpselq(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_s32)))
+int32x4_t __arm_vpselq_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_s32)))
+int32x4_t __arm_vpselq(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_s64)))
+int64x2_t __arm_vpselq_s64(int64x2_t, int64x2_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_s64)))
+int64x2_t __arm_vpselq(int64x2_t, int64x2_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_s8)))
+int8x16_t __arm_vpselq_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_s8)))
+int8x16_t __arm_vpselq(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_u16)))
+uint16x8_t __arm_vpselq_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_u16)))
+uint16x8_t __arm_vpselq(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_u32)))
+uint32x4_t __arm_vpselq_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_u32)))
+uint32x4_t __arm_vpselq(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_u64)))
+uint64x2_t __arm_vpselq_u64(uint64x2_t, uint64x2_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_u64)))
+uint64x2_t __arm_vpselq(uint64x2_t, uint64x2_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_u8)))
+uint8x16_t __arm_vpselq_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_u8)))
+uint8x16_t __arm_vpselq(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s16)))
+int16x8_t __arm_vqaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s16)))
+int16x8_t __arm_vqaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s32)))
+int32x4_t __arm_vqaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s32)))
+int32x4_t __arm_vqaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s8)))
+int8x16_t __arm_vqaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s8)))
+int8x16_t __arm_vqaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u16)))
+uint16x8_t __arm_vqaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u16)))
+uint16x8_t __arm_vqaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u32)))
+uint32x4_t __arm_vqaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u32)))
+uint32x4_t __arm_vqaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u8)))
+uint8x16_t __arm_vqaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u8)))
+uint8x16_t __arm_vqaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s16)))
+int16x8_t __arm_vqaddq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s16)))
+int16x8_t __arm_vqaddq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s32)))
+int32x4_t __arm_vqaddq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s32)))
+int32x4_t __arm_vqaddq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s8)))
+int8x16_t __arm_vqaddq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s8)))
+int8x16_t __arm_vqaddq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u16)))
+uint16x8_t __arm_vqaddq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u16)))
+uint16x8_t __arm_vqaddq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u32)))
+uint32x4_t __arm_vqaddq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u32)))
+uint32x4_t __arm_vqaddq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u8)))
+uint8x16_t __arm_vqaddq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u8)))
+uint8x16_t __arm_vqaddq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s16)))
+int16x8_t __arm_vqdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s16)))
+int16x8_t __arm_vqdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s32)))
+int32x4_t __arm_vqdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s32)))
+int32x4_t __arm_vqdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s8)))
+int8x16_t __arm_vqdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s8)))
+int8x16_t __arm_vqdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s16)))
+int16x8_t __arm_vqdmulhq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s16)))
+int16x8_t __arm_vqdmulhq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s32)))
+int32x4_t __arm_vqdmulhq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s32)))
+int32x4_t __arm_vqdmulhq(int32x4_t, int32x4_t);
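/*
 * Editor's note -- illustrative usage sketch, not part of the upstream
 * patch.  It shows how the saturating-arithmetic intrinsics declared
 * above are typically called, assuming an MVE-enabled target (e.g.
 * -mcpu=cortex-m55) with <arm_mve.h> on the include path; the function
 * name q15_scale_acc is hypothetical.
 *
 * #include <arm_mve.h>
 *
 * // Scale eight Q15 samples by a Q15 gain and accumulate with
 * // saturation: __arm_vqdmulhq is the saturating doubling multiply
 * // returning the high half (Q15 x Q15 -> Q15), and __arm_vqaddq clamps
 * // the sum to [INT16_MIN, INT16_MAX] instead of wrapping.  Both calls
 * // resolve through the overloaded aliases declared in this hunk.
 * static inline int16x8_t q15_scale_acc(int16x8_t acc, int16x8_t x,
 *                                        int16x8_t gain)
 * {
 *     int16x8_t scaled = __arm_vqdmulhq(x, gain);
 *     return __arm_vqaddq(acc, scaled);
 * }
 */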
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s8))) +int8x16_t __arm_vqdmulhq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s8))) +int8x16_t __arm_vqdmulhq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) +int16x8_t __arm_vqrdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) +int16x8_t __arm_vqrdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) +int32x4_t __arm_vqrdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) +int32x4_t __arm_vqrdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) +int8x16_t __arm_vqrdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) +int8x16_t __arm_vqrdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s16))) +int16x8_t __arm_vqrdmulhq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s16))) +int16x8_t __arm_vqrdmulhq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s32))) +int32x4_t __arm_vqrdmulhq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s32))) +int32x4_t __arm_vqrdmulhq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s8))) +int8x16_t __arm_vqrdmulhq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s8))) +int8x16_t __arm_vqrdmulhq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) +int16x8_t __arm_vqrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) +int16x8_t __arm_vqrshlq_m_n(int16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) +int32x4_t __arm_vqrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) +int32x4_t __arm_vqrshlq_m_n(int32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) +int8x16_t __arm_vqrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) +int8x16_t __arm_vqrshlq_m_n(int8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) +uint16x8_t __arm_vqrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) +uint16x8_t 
__arm_vqrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) +uint32x4_t __arm_vqrshlq_m_n_u32(uint32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) +uint32x4_t __arm_vqrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) +uint8x16_t __arm_vqrshlq_m_n_u8(uint8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) +uint8x16_t __arm_vqrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s16))) +int16x8_t __arm_vqrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s16))) +int16x8_t __arm_vqrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s32))) +int32x4_t __arm_vqrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s32))) +int32x4_t __arm_vqrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s8))) +int8x16_t __arm_vqrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s8))) +int8x16_t __arm_vqrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u16))) +uint16x8_t __arm_vqrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u16))) +uint16x8_t __arm_vqrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u32))) +uint32x4_t __arm_vqrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u32))) +uint32x4_t __arm_vqrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u8))) +uint8x16_t __arm_vqrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u8))) +uint8x16_t __arm_vqrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s16))) +int16x8_t __arm_vqrshlq_n_s16(int16x8_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s16))) +int16x8_t __arm_vqrshlq(int16x8_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s32))) +int32x4_t __arm_vqrshlq_n_s32(int32x4_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s32))) +int32x4_t __arm_vqrshlq(int32x4_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s8))) +int8x16_t __arm_vqrshlq_n_s8(int8x16_t, 
int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s8))) +int8x16_t __arm_vqrshlq(int8x16_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u16))) +uint16x8_t __arm_vqrshlq_n_u16(uint16x8_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u16))) +uint16x8_t __arm_vqrshlq(uint16x8_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u32))) +uint32x4_t __arm_vqrshlq_n_u32(uint32x4_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u32))) +uint32x4_t __arm_vqrshlq(uint32x4_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u8))) +uint8x16_t __arm_vqrshlq_n_u8(uint8x16_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u8))) +uint8x16_t __arm_vqrshlq(uint8x16_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s16))) +int16x8_t __arm_vqrshlq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s16))) +int16x8_t __arm_vqrshlq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s32))) +int32x4_t __arm_vqrshlq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s32))) +int32x4_t __arm_vqrshlq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s8))) +int8x16_t __arm_vqrshlq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s8))) +int8x16_t __arm_vqrshlq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u16))) +uint16x8_t __arm_vqrshlq_u16(uint16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u16))) +uint16x8_t __arm_vqrshlq(uint16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u32))) +uint32x4_t __arm_vqrshlq_u32(uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u32))) +uint32x4_t __arm_vqrshlq(uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u8))) +uint8x16_t __arm_vqrshlq_u8(uint8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u8))) +uint8x16_t __arm_vqrshlq(uint8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) +int8x16_t __arm_vqrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) +int8x16_t __arm_vqrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) +int16x8_t __arm_vqrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) +int16x8_t __arm_vqrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) +uint8x16_t __arm_vqrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) +uint8x16_t __arm_vqrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) +uint16x8_t __arm_vqrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) +uint16x8_t __arm_vqrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) +int8x16_t __arm_vqrshrnbq_n_s16(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) +int8x16_t __arm_vqrshrnbq(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) +int16x8_t __arm_vqrshrnbq_n_s32(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) +int16x8_t __arm_vqrshrnbq(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) +uint8x16_t __arm_vqrshrnbq_n_u16(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) +uint8x16_t __arm_vqrshrnbq(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) +uint16x8_t __arm_vqrshrnbq_n_u32(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) +uint16x8_t __arm_vqrshrnbq(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) +int8x16_t __arm_vqrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) +int8x16_t __arm_vqrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) +int16x8_t __arm_vqrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) +int16x8_t __arm_vqrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) +uint8x16_t __arm_vqrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) +uint8x16_t __arm_vqrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) +uint16x8_t __arm_vqrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) +uint16x8_t __arm_vqrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_s16))) +int8x16_t __arm_vqrshrntq_n_s16(int8x16_t, int16x8_t, int); +static 
__inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_s16))) +int8x16_t __arm_vqrshrntq(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_s32))) +int16x8_t __arm_vqrshrntq_n_s32(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_s32))) +int16x8_t __arm_vqrshrntq(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_u16))) +uint8x16_t __arm_vqrshrntq_n_u16(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_u16))) +uint8x16_t __arm_vqrshrntq(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_u32))) +uint16x8_t __arm_vqrshrntq_n_u32(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_u32))) +uint16x8_t __arm_vqrshrntq(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) +uint8x16_t __arm_vqrshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) +uint8x16_t __arm_vqrshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) +uint16x8_t __arm_vqrshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) +uint16x8_t __arm_vqrshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) +uint8x16_t __arm_vqrshrunbq_n_s16(uint8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) +uint8x16_t __arm_vqrshrunbq(uint8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) +uint16x8_t __arm_vqrshrunbq_n_s32(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) +uint16x8_t __arm_vqrshrunbq(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) +uint8x16_t __arm_vqrshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) +uint8x16_t __arm_vqrshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) +uint16x8_t __arm_vqrshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) +uint16x8_t __arm_vqrshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_n_s16))) +uint8x16_t __arm_vqrshruntq_n_s16(uint8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_n_s16))) +uint8x16_t __arm_vqrshruntq(uint8x16_t, int16x8_t, int); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_n_s32))) +uint16x8_t __arm_vqrshruntq_n_s32(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_n_s32))) +uint16x8_t __arm_vqrshruntq(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s16))) +int16x8_t __arm_vqshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s16))) +int16x8_t __arm_vqshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s32))) +int32x4_t __arm_vqshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s32))) +int32x4_t __arm_vqshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s8))) +int8x16_t __arm_vqshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s8))) +int8x16_t __arm_vqshlq_m_n(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u16))) +uint16x8_t __arm_vqshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u16))) +uint16x8_t __arm_vqshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u32))) +uint32x4_t __arm_vqshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u32))) +uint32x4_t __arm_vqshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u8))) +uint8x16_t __arm_vqshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u8))) +uint8x16_t __arm_vqshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s16))) +int16x8_t __arm_vqshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s16))) +int16x8_t __arm_vqshlq_m_r(int16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s32))) +int32x4_t __arm_vqshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s32))) +int32x4_t __arm_vqshlq_m_r(int32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s8))) +int8x16_t __arm_vqshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s8))) +int8x16_t __arm_vqshlq_m_r(int8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u16))) +uint16x8_t __arm_vqshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u16))) +uint16x8_t __arm_vqshlq_m_r(uint16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u32))) +uint32x4_t __arm_vqshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u32))) +uint32x4_t __arm_vqshlq_m_r(uint32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u8))) +uint8x16_t __arm_vqshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u8))) +uint8x16_t __arm_vqshlq_m_r(uint8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s16))) +int16x8_t __arm_vqshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s16))) +int16x8_t __arm_vqshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s32))) +int32x4_t __arm_vqshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s32))) +int32x4_t __arm_vqshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s8))) +int8x16_t __arm_vqshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s8))) +int8x16_t __arm_vqshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u16))) +uint16x8_t __arm_vqshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u16))) +uint16x8_t __arm_vqshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u32))) +uint32x4_t __arm_vqshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u32))) +uint32x4_t __arm_vqshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u8))) +uint8x16_t __arm_vqshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u8))) +uint8x16_t __arm_vqshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s16))) +int16x8_t __arm_vqshlq_n_s16(int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s16))) +int16x8_t __arm_vqshlq_n(int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s32))) +int32x4_t __arm_vqshlq_n_s32(int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s32))) +int32x4_t __arm_vqshlq_n(int32x4_t, int); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s8))) +int8x16_t __arm_vqshlq_n_s8(int8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s8))) +int8x16_t __arm_vqshlq_n(int8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u16))) +uint16x8_t __arm_vqshlq_n_u16(uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u16))) +uint16x8_t __arm_vqshlq_n(uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u32))) +uint32x4_t __arm_vqshlq_n_u32(uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u32))) +uint32x4_t __arm_vqshlq_n(uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u8))) +uint8x16_t __arm_vqshlq_n_u8(uint8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u8))) +uint8x16_t __arm_vqshlq_n(uint8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s16))) +int16x8_t __arm_vqshlq_r_s16(int16x8_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s16))) +int16x8_t __arm_vqshlq_r(int16x8_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s32))) +int32x4_t __arm_vqshlq_r_s32(int32x4_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s32))) +int32x4_t __arm_vqshlq_r(int32x4_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s8))) +int8x16_t __arm_vqshlq_r_s8(int8x16_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s8))) +int8x16_t __arm_vqshlq_r(int8x16_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u16))) +uint16x8_t __arm_vqshlq_r_u16(uint16x8_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u16))) +uint16x8_t __arm_vqshlq_r(uint16x8_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u32))) +uint32x4_t __arm_vqshlq_r_u32(uint32x4_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u32))) +uint32x4_t __arm_vqshlq_r(uint32x4_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u8))) +uint8x16_t __arm_vqshlq_r_u8(uint8x16_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u8))) +uint8x16_t __arm_vqshlq_r(uint8x16_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s16))) +int16x8_t __arm_vqshlq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s16))) +int16x8_t __arm_vqshlq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s32))) +int32x4_t __arm_vqshlq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s32))) +int32x4_t __arm_vqshlq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s8))) +int8x16_t 
__arm_vqshlq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s8))) +int8x16_t __arm_vqshlq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u16))) +uint16x8_t __arm_vqshlq_u16(uint16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u16))) +uint16x8_t __arm_vqshlq(uint16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u32))) +uint32x4_t __arm_vqshlq_u32(uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u32))) +uint32x4_t __arm_vqshlq(uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u8))) +uint8x16_t __arm_vqshlq_u8(uint8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u8))) +uint8x16_t __arm_vqshlq(uint8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s16))) +uint16x8_t __arm_vqshluq_m_n_s16(uint16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s16))) +uint16x8_t __arm_vqshluq_m(uint16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s32))) +uint32x4_t __arm_vqshluq_m_n_s32(uint32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s32))) +uint32x4_t __arm_vqshluq_m(uint32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s8))) +uint8x16_t __arm_vqshluq_m_n_s8(uint8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s8))) +uint8x16_t __arm_vqshluq_m(uint8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s16))) +uint16x8_t __arm_vqshluq_n_s16(int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s16))) +uint16x8_t __arm_vqshluq(int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s32))) +uint32x4_t __arm_vqshluq_n_s32(int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s32))) +uint32x4_t __arm_vqshluq(int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s8))) +uint8x16_t __arm_vqshluq_n_s8(int8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s8))) +uint8x16_t __arm_vqshluq(int8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_s16))) +int8x16_t __arm_vqshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_s16))) +int8x16_t __arm_vqshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_s32))) +int16x8_t __arm_vqshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_s32))) +int16x8_t __arm_vqshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_u16))) +uint8x16_t __arm_vqshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_u16))) +uint8x16_t __arm_vqshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_u32))) +uint16x8_t __arm_vqshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_u32))) +uint16x8_t __arm_vqshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_s16))) +int8x16_t __arm_vqshrnbq_n_s16(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_s16))) +int8x16_t __arm_vqshrnbq(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_s32))) +int16x8_t __arm_vqshrnbq_n_s32(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_s32))) +int16x8_t __arm_vqshrnbq(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_u16))) +uint8x16_t __arm_vqshrnbq_n_u16(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_u16))) +uint8x16_t __arm_vqshrnbq(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_u32))) +uint16x8_t __arm_vqshrnbq_n_u32(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_u32))) +uint16x8_t __arm_vqshrnbq(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_s16))) +int8x16_t __arm_vqshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_s16))) +int8x16_t __arm_vqshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_s32))) +int16x8_t __arm_vqshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_s32))) +int16x8_t __arm_vqshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_u16))) +uint8x16_t __arm_vqshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_u16))) +uint8x16_t __arm_vqshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_u32))) +uint16x8_t __arm_vqshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_u32))) +uint16x8_t __arm_vqshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_s16))) +int8x16_t __arm_vqshrntq_n_s16(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_s16))) +int8x16_t __arm_vqshrntq(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_s32))) +int16x8_t __arm_vqshrntq_n_s32(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_s32))) +int16x8_t __arm_vqshrntq(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_u16))) +uint8x16_t __arm_vqshrntq_n_u16(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_u16))) +uint8x16_t __arm_vqshrntq(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_u32))) +uint16x8_t __arm_vqshrntq_n_u32(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_u32))) +uint16x8_t __arm_vqshrntq(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_m_n_s16))) +uint8x16_t __arm_vqshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_m_n_s16))) +uint8x16_t __arm_vqshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_m_n_s32))) +uint16x8_t __arm_vqshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_m_n_s32))) +uint16x8_t __arm_vqshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_n_s16))) +uint8x16_t __arm_vqshrunbq_n_s16(uint8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_n_s16))) +uint8x16_t __arm_vqshrunbq(uint8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_n_s32))) +uint16x8_t __arm_vqshrunbq_n_s32(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_n_s32))) +uint16x8_t __arm_vqshrunbq(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_m_n_s16))) +uint8x16_t __arm_vqshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_m_n_s16))) +uint8x16_t __arm_vqshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_m_n_s32))) +uint16x8_t __arm_vqshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_m_n_s32))) +uint16x8_t __arm_vqshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_n_s16))) +uint8x16_t __arm_vqshruntq_n_s16(uint8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_n_s16))) +uint8x16_t __arm_vqshruntq(uint8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_n_s32))) +uint16x8_t __arm_vqshruntq_n_s32(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_n_s32))) +uint16x8_t __arm_vqshruntq(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s16))) +int16x8_t __arm_vqsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s16))) +int16x8_t __arm_vqsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s32))) +int32x4_t __arm_vqsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s32))) +int32x4_t __arm_vqsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s8))) +int8x16_t __arm_vqsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s8))) +int8x16_t __arm_vqsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u16))) +uint16x8_t __arm_vqsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u16))) +uint16x8_t __arm_vqsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u32))) +uint32x4_t __arm_vqsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u32))) +uint32x4_t __arm_vqsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u8))) +uint8x16_t __arm_vqsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u8))) +uint8x16_t __arm_vqsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s16))) +int16x8_t __arm_vqsubq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s16))) +int16x8_t __arm_vqsubq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s32))) +int32x4_t __arm_vqsubq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s32))) +int32x4_t __arm_vqsubq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s8))) +int8x16_t __arm_vqsubq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s8))) +int8x16_t __arm_vqsubq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u16))) +uint16x8_t __arm_vqsubq_u16(uint16x8_t, uint16x8_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u16))) +uint16x8_t __arm_vqsubq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u32))) +uint32x4_t __arm_vqsubq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u32))) +uint32x4_t __arm_vqsubq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u8))) +uint8x16_t __arm_vqsubq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u8))) +uint8x16_t __arm_vqsubq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s32))) +int16x8_t __arm_vreinterpretq_s16_s32(int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s32))) +int16x8_t __arm_vreinterpretq_s16(int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s64))) +int16x8_t __arm_vreinterpretq_s16_s64(int64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s64))) +int16x8_t __arm_vreinterpretq_s16(int64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s8))) +int16x8_t __arm_vreinterpretq_s16_s8(int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s8))) +int16x8_t __arm_vreinterpretq_s16(int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u16))) +int16x8_t __arm_vreinterpretq_s16_u16(uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u16))) +int16x8_t __arm_vreinterpretq_s16(uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u32))) +int16x8_t __arm_vreinterpretq_s16_u32(uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u32))) +int16x8_t __arm_vreinterpretq_s16(uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u64))) +int16x8_t __arm_vreinterpretq_s16_u64(uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u64))) +int16x8_t __arm_vreinterpretq_s16(uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) +int16x8_t __arm_vreinterpretq_s16_u8(uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) +int16x8_t __arm_vreinterpretq_s16(uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s16))) +int32x4_t __arm_vreinterpretq_s32_s16(int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s16))) +int32x4_t __arm_vreinterpretq_s32(int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s64))) +int32x4_t __arm_vreinterpretq_s32_s64(int64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s64))) +int32x4_t __arm_vreinterpretq_s32(int64x2_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s8))) +int32x4_t __arm_vreinterpretq_s32_s8(int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s8))) +int32x4_t __arm_vreinterpretq_s32(int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u16))) +int32x4_t __arm_vreinterpretq_s32_u16(uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u16))) +int32x4_t __arm_vreinterpretq_s32(uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u32))) +int32x4_t __arm_vreinterpretq_s32_u32(uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u32))) +int32x4_t __arm_vreinterpretq_s32(uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u64))) +int32x4_t __arm_vreinterpretq_s32_u64(uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u64))) +int32x4_t __arm_vreinterpretq_s32(uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) +int32x4_t __arm_vreinterpretq_s32_u8(uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) +int32x4_t __arm_vreinterpretq_s32(uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s16))) +int64x2_t __arm_vreinterpretq_s64_s16(int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s16))) +int64x2_t __arm_vreinterpretq_s64(int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s32))) +int64x2_t __arm_vreinterpretq_s64_s32(int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s32))) +int64x2_t __arm_vreinterpretq_s64(int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s8))) +int64x2_t __arm_vreinterpretq_s64_s8(int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s8))) +int64x2_t __arm_vreinterpretq_s64(int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u16))) +int64x2_t __arm_vreinterpretq_s64_u16(uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u16))) +int64x2_t __arm_vreinterpretq_s64(uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u32))) +int64x2_t __arm_vreinterpretq_s64_u32(uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u32))) +int64x2_t __arm_vreinterpretq_s64(uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u64))) +int64x2_t __arm_vreinterpretq_s64_u64(uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u64))) +int64x2_t __arm_vreinterpretq_s64(uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u8))) +int64x2_t __arm_vreinterpretq_s64_u8(uint8x16_t); +static 
__inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u8)))
+int64x2_t __arm_vreinterpretq_s64(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s16)))
+int8x16_t __arm_vreinterpretq_s8_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s16)))
+int8x16_t __arm_vreinterpretq_s8(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s32)))
+int8x16_t __arm_vreinterpretq_s8_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s32)))
+int8x16_t __arm_vreinterpretq_s8(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s64)))
+int8x16_t __arm_vreinterpretq_s8_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s64)))
+int8x16_t __arm_vreinterpretq_s8(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u16)))
+int8x16_t __arm_vreinterpretq_s8_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u16)))
+int8x16_t __arm_vreinterpretq_s8(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u32)))
+int8x16_t __arm_vreinterpretq_s8_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u32)))
+int8x16_t __arm_vreinterpretq_s8(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u64)))
+int8x16_t __arm_vreinterpretq_s8_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u64)))
+int8x16_t __arm_vreinterpretq_s8(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u8)))
+int8x16_t __arm_vreinterpretq_s8_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u8)))
+int8x16_t __arm_vreinterpretq_s8(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s16)))
+uint16x8_t __arm_vreinterpretq_u16_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s16)))
+uint16x8_t __arm_vreinterpretq_u16(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s32)))
+uint16x8_t __arm_vreinterpretq_u16_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s32)))
+uint16x8_t __arm_vreinterpretq_u16(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s64)))
+uint16x8_t __arm_vreinterpretq_u16_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s64)))
+uint16x8_t __arm_vreinterpretq_u16(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s8)))
+uint16x8_t __arm_vreinterpretq_u16_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s8)))
+uint16x8_t __arm_vreinterpretq_u16(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u32)))
+uint16x8_t __arm_vreinterpretq_u16_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u32)))
+uint16x8_t __arm_vreinterpretq_u16(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u64)))
+uint16x8_t __arm_vreinterpretq_u16_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u64)))
+uint16x8_t __arm_vreinterpretq_u16(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u8)))
+uint16x8_t __arm_vreinterpretq_u16_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u8)))
+uint16x8_t __arm_vreinterpretq_u16(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s16)))
+uint32x4_t __arm_vreinterpretq_u32_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s16)))
+uint32x4_t __arm_vreinterpretq_u32(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s32)))
+uint32x4_t __arm_vreinterpretq_u32_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s32)))
+uint32x4_t __arm_vreinterpretq_u32(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s64)))
+uint32x4_t __arm_vreinterpretq_u32_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s64)))
+uint32x4_t __arm_vreinterpretq_u32(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s8)))
+uint32x4_t __arm_vreinterpretq_u32_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s8)))
+uint32x4_t __arm_vreinterpretq_u32(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u16)))
+uint32x4_t __arm_vreinterpretq_u32_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u16)))
+uint32x4_t __arm_vreinterpretq_u32(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u64)))
+uint32x4_t __arm_vreinterpretq_u32_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u64)))
+uint32x4_t __arm_vreinterpretq_u32(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u8)))
+uint32x4_t __arm_vreinterpretq_u32_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u8)))
+uint32x4_t __arm_vreinterpretq_u32(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s16)))
+uint64x2_t __arm_vreinterpretq_u64_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s16)))
+uint64x2_t __arm_vreinterpretq_u64(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s32)))
+uint64x2_t __arm_vreinterpretq_u64_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s32)))
+uint64x2_t __arm_vreinterpretq_u64(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s64)))
+uint64x2_t __arm_vreinterpretq_u64_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s64)))
+uint64x2_t __arm_vreinterpretq_u64(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s8)))
+uint64x2_t __arm_vreinterpretq_u64_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s8)))
+uint64x2_t __arm_vreinterpretq_u64(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u16)))
+uint64x2_t __arm_vreinterpretq_u64_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u16)))
+uint64x2_t __arm_vreinterpretq_u64(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u32)))
+uint64x2_t __arm_vreinterpretq_u64_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u32)))
+uint64x2_t __arm_vreinterpretq_u64(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u8)))
+uint64x2_t __arm_vreinterpretq_u64_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u8)))
+uint64x2_t __arm_vreinterpretq_u64(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s16)))
+uint8x16_t __arm_vreinterpretq_u8_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s16)))
+uint8x16_t __arm_vreinterpretq_u8(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s32)))
+uint8x16_t __arm_vreinterpretq_u8_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s32)))
+uint8x16_t __arm_vreinterpretq_u8(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s64)))
+uint8x16_t __arm_vreinterpretq_u8_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s64)))
+uint8x16_t __arm_vreinterpretq_u8(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s8)))
+uint8x16_t __arm_vreinterpretq_u8_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s8)))
+uint8x16_t __arm_vreinterpretq_u8(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u16)))
+uint8x16_t __arm_vreinterpretq_u8_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u16)))
+uint8x16_t __arm_vreinterpretq_u8(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u32)))
+uint8x16_t __arm_vreinterpretq_u8_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u32)))
+uint8x16_t __arm_vreinterpretq_u8(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u64)))
+uint8x16_t __arm_vreinterpretq_u8_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u64)))
+uint8x16_t __arm_vreinterpretq_u8(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s16)))
+int16x8_t __arm_vrhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s16)))
+int16x8_t __arm_vrhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s32)))
+int32x4_t __arm_vrhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s32)))
+int32x4_t __arm_vrhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s8)))
+int8x16_t __arm_vrhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s8)))
+int8x16_t __arm_vrhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u16)))
+uint16x8_t __arm_vrhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u16)))
+uint16x8_t __arm_vrhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u32)))
+uint32x4_t __arm_vrhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u32)))
+uint32x4_t __arm_vrhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u8)))
+uint8x16_t __arm_vrhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u8)))
+uint8x16_t __arm_vrhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s16)))
+int16x8_t __arm_vrhaddq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s16)))
+int16x8_t __arm_vrhaddq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s32)))
+int32x4_t __arm_vrhaddq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s32)))
+int32x4_t __arm_vrhaddq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s8)))
+int8x16_t __arm_vrhaddq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s8)))
+int8x16_t __arm_vrhaddq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u16)))
+uint16x8_t __arm_vrhaddq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u16)))
+uint16x8_t __arm_vrhaddq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u32)))
+uint32x4_t __arm_vrhaddq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u32)))
+uint32x4_t __arm_vrhaddq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u8)))
+uint8x16_t __arm_vrhaddq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u8)))
+uint8x16_t __arm_vrhaddq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s16)))
+int16x8_t __arm_vrhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s16)))
+int16x8_t __arm_vrhaddq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s32)))
+int32x4_t __arm_vrhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s32)))
+int32x4_t __arm_vrhaddq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s8)))
+int8x16_t __arm_vrhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s8)))
+int8x16_t __arm_vrhaddq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u16)))
+uint16x8_t __arm_vrhaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u16)))
+uint16x8_t __arm_vrhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u32)))
+uint32x4_t __arm_vrhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u32)))
+uint32x4_t __arm_vrhaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u8)))
+uint8x16_t __arm_vrhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u8)))
+uint8x16_t __arm_vrhaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32)))
+int64_t __arm_vrmlaldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32)))
+int64_t __arm_vrmlaldavhaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32)))
+uint64_t __arm_vrmlaldavhaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32)))
+uint64_t __arm_vrmlaldavhaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_s32)))
+int64_t __arm_vrmlaldavhaq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_s32)))
+int64_t __arm_vrmlaldavhaq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_u32)))
+uint64_t __arm_vrmlaldavhaq_u32(uint64_t, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_u32)))
+uint64_t __arm_vrmlaldavhaq(uint64_t, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32)))
+int64_t __arm_vrmlaldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32)))
+int64_t __arm_vrmlaldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaxq_s32)))
+int64_t __arm_vrmlaldavhaxq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaxq_s32)))
+int64_t __arm_vrmlaldavhaxq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_p_s32)))
+int64_t __arm_vrmlaldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_p_s32)))
+int64_t __arm_vrmlaldavhq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_p_u32)))
+uint64_t __arm_vrmlaldavhq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_p_u32)))
+uint64_t __arm_vrmlaldavhq_p(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_s32)))
+int64_t __arm_vrmlaldavhq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_s32)))
+int64_t __arm_vrmlaldavhq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_u32)))
+uint64_t __arm_vrmlaldavhq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_u32)))
+uint64_t __arm_vrmlaldavhq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32)))
+int64_t __arm_vrmlaldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32)))
+int64_t __arm_vrmlaldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhxq_s32)))
+int64_t __arm_vrmlaldavhxq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhxq_s32)))
+int64_t __arm_vrmlaldavhxq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32)))
+int64_t __arm_vrmlsldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32)))
+int64_t __arm_vrmlsldavhaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaq_s32)))
+int64_t __arm_vrmlsldavhaq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaq_s32)))
+int64_t __arm_vrmlsldavhaq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32)))
+int64_t __arm_vrmlsldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32)))
+int64_t __arm_vrmlsldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaxq_s32)))
+int64_t __arm_vrmlsldavhaxq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaxq_s32)))
+int64_t __arm_vrmlsldavhaxq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhq_p_s32)))
+int64_t __arm_vrmlsldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhq_p_s32)))
+int64_t __arm_vrmlsldavhq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhq_s32)))
+int64_t __arm_vrmlsldavhq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhq_s32)))
+int64_t __arm_vrmlsldavhq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32)))
+int64_t __arm_vrmlsldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32)))
+int64_t __arm_vrmlsldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhxq_s32)))
+int64_t __arm_vrmlsldavhxq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhxq_s32)))
+int64_t __arm_vrmlsldavhxq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s16)))
+int16x8_t __arm_vrmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s16)))
+int16x8_t __arm_vrmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s32)))
+int32x4_t __arm_vrmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s32)))
+int32x4_t __arm_vrmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s8)))
+int8x16_t __arm_vrmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s8)))
+int8x16_t __arm_vrmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u16)))
+uint16x8_t __arm_vrmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u16)))
+uint16x8_t __arm_vrmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u32)))
+uint32x4_t __arm_vrmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u32)))
+uint32x4_t __arm_vrmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u8)))
+uint8x16_t __arm_vrmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u8)))
+uint8x16_t __arm_vrmulhq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s16)))
+int16x8_t __arm_vrmulhq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s16)))
+int16x8_t __arm_vrmulhq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s32)))
+int32x4_t __arm_vrmulhq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s32)))
+int32x4_t __arm_vrmulhq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s8)))
+int8x16_t __arm_vrmulhq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s8)))
+int8x16_t __arm_vrmulhq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u16)))
+uint16x8_t __arm_vrmulhq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u16)))
+uint16x8_t __arm_vrmulhq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u32)))
+uint32x4_t __arm_vrmulhq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u32)))
+uint32x4_t __arm_vrmulhq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u8)))
+uint8x16_t __arm_vrmulhq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u8)))
+uint8x16_t __arm_vrmulhq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s16)))
+int16x8_t __arm_vrmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s16)))
+int16x8_t __arm_vrmulhq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s32)))
+int32x4_t __arm_vrmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s32)))
+int32x4_t __arm_vrmulhq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s8)))
+int8x16_t __arm_vrmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s8)))
+int8x16_t __arm_vrmulhq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u16)))
+uint16x8_t __arm_vrmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u16)))
+uint16x8_t __arm_vrmulhq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u32)))
+uint32x4_t __arm_vrmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u32)))
+uint32x4_t __arm_vrmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u8)))
+uint8x16_t __arm_vrmulhq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u8)))
+uint8x16_t __arm_vrmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s16)))
+int16x8_t __arm_vrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s16)))
+int16x8_t __arm_vrshlq_m_n(int16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s32)))
+int32x4_t __arm_vrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s32)))
+int32x4_t __arm_vrshlq_m_n(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s8)))
+int8x16_t __arm_vrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s8)))
+int8x16_t __arm_vrshlq_m_n(int8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u16)))
+uint16x8_t __arm_vrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u16)))
+uint16x8_t __arm_vrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u32)))
+uint32x4_t __arm_vrshlq_m_n_u32(uint32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u32)))
+uint32x4_t __arm_vrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u8)))
+uint8x16_t __arm_vrshlq_m_n_u8(uint8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u8)))
+uint8x16_t __arm_vrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s16)))
+int16x8_t __arm_vrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s16)))
+int16x8_t __arm_vrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s32)))
+int32x4_t __arm_vrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s32)))
+int32x4_t __arm_vrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s8)))
+int8x16_t __arm_vrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s8)))
+int8x16_t __arm_vrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u16)))
+uint16x8_t __arm_vrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u16)))
+uint16x8_t __arm_vrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u32)))
+uint32x4_t __arm_vrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u32)))
+uint32x4_t __arm_vrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u8)))
+uint8x16_t __arm_vrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u8)))
+uint8x16_t __arm_vrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s16)))
+int16x8_t __arm_vrshlq_n_s16(int16x8_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s16)))
+int16x8_t __arm_vrshlq(int16x8_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s32)))
+int32x4_t __arm_vrshlq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s32)))
+int32x4_t __arm_vrshlq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s8)))
+int8x16_t __arm_vrshlq_n_s8(int8x16_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s8)))
+int8x16_t __arm_vrshlq(int8x16_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u16)))
+uint16x8_t __arm_vrshlq_n_u16(uint16x8_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u16)))
+uint16x8_t __arm_vrshlq(uint16x8_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u32)))
+uint32x4_t __arm_vrshlq_n_u32(uint32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u32)))
+uint32x4_t __arm_vrshlq(uint32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u8)))
+uint8x16_t __arm_vrshlq_n_u8(uint8x16_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u8)))
+uint8x16_t __arm_vrshlq(uint8x16_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s16)))
+int16x8_t __arm_vrshlq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s16)))
+int16x8_t __arm_vrshlq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s32)))
+int32x4_t __arm_vrshlq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s32)))
+int32x4_t __arm_vrshlq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s8)))
+int8x16_t __arm_vrshlq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s8)))
+int8x16_t __arm_vrshlq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u16)))
+uint16x8_t __arm_vrshlq_u16(uint16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u16)))
+uint16x8_t __arm_vrshlq(uint16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u32)))
+uint32x4_t __arm_vrshlq_u32(uint32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u32)))
+uint32x4_t __arm_vrshlq(uint32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u8)))
+uint8x16_t __arm_vrshlq_u8(uint8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u8)))
+uint8x16_t __arm_vrshlq(uint8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s16)))
+int16x8_t __arm_vrshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s16)))
+int16x8_t __arm_vrshlq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s32)))
+int32x4_t __arm_vrshlq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s32)))
+int32x4_t __arm_vrshlq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s8)))
+int8x16_t __arm_vrshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s8)))
+int8x16_t __arm_vrshlq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u16)))
+uint16x8_t __arm_vrshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u16)))
+uint16x8_t __arm_vrshlq_x(uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u32)))
+uint32x4_t __arm_vrshlq_x_u32(uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u32)))
+uint32x4_t __arm_vrshlq_x(uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u8)))
+uint8x16_t __arm_vrshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u8)))
+uint8x16_t __arm_vrshlq_x(uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_s16)))
+int8x16_t __arm_vrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_s16)))
+int8x16_t __arm_vrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_s32)))
+int16x8_t __arm_vrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_s32)))
+int16x8_t __arm_vrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_u16)))
+uint8x16_t __arm_vrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_u16)))
+uint8x16_t __arm_vrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_u32)))
+uint16x8_t __arm_vrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_u32)))
+uint16x8_t __arm_vrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_s16)))
+int8x16_t __arm_vrshrnbq_n_s16(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_s16)))
+int8x16_t __arm_vrshrnbq(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_s32)))
+int16x8_t __arm_vrshrnbq_n_s32(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_s32)))
+int16x8_t __arm_vrshrnbq(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_u16)))
+uint8x16_t __arm_vrshrnbq_n_u16(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_u16)))
+uint8x16_t __arm_vrshrnbq(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_u32)))
+uint16x8_t __arm_vrshrnbq_n_u32(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_u32)))
+uint16x8_t __arm_vrshrnbq(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_s16)))
+int8x16_t __arm_vrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_s16)))
+int8x16_t __arm_vrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_s32)))
+int16x8_t __arm_vrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_s32)))
+int16x8_t __arm_vrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_u16)))
+uint8x16_t __arm_vrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_u16)))
+uint8x16_t __arm_vrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_u32)))
+uint16x8_t __arm_vrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_u32)))
+uint16x8_t __arm_vrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_s16)))
+int8x16_t __arm_vrshrntq_n_s16(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_s16)))
+int8x16_t __arm_vrshrntq(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_s32)))
+int16x8_t __arm_vrshrntq_n_s32(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_s32)))
+int16x8_t __arm_vrshrntq(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_u16)))
+uint8x16_t __arm_vrshrntq_n_u16(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_u16)))
+uint8x16_t __arm_vrshrntq(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_u32)))
+uint16x8_t __arm_vrshrntq_n_u32(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_u32)))
+uint16x8_t __arm_vrshrntq(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s16)))
+int16x8_t __arm_vrshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s16)))
+int16x8_t __arm_vrshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s32)))
+int32x4_t __arm_vrshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s32)))
+int32x4_t __arm_vrshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s8)))
+int8x16_t __arm_vrshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s8)))
+int8x16_t __arm_vrshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u16)))
+uint16x8_t __arm_vrshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u16)))
+uint16x8_t __arm_vrshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u32)))
+uint32x4_t __arm_vrshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u32)))
+uint32x4_t __arm_vrshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u8)))
+uint8x16_t __arm_vrshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u8)))
+uint8x16_t __arm_vrshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s16)))
+int16x8_t __arm_vrshrq_n_s16(int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s16)))
+int16x8_t __arm_vrshrq(int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s32)))
+int32x4_t __arm_vrshrq_n_s32(int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s32)))
+int32x4_t __arm_vrshrq(int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s8)))
+int8x16_t __arm_vrshrq_n_s8(int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s8)))
+int8x16_t __arm_vrshrq(int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u16)))
+uint16x8_t __arm_vrshrq_n_u16(uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u16)))
+uint16x8_t __arm_vrshrq(uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u32)))
+uint32x4_t __arm_vrshrq_n_u32(uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u32)))
+uint32x4_t __arm_vrshrq(uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u8)))
+uint8x16_t __arm_vrshrq_n_u8(uint8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u8)))
+uint8x16_t __arm_vrshrq(uint8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s16)))
+int16x8_t __arm_vrshrq_x_n_s16(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s16)))
+int16x8_t __arm_vrshrq_x(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s32)))
+int32x4_t __arm_vrshrq_x_n_s32(int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s32)))
+int32x4_t __arm_vrshrq_x(int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s8)))
+int8x16_t __arm_vrshrq_x_n_s8(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s8)))
+int8x16_t __arm_vrshrq_x(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u16)))
+uint16x8_t __arm_vrshrq_x_n_u16(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u16)))
+uint16x8_t __arm_vrshrq_x(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u32)))
+uint32x4_t __arm_vrshrq_x_n_u32(uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u32)))
+uint32x4_t __arm_vrshrq_x(uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u8)))
+uint8x16_t __arm_vrshrq_x_n_u8(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u8)))
+uint8x16_t __arm_vrshrq_x(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s16)))
+int16x8_t __arm_vsetq_lane_s16(int16_t, int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s16)))
+int16x8_t __arm_vsetq_lane(int16_t, int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s32)))
+int32x4_t __arm_vsetq_lane_s32(int32_t, int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s32)))
+int32x4_t __arm_vsetq_lane(int32_t, int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s64)))
+int64x2_t __arm_vsetq_lane_s64(int64_t, int64x2_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s64)))
+int64x2_t __arm_vsetq_lane(int64_t, int64x2_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s8)))
+int8x16_t __arm_vsetq_lane_s8(int8_t, int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s8)))
+int8x16_t __arm_vsetq_lane(int8_t, int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u16)))
+uint16x8_t __arm_vsetq_lane_u16(uint16_t, uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u16)))
+uint16x8_t __arm_vsetq_lane(uint16_t, uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u32)))
+uint32x4_t __arm_vsetq_lane_u32(uint32_t, uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u32)))
+uint32x4_t __arm_vsetq_lane(uint32_t, uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u64)))
+uint64x2_t __arm_vsetq_lane_u64(uint64_t, uint64x2_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u64)))
+uint64x2_t __arm_vsetq_lane(uint64_t, uint64x2_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u8)))
+uint8x16_t __arm_vsetq_lane_u8(uint8_t, uint8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u8)))
+uint8x16_t __arm_vsetq_lane(uint8_t, uint8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_s16)))
+int32x4_t __arm_vshllbq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_s16)))
+int32x4_t __arm_vshllbq_m(int32x4_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_s8)))
+int16x8_t __arm_vshllbq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_s8)))
+int16x8_t __arm_vshllbq_m(int16x8_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_u16)))
+uint32x4_t __arm_vshllbq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_u16)))
+uint32x4_t __arm_vshllbq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_u8)))
+uint16x8_t __arm_vshllbq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_u8)))
+uint16x8_t __arm_vshllbq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_s16)))
+int32x4_t __arm_vshllbq_n_s16(int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_s16)))
+int32x4_t __arm_vshllbq(int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_s8)))
+int16x8_t __arm_vshllbq_n_s8(int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_s8)))
+int16x8_t __arm_vshllbq(int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_u16)))
+uint32x4_t __arm_vshllbq_n_u16(uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_u16)))
+uint32x4_t __arm_vshllbq(uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_u8)))
+uint16x8_t __arm_vshllbq_n_u8(uint8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_u8)))
+uint16x8_t __arm_vshllbq(uint8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_s16)))
+int32x4_t __arm_vshllbq_x_n_s16(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_s16)))
+int32x4_t __arm_vshllbq_x(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_s8)))
+int16x8_t __arm_vshllbq_x_n_s8(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_s8)))
+int16x8_t __arm_vshllbq_x(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_u16)))
+uint32x4_t __arm_vshllbq_x_n_u16(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_u16)))
+uint32x4_t __arm_vshllbq_x(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_u8)))
+uint16x8_t __arm_vshllbq_x_n_u8(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_u8)))
+uint16x8_t __arm_vshllbq_x(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_s16)))
+int32x4_t __arm_vshlltq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_s16)))
+int32x4_t __arm_vshlltq_m(int32x4_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_s8)))
+int16x8_t __arm_vshlltq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_s8)))
+int16x8_t __arm_vshlltq_m(int16x8_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_u16)))
+uint32x4_t __arm_vshlltq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_u16)))
+uint32x4_t __arm_vshlltq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_u8)))
+uint16x8_t __arm_vshlltq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_u8)))
+uint16x8_t __arm_vshlltq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_s16)))
+int32x4_t __arm_vshlltq_n_s16(int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_s16)))
+int32x4_t __arm_vshlltq(int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_s8)))
+int16x8_t __arm_vshlltq_n_s8(int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_s8)))
+int16x8_t __arm_vshlltq(int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_u16)))
+uint32x4_t __arm_vshlltq_n_u16(uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_u16)))
+uint32x4_t __arm_vshlltq(uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_u8)))
+uint16x8_t __arm_vshlltq_n_u8(uint8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_u8)))
+uint16x8_t __arm_vshlltq(uint8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_s16)))
+int32x4_t __arm_vshlltq_x_n_s16(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_s16)))
+int32x4_t __arm_vshlltq_x(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_s8)))
+int16x8_t __arm_vshlltq_x_n_s8(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_s8)))
+int16x8_t __arm_vshlltq_x(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_u16)))
+uint32x4_t __arm_vshlltq_x_n_u16(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_u16)))
+uint32x4_t __arm_vshlltq_x(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_u8)))
+uint16x8_t __arm_vshlltq_x_n_u8(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_u8)))
+uint16x8_t __arm_vshlltq_x(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s16)))
+int16x8_t __arm_vshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s16)))
+int16x8_t __arm_vshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s32)))
+int32x4_t __arm_vshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s32)))
+int32x4_t __arm_vshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s8)))
+int8x16_t __arm_vshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s8)))
+int8x16_t __arm_vshlq_m_n(int8x16_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u16)))
+uint16x8_t __arm_vshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u16)))
+uint16x8_t __arm_vshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u32)))
+uint32x4_t __arm_vshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u32)))
+uint32x4_t __arm_vshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u8)))
+uint8x16_t __arm_vshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u8)))
+uint8x16_t __arm_vshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s16)))
+int16x8_t __arm_vshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s16)))
+int16x8_t __arm_vshlq_m_r(int16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s32)))
+int32x4_t __arm_vshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s32)))
+int32x4_t __arm_vshlq_m_r(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s8)))
+int8x16_t __arm_vshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s8)))
+int8x16_t __arm_vshlq_m_r(int8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u16)))
+uint16x8_t __arm_vshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u16)))
+uint16x8_t __arm_vshlq_m_r(uint16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u32)))
+uint32x4_t __arm_vshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u32)))
+uint32x4_t __arm_vshlq_m_r(uint32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u8)))
+uint8x16_t __arm_vshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u8)))
+uint8x16_t __arm_vshlq_m_r(uint8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s16)))
+int16x8_t __arm_vshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s16)))
+int16x8_t __arm_vshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s32)))
+int32x4_t __arm_vshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s32)))
+int32x4_t __arm_vshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s8)))
+int8x16_t __arm_vshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s8)))
+int8x16_t __arm_vshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u16)))
+uint16x8_t __arm_vshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u16)))
+uint16x8_t __arm_vshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u32)))
+uint32x4_t __arm_vshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u32)))
+uint32x4_t __arm_vshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u8)))
+uint8x16_t __arm_vshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u8)))
+uint8x16_t __arm_vshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s16)))
+int16x8_t __arm_vshlq_n_s16(int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s16)))
+int16x8_t __arm_vshlq_n(int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s32)))
+int32x4_t __arm_vshlq_n_s32(int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s32)))
+int32x4_t __arm_vshlq_n(int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s8)))
+int8x16_t __arm_vshlq_n_s8(int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s8)))
+int8x16_t __arm_vshlq_n(int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u16)))
+uint16x8_t __arm_vshlq_n_u16(uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u16)))
+uint16x8_t __arm_vshlq_n(uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u32)))
+uint32x4_t __arm_vshlq_n_u32(uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u32)))
+uint32x4_t __arm_vshlq_n(uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u8)))
+uint8x16_t __arm_vshlq_n_u8(uint8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u8)))
+uint8x16_t __arm_vshlq_n(uint8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s16)))
+int16x8_t __arm_vshlq_r_s16(int16x8_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s16)))
+int16x8_t __arm_vshlq_r(int16x8_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s32)))
+int32x4_t __arm_vshlq_r_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s32)))
+int32x4_t __arm_vshlq_r(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s8)))
+int8x16_t __arm_vshlq_r_s8(int8x16_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s8)))
+int8x16_t __arm_vshlq_r(int8x16_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u16)))
+uint16x8_t __arm_vshlq_r_u16(uint16x8_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u16)))
+uint16x8_t __arm_vshlq_r(uint16x8_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u32)))
+uint32x4_t __arm_vshlq_r_u32(uint32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u32)))
+uint32x4_t __arm_vshlq_r(uint32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u8)))
+uint8x16_t __arm_vshlq_r_u8(uint8x16_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u8)))
+uint8x16_t __arm_vshlq_r(uint8x16_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_s16)))
+int16x8_t __arm_vshlq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_s16)))
+int16x8_t __arm_vshlq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_s32)))
+int32x4_t __arm_vshlq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_s32)))
+int32x4_t __arm_vshlq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_s8)))
+int8x16_t __arm_vshlq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_s8)))
+int8x16_t __arm_vshlq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_u16)))
+uint16x8_t __arm_vshlq_u16(uint16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_u16)))
+uint16x8_t __arm_vshlq(uint16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_u32)))
+uint32x4_t __arm_vshlq_u32(uint32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_u32)))
+uint32x4_t __arm_vshlq(uint32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_u8)))
+uint8x16_t __arm_vshlq_u8(uint8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_u8)))
+uint8x16_t __arm_vshlq(uint8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s16)))
+int16x8_t __arm_vshlq_x_n_s16(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s16)))
+int16x8_t __arm_vshlq_x_n(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s32)))
+int32x4_t __arm_vshlq_x_n_s32(int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s32)))
+int32x4_t __arm_vshlq_x_n(int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s8)))
+int8x16_t __arm_vshlq_x_n_s8(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s8)))
+int8x16_t __arm_vshlq_x_n(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u16)))
+uint16x8_t __arm_vshlq_x_n_u16(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u16)))
+uint16x8_t __arm_vshlq_x_n(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u32)))
+uint32x4_t __arm_vshlq_x_n_u32(uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u32)))
+uint32x4_t __arm_vshlq_x_n(uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u8)))
+uint8x16_t __arm_vshlq_x_n_u8(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u8)))
+uint8x16_t __arm_vshlq_x_n(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s16)))
+int16x8_t __arm_vshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s16)))
+int16x8_t __arm_vshlq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s32)))
+int32x4_t __arm_vshlq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s32)))
+int32x4_t __arm_vshlq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s8)))
+int8x16_t __arm_vshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s8)))
+int8x16_t __arm_vshlq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u16)))
+uint16x8_t
__arm_vshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u16))) +uint16x8_t __arm_vshlq_x(uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u32))) +uint32x4_t __arm_vshlq_x_u32(uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u32))) +uint32x4_t __arm_vshlq_x(uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u8))) +uint8x16_t __arm_vshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u8))) +uint8x16_t __arm_vshlq_x(uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) +int8x16_t __arm_vshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) +int8x16_t __arm_vshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) +int16x8_t __arm_vshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) +int16x8_t __arm_vshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) +uint8x16_t __arm_vshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) +uint8x16_t __arm_vshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) +uint16x8_t __arm_vshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) +uint16x8_t __arm_vshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_s16))) +int8x16_t __arm_vshrnbq_n_s16(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_s16))) +int8x16_t __arm_vshrnbq(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_s32))) +int16x8_t __arm_vshrnbq_n_s32(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_s32))) +int16x8_t __arm_vshrnbq(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_u16))) +uint8x16_t __arm_vshrnbq_n_u16(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_u16))) +uint8x16_t __arm_vshrnbq(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_u32))) +uint16x8_t __arm_vshrnbq_n_u32(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_u32))) +uint16x8_t __arm_vshrnbq(uint16x8_t, uint32x4_t, int); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_s16))) +int8x16_t __arm_vshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_s16))) +int8x16_t __arm_vshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_s32))) +int16x8_t __arm_vshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_s32))) +int16x8_t __arm_vshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_u16))) +uint8x16_t __arm_vshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_u16))) +uint8x16_t __arm_vshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_u32))) +uint16x8_t __arm_vshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_u32))) +uint16x8_t __arm_vshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_s16))) +int8x16_t __arm_vshrntq_n_s16(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_s16))) +int8x16_t __arm_vshrntq(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_s32))) +int16x8_t __arm_vshrntq_n_s32(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_s32))) +int16x8_t __arm_vshrntq(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_u16))) +uint8x16_t __arm_vshrntq_n_u16(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_u16))) +uint8x16_t __arm_vshrntq(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_u32))) +uint16x8_t __arm_vshrntq_n_u32(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_u32))) +uint16x8_t __arm_vshrntq(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s16))) +int16x8_t __arm_vshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s16))) +int16x8_t __arm_vshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s32))) +int32x4_t __arm_vshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s32))) +int32x4_t __arm_vshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s8))) +int8x16_t __arm_vshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s8))) 
+int8x16_t __arm_vshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u16))) +uint16x8_t __arm_vshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u16))) +uint16x8_t __arm_vshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u32))) +uint32x4_t __arm_vshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u32))) +uint32x4_t __arm_vshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u8))) +uint8x16_t __arm_vshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u8))) +uint8x16_t __arm_vshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s16))) +int16x8_t __arm_vshrq_n_s16(int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s16))) +int16x8_t __arm_vshrq(int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s32))) +int32x4_t __arm_vshrq_n_s32(int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s32))) +int32x4_t __arm_vshrq(int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s8))) +int8x16_t __arm_vshrq_n_s8(int8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s8))) +int8x16_t __arm_vshrq(int8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u16))) +uint16x8_t __arm_vshrq_n_u16(uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u16))) +uint16x8_t __arm_vshrq(uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u32))) +uint32x4_t __arm_vshrq_n_u32(uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u32))) +uint32x4_t __arm_vshrq(uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u8))) +uint8x16_t __arm_vshrq_n_u8(uint8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u8))) +uint8x16_t __arm_vshrq(uint8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s16))) +int16x8_t __arm_vshrq_x_n_s16(int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s16))) +int16x8_t __arm_vshrq_x(int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s32))) +int32x4_t __arm_vshrq_x_n_s32(int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s32))) +int32x4_t __arm_vshrq_x(int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s8))) +int8x16_t __arm_vshrq_x_n_s8(int8x16_t, int, mve_pred16_t); +static 
__inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s8))) +int8x16_t __arm_vshrq_x(int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u16))) +uint16x8_t __arm_vshrq_x_n_u16(uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u16))) +uint16x8_t __arm_vshrq_x(uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u32))) +uint32x4_t __arm_vshrq_x_n_u32(uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u32))) +uint32x4_t __arm_vshrq_x(uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u8))) +uint8x16_t __arm_vshrq_x_n_u8(uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u8))) +uint8x16_t __arm_vshrq_x(uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s16))) +int16x8_t __arm_vsliq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s16))) +int16x8_t __arm_vsliq_m(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s32))) +int32x4_t __arm_vsliq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s32))) +int32x4_t __arm_vsliq_m(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s8))) +int8x16_t __arm_vsliq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s8))) +int8x16_t __arm_vsliq_m(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u16))) +uint16x8_t __arm_vsliq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u16))) +uint16x8_t __arm_vsliq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u32))) +uint32x4_t __arm_vsliq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u32))) +uint32x4_t __arm_vsliq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u8))) +uint8x16_t __arm_vsliq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u8))) +uint8x16_t __arm_vsliq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s16))) +int16x8_t __arm_vsliq_n_s16(int16x8_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s16))) +int16x8_t __arm_vsliq(int16x8_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s32))) +int32x4_t 
__arm_vsliq_n_s32(int32x4_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s32))) +int32x4_t __arm_vsliq(int32x4_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s8))) +int8x16_t __arm_vsliq_n_s8(int8x16_t, int8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s8))) +int8x16_t __arm_vsliq(int8x16_t, int8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u16))) +uint16x8_t __arm_vsliq_n_u16(uint16x8_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u16))) +uint16x8_t __arm_vsliq(uint16x8_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u32))) +uint32x4_t __arm_vsliq_n_u32(uint32x4_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u32))) +uint32x4_t __arm_vsliq(uint32x4_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u8))) +uint8x16_t __arm_vsliq_n_u8(uint8x16_t, uint8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u8))) +uint8x16_t __arm_vsliq(uint8x16_t, uint8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s16))) +int16x8_t __arm_vsriq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s16))) +int16x8_t __arm_vsriq_m(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s32))) +int32x4_t __arm_vsriq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s32))) +int32x4_t __arm_vsriq_m(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s8))) +int8x16_t __arm_vsriq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s8))) +int8x16_t __arm_vsriq_m(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u16))) +uint16x8_t __arm_vsriq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u16))) +uint16x8_t __arm_vsriq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u32))) +uint32x4_t __arm_vsriq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u32))) +uint32x4_t __arm_vsriq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u8))) +uint8x16_t __arm_vsriq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u8))) +uint8x16_t __arm_vsriq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s16))) +int16x8_t 
__arm_vsriq_n_s16(int16x8_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s16))) +int16x8_t __arm_vsriq(int16x8_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s32))) +int32x4_t __arm_vsriq_n_s32(int32x4_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s32))) +int32x4_t __arm_vsriq(int32x4_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s8))) +int8x16_t __arm_vsriq_n_s8(int8x16_t, int8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s8))) +int8x16_t __arm_vsriq(int8x16_t, int8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u16))) +uint16x8_t __arm_vsriq_n_u16(uint16x8_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u16))) +uint16x8_t __arm_vsriq(uint16x8_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u32))) +uint32x4_t __arm_vsriq_n_u32(uint32x4_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u32))) +uint32x4_t __arm_vsriq(uint32x4_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u8))) +uint8x16_t __arm_vsriq_n_u8(uint8x16_t, uint8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u8))) +uint8x16_t __arm_vsriq(uint8x16_t, uint8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s16))) +void __arm_vst1q_p_s16(int16_t *, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s16))) +void __arm_vst1q_p(int16_t *, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s32))) +void __arm_vst1q_p_s32(int32_t *, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s32))) +void __arm_vst1q_p(int32_t *, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s8))) +void __arm_vst1q_p_s8(int8_t *, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s8))) +void __arm_vst1q_p(int8_t *, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u16))) +void __arm_vst1q_p_u16(uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u16))) +void __arm_vst1q_p(uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u32))) +void __arm_vst1q_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u32))) +void __arm_vst1q_p(uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u8))) +void __arm_vst1q_p_u8(uint8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u8))) +void __arm_vst1q_p(uint8_t *, 
uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_s16))) +void __arm_vst1q_s16(int16_t *, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_s16))) +void __arm_vst1q(int16_t *, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_s32))) +void __arm_vst1q_s32(int32_t *, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_s32))) +void __arm_vst1q(int32_t *, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_s8))) +void __arm_vst1q_s8(int8_t *, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_s8))) +void __arm_vst1q(int8_t *, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_u16))) +void __arm_vst1q_u16(uint16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_u16))) +void __arm_vst1q(uint16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_u32))) +void __arm_vst1q_u32(uint32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_u32))) +void __arm_vst1q(uint32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_u8))) +void __arm_vst1q_u8(uint8_t *, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_u8))) +void __arm_vst1q(uint8_t *, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_s16))) +void __arm_vst2q_s16(int16_t *, int16x8x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_s16))) +void __arm_vst2q(int16_t *, int16x8x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_s32))) +void __arm_vst2q_s32(int32_t *, int32x4x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_s32))) +void __arm_vst2q(int32_t *, int32x4x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_s8))) +void __arm_vst2q_s8(int8_t *, int8x16x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_s8))) +void __arm_vst2q(int8_t *, int8x16x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_u16))) +void __arm_vst2q_u16(uint16_t *, uint16x8x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_u16))) +void __arm_vst2q(uint16_t *, uint16x8x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_u32))) +void __arm_vst2q_u32(uint32_t *, uint32x4x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_u32))) +void __arm_vst2q(uint32_t *, uint32x4x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_u8))) +void __arm_vst2q_u8(uint8_t *, uint8x16x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_u8))) +void __arm_vst2q(uint8_t *, uint8x16x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_s16))) +void __arm_vst4q_s16(int16_t *, int16x8x4_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vst4q_s16))) +void __arm_vst4q(int16_t *, int16x8x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_s32))) +void __arm_vst4q_s32(int32_t *, int32x4x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_s32))) +void __arm_vst4q(int32_t *, int32x4x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_s8))) +void __arm_vst4q_s8(int8_t *, int8x16x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_s8))) +void __arm_vst4q(int8_t *, int8x16x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_u16))) +void __arm_vst4q_u16(uint16_t *, uint16x8x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_u16))) +void __arm_vst4q(uint16_t *, uint16x8x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_u32))) +void __arm_vst4q_u32(uint32_t *, uint32x4x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_u32))) +void __arm_vst4q(uint32_t *, uint32x4x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_u8))) +void __arm_vst4q_u8(uint8_t *, uint8x16x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_u8))) +void __arm_vst4q(uint8_t *, uint8x16x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s16))) +void __arm_vstrbq_p_s16(int8_t *, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s16))) +void __arm_vstrbq_p(int8_t *, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s32))) +void __arm_vstrbq_p_s32(int8_t *, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s32))) +void __arm_vstrbq_p(int8_t *, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s8))) +void __arm_vstrbq_p_s8(int8_t *, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s8))) +void __arm_vstrbq_p(int8_t *, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u16))) +void __arm_vstrbq_p_u16(uint8_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u16))) +void __arm_vstrbq_p(uint8_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u32))) +void __arm_vstrbq_p_u32(uint8_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u32))) +void __arm_vstrbq_p(uint8_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u8))) +void __arm_vstrbq_p_u8(uint8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u8))) +void __arm_vstrbq_p(uint8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s16))) +void __arm_vstrbq_s16(int8_t *, int16x8_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s16))) +void __arm_vstrbq(int8_t *, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s32))) +void __arm_vstrbq_s32(int8_t *, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s32))) +void __arm_vstrbq(int8_t *, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s8))) +void __arm_vstrbq_s8(int8_t *, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s8))) +void __arm_vstrbq(int8_t *, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16))) +void __arm_vstrbq_scatter_offset_p_s16(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16))) +void __arm_vstrbq_scatter_offset_p(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32))) +void __arm_vstrbq_scatter_offset_p_s32(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32))) +void __arm_vstrbq_scatter_offset_p(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8))) +void __arm_vstrbq_scatter_offset_p_s8(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8))) +void __arm_vstrbq_scatter_offset_p(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16))) +void __arm_vstrbq_scatter_offset_p_u16(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16))) +void __arm_vstrbq_scatter_offset_p(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32))) +void __arm_vstrbq_scatter_offset_p_u32(uint8_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32))) +void __arm_vstrbq_scatter_offset_p(uint8_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8))) +void __arm_vstrbq_scatter_offset_p_u8(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8))) +void __arm_vstrbq_scatter_offset_p(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16))) +void __arm_vstrbq_scatter_offset_s16(int8_t *, uint16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16))) +void __arm_vstrbq_scatter_offset(int8_t *, uint16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32))) +void __arm_vstrbq_scatter_offset_s32(int8_t *, uint32x4_t, 
int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32))) +void __arm_vstrbq_scatter_offset(int8_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8))) +void __arm_vstrbq_scatter_offset_s8(int8_t *, uint8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8))) +void __arm_vstrbq_scatter_offset(int8_t *, uint8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16))) +void __arm_vstrbq_scatter_offset_u16(uint8_t *, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16))) +void __arm_vstrbq_scatter_offset(uint8_t *, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32))) +void __arm_vstrbq_scatter_offset_u32(uint8_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32))) +void __arm_vstrbq_scatter_offset(uint8_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8))) +void __arm_vstrbq_scatter_offset_u8(uint8_t *, uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8))) +void __arm_vstrbq_scatter_offset(uint8_t *, uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u16))) +void __arm_vstrbq_u16(uint8_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u16))) +void __arm_vstrbq(uint8_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u32))) +void __arm_vstrbq_u32(uint8_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u32))) +void __arm_vstrbq(uint8_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u8))) +void __arm_vstrbq_u8(uint8_t *, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u8))) +void __arm_vstrbq(uint8_t *, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64))) +void __arm_vstrdq_scatter_base_p_s64(uint64x2_t, int, int64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64))) +void __arm_vstrdq_scatter_base_p(uint64x2_t, int, int64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64))) +void __arm_vstrdq_scatter_base_p_u64(uint64x2_t, int, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64))) +void __arm_vstrdq_scatter_base_p(uint64x2_t, int, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_s64))) +void __arm_vstrdq_scatter_base_s64(uint64x2_t, int, int64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_s64))) +void 
__arm_vstrdq_scatter_base(uint64x2_t, int, int64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_u64))) +void __arm_vstrdq_scatter_base_u64(uint64x2_t, int, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_u64))) +void __arm_vstrdq_scatter_base(uint64x2_t, int, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64))) +void __arm_vstrdq_scatter_base_wb_p_s64(uint64x2_t *, int, int64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64))) +void __arm_vstrdq_scatter_base_wb_p(uint64x2_t *, int, int64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64))) +void __arm_vstrdq_scatter_base_wb_p_u64(uint64x2_t *, int, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64))) +void __arm_vstrdq_scatter_base_wb_p(uint64x2_t *, int, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64))) +void __arm_vstrdq_scatter_base_wb_s64(uint64x2_t *, int, int64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64))) +void __arm_vstrdq_scatter_base_wb(uint64x2_t *, int, int64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64))) +void __arm_vstrdq_scatter_base_wb_u64(uint64x2_t *, int, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64))) +void __arm_vstrdq_scatter_base_wb(uint64x2_t *, int, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64))) +void __arm_vstrdq_scatter_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64))) +void __arm_vstrdq_scatter_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64))) +void __arm_vstrdq_scatter_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64))) +void __arm_vstrdq_scatter_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64))) +void __arm_vstrdq_scatter_offset_s64(int64_t *, uint64x2_t, int64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64))) +void __arm_vstrdq_scatter_offset(int64_t *, uint64x2_t, int64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64))) +void __arm_vstrdq_scatter_offset_u64(uint64_t *, uint64x2_t, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64))) +void __arm_vstrdq_scatter_offset(uint64_t *, uint64x2_t, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64))) +void 
__arm_vstrdq_scatter_shifted_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64))) +void __arm_vstrdq_scatter_shifted_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64))) +void __arm_vstrdq_scatter_shifted_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64))) +void __arm_vstrdq_scatter_shifted_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64))) +void __arm_vstrdq_scatter_shifted_offset_s64(int64_t *, uint64x2_t, int64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64))) +void __arm_vstrdq_scatter_shifted_offset(int64_t *, uint64x2_t, int64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64))) +void __arm_vstrdq_scatter_shifted_offset_u64(uint64_t *, uint64x2_t, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64))) +void __arm_vstrdq_scatter_shifted_offset(uint64_t *, uint64x2_t, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_s16))) +void __arm_vstrhq_p_s16(int16_t *, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_s16))) +void __arm_vstrhq_p(int16_t *, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_s32))) +void __arm_vstrhq_p_s32(int16_t *, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_s32))) +void __arm_vstrhq_p(int16_t *, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_u16))) +void __arm_vstrhq_p_u16(uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_u16))) +void __arm_vstrhq_p(uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_u32))) +void __arm_vstrhq_p_u32(uint16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_u32))) +void __arm_vstrhq_p(uint16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_s16))) +void __arm_vstrhq_s16(int16_t *, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_s16))) +void __arm_vstrhq(int16_t *, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_s32))) +void __arm_vstrhq_s32(int16_t *, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_s32))) +void __arm_vstrhq(int16_t *, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16))) +void __arm_vstrhq_scatter_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); 
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16))) +void __arm_vstrhq_scatter_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32))) +void __arm_vstrhq_scatter_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32))) +void __arm_vstrhq_scatter_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16))) +void __arm_vstrhq_scatter_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16))) +void __arm_vstrhq_scatter_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32))) +void __arm_vstrhq_scatter_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32))) +void __arm_vstrhq_scatter_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16))) +void __arm_vstrhq_scatter_offset_s16(int16_t *, uint16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16))) +void __arm_vstrhq_scatter_offset(int16_t *, uint16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32))) +void __arm_vstrhq_scatter_offset_s32(int16_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32))) +void __arm_vstrhq_scatter_offset(int16_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16))) +void __arm_vstrhq_scatter_offset_u16(uint16_t *, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16))) +void __arm_vstrhq_scatter_offset(uint16_t *, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32))) +void __arm_vstrhq_scatter_offset_u32(uint16_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32))) +void __arm_vstrhq_scatter_offset(uint16_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16))) +void __arm_vstrhq_scatter_shifted_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16))) +void __arm_vstrhq_scatter_shifted_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32))) +void __arm_vstrhq_scatter_shifted_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32))) +void __arm_vstrhq_scatter_shifted_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16))) +void __arm_vstrhq_scatter_shifted_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16))) +void __arm_vstrhq_scatter_shifted_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32))) +void __arm_vstrhq_scatter_shifted_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32))) +void __arm_vstrhq_scatter_shifted_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) +void __arm_vstrhq_scatter_shifted_offset_s16(int16_t *, uint16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) +void __arm_vstrhq_scatter_shifted_offset(int16_t *, uint16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) +void __arm_vstrhq_scatter_shifted_offset_s32(int16_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) +void __arm_vstrhq_scatter_shifted_offset(int16_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) +void __arm_vstrhq_scatter_shifted_offset_u16(uint16_t *, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) +void __arm_vstrhq_scatter_shifted_offset(uint16_t *, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) +void __arm_vstrhq_scatter_shifted_offset_u32(uint16_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) +void __arm_vstrhq_scatter_shifted_offset(uint16_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_u16))) +void __arm_vstrhq_u16(uint16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_u16))) +void __arm_vstrhq(uint16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_u32))) +void __arm_vstrhq_u32(uint16_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_u32))) +void __arm_vstrhq(uint16_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_s32))) +void __arm_vstrwq_p_s32(int32_t *, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_s32))) +void __arm_vstrwq_p(int32_t *, int32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_u32))) +void __arm_vstrwq_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_u32))) +void __arm_vstrwq_p(uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_s32))) +void __arm_vstrwq_s32(int32_t *, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_s32))) +void __arm_vstrwq(int32_t *, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) +void __arm_vstrwq_scatter_base_p_s32(uint32x4_t, int, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) +void __arm_vstrwq_scatter_base_p(uint32x4_t, int, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) +void __arm_vstrwq_scatter_base_p_u32(uint32x4_t, int, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) +void __arm_vstrwq_scatter_base_p(uint32x4_t, int, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) +void __arm_vstrwq_scatter_base_s32(uint32x4_t, int, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) +void __arm_vstrwq_scatter_base(uint32x4_t, int, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) +void __arm_vstrwq_scatter_base_u32(uint32x4_t, int, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) +void __arm_vstrwq_scatter_base(uint32x4_t, int, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) +void __arm_vstrwq_scatter_base_wb_p_s32(uint32x4_t *, int, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) +void __arm_vstrwq_scatter_base_wb_p(uint32x4_t *, int, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) +void __arm_vstrwq_scatter_base_wb_p_u32(uint32x4_t *, int, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) +void __arm_vstrwq_scatter_base_wb_p(uint32x4_t *, int, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) +void __arm_vstrwq_scatter_base_wb_s32(uint32x4_t *, int, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) +void __arm_vstrwq_scatter_base_wb(uint32x4_t *, int, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) +void __arm_vstrwq_scatter_base_wb_u32(uint32x4_t *, int, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) +void __arm_vstrwq_scatter_base_wb(uint32x4_t *, int, uint32x4_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) +void __arm_vstrwq_scatter_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) +void __arm_vstrwq_scatter_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) +void __arm_vstrwq_scatter_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) +void __arm_vstrwq_scatter_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) +void __arm_vstrwq_scatter_offset_s32(int32_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) +void __arm_vstrwq_scatter_offset(int32_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) +void __arm_vstrwq_scatter_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) +void __arm_vstrwq_scatter_offset(uint32_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) +void __arm_vstrwq_scatter_shifted_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) +void __arm_vstrwq_scatter_shifted_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) +void __arm_vstrwq_scatter_shifted_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) +void __arm_vstrwq_scatter_shifted_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) +void __arm_vstrwq_scatter_shifted_offset_s32(int32_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) +void __arm_vstrwq_scatter_shifted_offset(int32_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) +void __arm_vstrwq_scatter_shifted_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) +void __arm_vstrwq_scatter_shifted_offset(uint32_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_u32))) +void __arm_vstrwq_u32(uint32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_u32))) +void __arm_vstrwq(uint32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s16))) +int16x8_t 
__arm_vsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s16))) +int16x8_t __arm_vsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s32))) +int32x4_t __arm_vsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s32))) +int32x4_t __arm_vsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s8))) +int8x16_t __arm_vsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s8))) +int8x16_t __arm_vsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u16))) +uint16x8_t __arm_vsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u16))) +uint16x8_t __arm_vsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u32))) +uint32x4_t __arm_vsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u32))) +uint32x4_t __arm_vsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u8))) +uint8x16_t __arm_vsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u8))) +uint8x16_t __arm_vsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_s16))) +int16x8_t __arm_vsubq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_s16))) +int16x8_t __arm_vsubq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_s32))) +int32x4_t __arm_vsubq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_s32))) +int32x4_t __arm_vsubq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_s8))) +int8x16_t __arm_vsubq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_s8))) +int8x16_t __arm_vsubq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_u16))) +uint16x8_t __arm_vsubq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_u16))) +uint16x8_t __arm_vsubq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_u32))) +uint32x4_t __arm_vsubq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_u32))) +uint32x4_t __arm_vsubq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_u8))) +uint8x16_t __arm_vsubq_u8(uint8x16_t, uint8x16_t); +static 
__inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_u8))) +uint8x16_t __arm_vsubq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s16))) +int16x8_t __arm_vsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s16))) +int16x8_t __arm_vsubq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s32))) +int32x4_t __arm_vsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s32))) +int32x4_t __arm_vsubq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s8))) +int8x16_t __arm_vsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s8))) +int8x16_t __arm_vsubq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u16))) +uint16x8_t __arm_vsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u16))) +uint16x8_t __arm_vsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u32))) +uint32x4_t __arm_vsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u32))) +uint32x4_t __arm_vsubq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u8))) +uint8x16_t __arm_vsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u8))) +uint8x16_t __arm_vsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s16))) +int16x8_t __arm_vuninitializedq(int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s32))) +int32x4_t __arm_vuninitializedq(int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s64))) +int64x2_t __arm_vuninitializedq(int64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s8))) +int8x16_t __arm_vuninitializedq(int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u16))) +uint16x8_t __arm_vuninitializedq(uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u32))) +uint32x4_t __arm_vuninitializedq(uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u64))) +uint64x2_t __arm_vuninitializedq(uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u8))) +uint8x16_t __arm_vuninitializedq(uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_s16))) +int16x8_t 
__arm_vuninitializedq_s16();
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_s32)))
+int32x4_t __arm_vuninitializedq_s32();
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_s64)))
+int64x2_t __arm_vuninitializedq_s64();
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_s8)))
+int8x16_t __arm_vuninitializedq_s8();
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_u16)))
+uint16x8_t __arm_vuninitializedq_u16();
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_u32)))
+uint32x4_t __arm_vuninitializedq_u32();
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_u64)))
+uint64x2_t __arm_vuninitializedq_u64();
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_u8)))
+uint8x16_t __arm_vuninitializedq_u8();
+
+#if (__ARM_FEATURE_MVE & 2)
+
+typedef __fp16 float16_t;
+typedef float float32_t;
+typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
+typedef struct { float16x8_t val[2]; } float16x8x2_t;
+typedef struct { float16x8_t val[4]; } float16x8x4_t;
+typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
+typedef struct { float32x4_t val[2]; } float32x4x2_t;
+typedef struct { float32x4_t val[4]; } float32x4x4_t;
+
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_f16)))
+float16x8_t __arm_vabdq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_f16)))
+float16x8_t __arm_vabdq(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_f32)))
+float32x4_t __arm_vabdq_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_f32)))
+float32x4_t __arm_vabdq(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_f16)))
+float16x8_t __arm_vabdq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_f16)))
+float16x8_t __arm_vabdq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_f32)))
+float32x4_t __arm_vabdq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_f32)))
+float32x4_t __arm_vabdq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_f16)))
+float16x8_t __arm_vabdq_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_f16)))
+float16x8_t __arm_vabdq_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_f32)))
+float32x4_t __arm_vabdq_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_f32)))
+float32x4_t __arm_vabdq_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_f16)))
+float16x8_t __arm_vaddq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_f16))) +float16x8_t __arm_vaddq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_f32))) +float32x4_t __arm_vaddq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_f32))) +float32x4_t __arm_vaddq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_f16))) +float16x8_t __arm_vaddq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_f16))) +float16x8_t __arm_vaddq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_f32))) +float32x4_t __arm_vaddq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_f32))) +float32x4_t __arm_vaddq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_f16))) +float16x8_t __arm_vaddq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_f16))) +float16x8_t __arm_vaddq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_f32))) +float32x4_t __arm_vaddq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_f32))) +float32x4_t __arm_vaddq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_f16))) +float16x8_t __arm_vandq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_f16))) +float16x8_t __arm_vandq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_f32))) +float32x4_t __arm_vandq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_f32))) +float32x4_t __arm_vandq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_f16))) +float16x8_t __arm_vandq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_f16))) +float16x8_t __arm_vandq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_f32))) +float32x4_t __arm_vandq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_f32))) +float32x4_t __arm_vandq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_f16))) +float16x8_t __arm_vandq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_f16))) +float16x8_t __arm_vandq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_f32))) +float32x4_t __arm_vandq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_f32))) +float32x4_t __arm_vandq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_f16))) +float16x8_t __arm_vbicq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_f16))) +float16x8_t __arm_vbicq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_f32))) +float32x4_t __arm_vbicq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_f32))) +float32x4_t __arm_vbicq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_f16))) +float16x8_t __arm_vbicq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_f16))) +float16x8_t __arm_vbicq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_f32))) +float32x4_t __arm_vbicq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_f32))) +float32x4_t __arm_vbicq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_f16))) +float16x8_t __arm_vbicq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_f16))) +float16x8_t __arm_vbicq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_f32))) +float32x4_t __arm_vbicq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_f32))) +float32x4_t __arm_vbicq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_f16))) +float16x8_t __arm_vcaddq_rot270_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_f16))) +float16x8_t __arm_vcaddq_rot270(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_f32))) +float32x4_t __arm_vcaddq_rot270_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_f32))) +float32x4_t __arm_vcaddq_rot270(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) +float16x8_t __arm_vcaddq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) +float16x8_t __arm_vcaddq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) +float32x4_t __arm_vcaddq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) +float32x4_t __arm_vcaddq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) +float16x8_t __arm_vcaddq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) +float16x8_t __arm_vcaddq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) +float32x4_t __arm_vcaddq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) +float32x4_t __arm_vcaddq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_f16))) +float16x8_t __arm_vcaddq_rot90_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_f16))) +float16x8_t __arm_vcaddq_rot90(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_f32))) +float32x4_t __arm_vcaddq_rot90_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_f32))) +float32x4_t __arm_vcaddq_rot90(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) +float16x8_t __arm_vcaddq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) +float16x8_t __arm_vcaddq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) +float32x4_t __arm_vcaddq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) +float32x4_t __arm_vcaddq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) +float16x8_t __arm_vcaddq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) +float16x8_t __arm_vcaddq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) +float32x4_t __arm_vcaddq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) +float32x4_t __arm_vcaddq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_f16))) +float16x8_t __arm_vcmlaq_f16(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_f16))) +float16x8_t __arm_vcmlaq(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_f32))) +float32x4_t __arm_vcmlaq_f32(float32x4_t, float32x4_t, float32x4_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_f32))) +float32x4_t __arm_vcmlaq(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_m_f16))) +float16x8_t __arm_vcmlaq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_m_f16))) +float16x8_t __arm_vcmlaq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_m_f32))) +float32x4_t __arm_vcmlaq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_m_f32))) +float32x4_t __arm_vcmlaq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) +float16x8_t __arm_vcmlaq_rot180_f16(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) +float16x8_t __arm_vcmlaq_rot180(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) +float32x4_t __arm_vcmlaq_rot180_f32(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) +float32x4_t __arm_vcmlaq_rot180(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) +float16x8_t __arm_vcmlaq_rot180_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) +float16x8_t __arm_vcmlaq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) +float32x4_t __arm_vcmlaq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) +float32x4_t __arm_vcmlaq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) +float16x8_t __arm_vcmlaq_rot270_f16(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) +float16x8_t __arm_vcmlaq_rot270(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) +float32x4_t __arm_vcmlaq_rot270_f32(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) +float32x4_t __arm_vcmlaq_rot270(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) +float16x8_t __arm_vcmlaq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) +float16x8_t __arm_vcmlaq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) +float32x4_t __arm_vcmlaq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) +float32x4_t __arm_vcmlaq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) +float16x8_t __arm_vcmlaq_rot90_f16(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) +float16x8_t __arm_vcmlaq_rot90(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) +float32x4_t __arm_vcmlaq_rot90_f32(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) +float32x4_t __arm_vcmlaq_rot90(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) +float16x8_t __arm_vcmlaq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) +float16x8_t __arm_vcmlaq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) +float32x4_t __arm_vcmlaq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) +float32x4_t __arm_vcmlaq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_f16))) +mve_pred16_t __arm_vcmpeqq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_f16))) +mve_pred16_t __arm_vcmpeqq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_f32))) +mve_pred16_t __arm_vcmpeqq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_f32))) +mve_pred16_t __arm_vcmpeqq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_f16))) +mve_pred16_t __arm_vcmpeqq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_f16))) +mve_pred16_t __arm_vcmpeqq_m(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_f32))) +mve_pred16_t __arm_vcmpeqq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_f32))) +mve_pred16_t __arm_vcmpeqq_m(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) +mve_pred16_t __arm_vcmpeqq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) +mve_pred16_t __arm_vcmpeqq_m(float16x8_t, float16_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) +mve_pred16_t __arm_vcmpeqq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) +mve_pred16_t __arm_vcmpeqq_m(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_f16))) +mve_pred16_t __arm_vcmpeqq_n_f16(float16x8_t, float16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_f16))) +mve_pred16_t __arm_vcmpeqq(float16x8_t, float16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_f32))) +mve_pred16_t __arm_vcmpeqq_n_f32(float32x4_t, float32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_f32))) +mve_pred16_t __arm_vcmpeqq(float32x4_t, float32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_f16))) +mve_pred16_t __arm_vcmpgeq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_f16))) +mve_pred16_t __arm_vcmpgeq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_f32))) +mve_pred16_t __arm_vcmpgeq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_f32))) +mve_pred16_t __arm_vcmpgeq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_f16))) +mve_pred16_t __arm_vcmpgeq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_f16))) +mve_pred16_t __arm_vcmpgeq_m(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_f32))) +mve_pred16_t __arm_vcmpgeq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_f32))) +mve_pred16_t __arm_vcmpgeq_m(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) +mve_pred16_t __arm_vcmpgeq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) +mve_pred16_t __arm_vcmpgeq_m(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) +mve_pred16_t __arm_vcmpgeq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) +mve_pred16_t __arm_vcmpgeq_m(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_f16))) +mve_pred16_t __arm_vcmpgeq_n_f16(float16x8_t, float16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_f16))) +mve_pred16_t __arm_vcmpgeq(float16x8_t, float16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_f32))) +mve_pred16_t __arm_vcmpgeq_n_f32(float32x4_t, float32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_f32))) +mve_pred16_t __arm_vcmpgeq(float32x4_t, float32_t); +static 
__inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_f16))) +mve_pred16_t __arm_vcmpgtq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_f16))) +mve_pred16_t __arm_vcmpgtq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_f32))) +mve_pred16_t __arm_vcmpgtq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_f32))) +mve_pred16_t __arm_vcmpgtq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_f16))) +mve_pred16_t __arm_vcmpgtq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_f16))) +mve_pred16_t __arm_vcmpgtq_m(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_f32))) +mve_pred16_t __arm_vcmpgtq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_f32))) +mve_pred16_t __arm_vcmpgtq_m(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) +mve_pred16_t __arm_vcmpgtq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) +mve_pred16_t __arm_vcmpgtq_m(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) +mve_pred16_t __arm_vcmpgtq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) +mve_pred16_t __arm_vcmpgtq_m(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_f16))) +mve_pred16_t __arm_vcmpgtq_n_f16(float16x8_t, float16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_f16))) +mve_pred16_t __arm_vcmpgtq(float16x8_t, float16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_f32))) +mve_pred16_t __arm_vcmpgtq_n_f32(float32x4_t, float32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_f32))) +mve_pred16_t __arm_vcmpgtq(float32x4_t, float32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_f16))) +mve_pred16_t __arm_vcmpleq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_f16))) +mve_pred16_t __arm_vcmpleq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_f32))) +mve_pred16_t __arm_vcmpleq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_f32))) +mve_pred16_t __arm_vcmpleq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_f16))) +mve_pred16_t __arm_vcmpleq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_f16))) +mve_pred16_t __arm_vcmpleq_m(float16x8_t, float16x8_t, mve_pred16_t); +static 
__inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_f32))) +mve_pred16_t __arm_vcmpleq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_f32))) +mve_pred16_t __arm_vcmpleq_m(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) +mve_pred16_t __arm_vcmpleq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) +mve_pred16_t __arm_vcmpleq_m(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) +mve_pred16_t __arm_vcmpleq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) +mve_pred16_t __arm_vcmpleq_m(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_f16))) +mve_pred16_t __arm_vcmpleq_n_f16(float16x8_t, float16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_f16))) +mve_pred16_t __arm_vcmpleq(float16x8_t, float16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_f32))) +mve_pred16_t __arm_vcmpleq_n_f32(float32x4_t, float32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_f32))) +mve_pred16_t __arm_vcmpleq(float32x4_t, float32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_f16))) +mve_pred16_t __arm_vcmpltq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_f16))) +mve_pred16_t __arm_vcmpltq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_f32))) +mve_pred16_t __arm_vcmpltq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_f32))) +mve_pred16_t __arm_vcmpltq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_f16))) +mve_pred16_t __arm_vcmpltq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_f16))) +mve_pred16_t __arm_vcmpltq_m(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_f32))) +mve_pred16_t __arm_vcmpltq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_f32))) +mve_pred16_t __arm_vcmpltq_m(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_f16))) +mve_pred16_t __arm_vcmpltq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_f16))) +mve_pred16_t __arm_vcmpltq_m(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_f32))) +mve_pred16_t __arm_vcmpltq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_f32))) 
+mve_pred16_t __arm_vcmpltq_m(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_f16))) +mve_pred16_t __arm_vcmpltq_n_f16(float16x8_t, float16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_f16))) +mve_pred16_t __arm_vcmpltq(float16x8_t, float16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_f32))) +mve_pred16_t __arm_vcmpltq_n_f32(float32x4_t, float32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_f32))) +mve_pred16_t __arm_vcmpltq(float32x4_t, float32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_f16))) +mve_pred16_t __arm_vcmpneq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_f16))) +mve_pred16_t __arm_vcmpneq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_f32))) +mve_pred16_t __arm_vcmpneq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_f32))) +mve_pred16_t __arm_vcmpneq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_f16))) +mve_pred16_t __arm_vcmpneq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_f16))) +mve_pred16_t __arm_vcmpneq_m(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_f32))) +mve_pred16_t __arm_vcmpneq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_f32))) +mve_pred16_t __arm_vcmpneq_m(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_f16))) +mve_pred16_t __arm_vcmpneq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_f16))) +mve_pred16_t __arm_vcmpneq_m(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_f32))) +mve_pred16_t __arm_vcmpneq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_f32))) +mve_pred16_t __arm_vcmpneq_m(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_f16))) +mve_pred16_t __arm_vcmpneq_n_f16(float16x8_t, float16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_f16))) +mve_pred16_t __arm_vcmpneq(float16x8_t, float16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_f32))) +mve_pred16_t __arm_vcmpneq_n_f32(float32x4_t, float32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_f32))) +mve_pred16_t __arm_vcmpneq(float32x4_t, float32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_f16))) +float16x8_t __arm_vcmulq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_f16))) +float16x8_t 
__arm_vcmulq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_f32))) +float32x4_t __arm_vcmulq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_f32))) +float32x4_t __arm_vcmulq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_m_f16))) +float16x8_t __arm_vcmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_m_f16))) +float16x8_t __arm_vcmulq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_m_f32))) +float32x4_t __arm_vcmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_m_f32))) +float32x4_t __arm_vcmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_f16))) +float16x8_t __arm_vcmulq_rot180_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_f16))) +float16x8_t __arm_vcmulq_rot180(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_f32))) +float32x4_t __arm_vcmulq_rot180_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_f32))) +float32x4_t __arm_vcmulq_rot180(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_m_f16))) +float16x8_t __arm_vcmulq_rot180_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_m_f16))) +float16x8_t __arm_vcmulq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_m_f32))) +float32x4_t __arm_vcmulq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_m_f32))) +float32x4_t __arm_vcmulq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_x_f16))) +float16x8_t __arm_vcmulq_rot180_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_x_f16))) +float16x8_t __arm_vcmulq_rot180_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_x_f32))) +float32x4_t __arm_vcmulq_rot180_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_x_f32))) +float32x4_t __arm_vcmulq_rot180_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_f16))) +float16x8_t __arm_vcmulq_rot270_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_f16))) +float16x8_t __arm_vcmulq_rot270(float16x8_t, 
float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_f32))) +float32x4_t __arm_vcmulq_rot270_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_f32))) +float32x4_t __arm_vcmulq_rot270(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_m_f16))) +float16x8_t __arm_vcmulq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_m_f16))) +float16x8_t __arm_vcmulq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_m_f32))) +float32x4_t __arm_vcmulq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_m_f32))) +float32x4_t __arm_vcmulq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_x_f16))) +float16x8_t __arm_vcmulq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_x_f16))) +float16x8_t __arm_vcmulq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_x_f32))) +float32x4_t __arm_vcmulq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_x_f32))) +float32x4_t __arm_vcmulq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_f16))) +float16x8_t __arm_vcmulq_rot90_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_f16))) +float16x8_t __arm_vcmulq_rot90(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_f32))) +float32x4_t __arm_vcmulq_rot90_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_f32))) +float32x4_t __arm_vcmulq_rot90(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_m_f16))) +float16x8_t __arm_vcmulq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_m_f16))) +float16x8_t __arm_vcmulq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_m_f32))) +float32x4_t __arm_vcmulq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_m_f32))) +float32x4_t __arm_vcmulq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_x_f16))) +float16x8_t __arm_vcmulq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_x_f16))) 
+float16x8_t __arm_vcmulq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_x_f32)))
+float32x4_t __arm_vcmulq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_x_f32)))
+float32x4_t __arm_vcmulq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_x_f16)))
+float16x8_t __arm_vcmulq_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_x_f16)))
+float16x8_t __arm_vcmulq_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_x_f32)))
+float32x4_t __arm_vcmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_x_f32)))
+float32x4_t __arm_vcmulq_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_f16)))
+float16x8_t __arm_vcreateq_f16(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_f32)))
+float32x4_t __arm_vcreateq_f32(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcvtbq_f16_f32)))
+float16x8_t __arm_vcvtbq_f16_f32(float16x8_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcvtbq_m_f16_f32)))
+float16x8_t __arm_vcvtbq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcvttq_f16_f32)))
+float16x8_t __arm_vcvttq_f16_f32(float16x8_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcvttq_m_f16_f32)))
+float16x8_t __arm_vcvttq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_f16)))
+float16x8_t __arm_veorq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_f16)))
+float16x8_t __arm_veorq(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_f32)))
+float32x4_t __arm_veorq_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_f32)))
+float32x4_t __arm_veorq(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_f16)))
+float16x8_t __arm_veorq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_f16)))
+float16x8_t __arm_veorq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_f32)))
+float32x4_t __arm_veorq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_f32)))
+float32x4_t __arm_veorq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_f16)))
+float16x8_t __arm_veorq_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_f16))) +float16x8_t __arm_veorq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_f32))) +float32x4_t __arm_veorq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_f32))) +float32x4_t __arm_veorq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_f16))) +float16_t __arm_vgetq_lane_f16(float16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_f16))) +float16_t __arm_vgetq_lane(float16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_f32))) +float32_t __arm_vgetq_lane_f32(float32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_f32))) +float32_t __arm_vgetq_lane(float32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_f16))) +float16x8_t __arm_vld1q_f16(const float16_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_f16))) +float16x8_t __arm_vld1q(const float16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_f32))) +float32x4_t __arm_vld1q_f32(const float32_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_f32))) +float32x4_t __arm_vld1q(const float32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_f16))) +float16x8_t __arm_vld1q_z_f16(const float16_t *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_f16))) +float16x8_t __arm_vld1q_z(const float16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_f32))) +float32x4_t __arm_vld1q_z_f32(const float32_t *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_f32))) +float32x4_t __arm_vld1q_z(const float32_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_f16))) +float16x8x2_t __arm_vld2q_f16(const float16_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_f16))) +float16x8x2_t __arm_vld2q(const float16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_f32))) +float32x4x2_t __arm_vld2q_f32(const float32_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_f32))) +float32x4x2_t __arm_vld2q(const float32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_f16))) +float16x8x4_t __arm_vld4q_f16(const float16_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_f16))) +float16x8x4_t __arm_vld4q(const float16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_f32))) +float32x4x4_t __arm_vld4q_f32(const float32_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_f32))) +float32x4x4_t __arm_vld4q(const float32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_f16))) +float16x8_t __arm_vldrhq_f16(const float16_t 
*); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_f16))) +float16x8_t __arm_vldrhq_gather_offset_f16(const float16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_f16))) +float16x8_t __arm_vldrhq_gather_offset(const float16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16))) +float16x8_t __arm_vldrhq_gather_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16))) +float16x8_t __arm_vldrhq_gather_offset_z(const float16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16))) +float16x8_t __arm_vldrhq_gather_shifted_offset_f16(const float16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16))) +float16x8_t __arm_vldrhq_gather_shifted_offset(const float16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16))) +float16x8_t __arm_vldrhq_gather_shifted_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16))) +float16x8_t __arm_vldrhq_gather_shifted_offset_z(const float16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_z_f16))) +float16x8_t __arm_vldrhq_z_f16(const float16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_f32))) +float32x4_t __arm_vldrwq_f32(const float32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_f32))) +float32x4_t __arm_vldrwq_gather_base_f32(uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_f32))) +float32x4_t __arm_vldrwq_gather_base_wb_f32(uint32x4_t *, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_f32))) +float32x4_t __arm_vldrwq_gather_base_wb_z_f32(uint32x4_t *, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_z_f32))) +float32x4_t __arm_vldrwq_gather_base_z_f32(uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_f32))) +float32x4_t __arm_vldrwq_gather_offset_f32(const float32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_f32))) +float32x4_t __arm_vldrwq_gather_offset(const float32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32))) +float32x4_t __arm_vldrwq_gather_offset_z_f32(const float32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32))) +float32x4_t __arm_vldrwq_gather_offset_z(const float32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32))) +float32x4_t __arm_vldrwq_gather_shifted_offset_f32(const float32_t *, uint32x4_t); 
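
A minimal usage sketch (an editorial illustration, not part of the upstream header or of this patch), assuming a target built with MVE floating point, e.g. -march=armv8.1-m.main+mve.fp: the `overloadable` aliases declared above resolve on argument types, so the generic __arm_vld1q and __arm_vldrwq_gather_offset_z names lower to their _f32 forms here, and the _z suffix zeroes the lanes that the mve_pred16_t predicate masks off. The helper names load_vec and gather_f32_z are hypothetical.

    #include <arm_mve.h>

    /* Contiguous load: __arm_vld1q applied to a const float32_t *
     * resolves to __arm_vld1q_f32. */
    static float32x4_t load_vec(const float32_t *p) {
        return __arm_vld1q(p);
    }

    /* Predicated gather: lane i loads from base + offsets[i] (byte
     * offsets in this unshifted form; the _shifted_offset variants
     * scale by the element size); lanes masked off by p read as 0. */
    static float32x4_t gather_f32_z(const float32_t *base,
                                    uint32x4_t offsets, mve_pred16_t p) {
        return __arm_vldrwq_gather_offset_z(base, offsets, p);
    }
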
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32))) +float32x4_t __arm_vldrwq_gather_shifted_offset(const float32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32))) +float32x4_t __arm_vldrwq_gather_shifted_offset_z_f32(const float32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32))) +float32x4_t __arm_vldrwq_gather_shifted_offset_z(const float32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_z_f32))) +float32x4_t __arm_vldrwq_z_f32(const float32_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_f16))) +float16x8_t __arm_vmaxnmq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_f16))) +float16x8_t __arm_vmaxnmq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_f32))) +float32x4_t __arm_vmaxnmq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_f32))) +float32x4_t __arm_vmaxnmq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_m_f16))) +float16x8_t __arm_vmaxnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_m_f16))) +float16x8_t __arm_vmaxnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_m_f32))) +float32x4_t __arm_vmaxnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_m_f32))) +float32x4_t __arm_vmaxnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_x_f16))) +float16x8_t __arm_vmaxnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_x_f16))) +float16x8_t __arm_vmaxnmq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_x_f32))) +float32x4_t __arm_vmaxnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_x_f32))) +float32x4_t __arm_vmaxnmq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_f16))) +float16x8_t __arm_vminnmq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_f16))) +float16x8_t __arm_vminnmq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_f32))) +float32x4_t __arm_vminnmq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_f32))) +float32x4_t __arm_vminnmq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_m_f16))) +float16x8_t 
__arm_vminnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_m_f16))) +float16x8_t __arm_vminnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_m_f32))) +float32x4_t __arm_vminnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_m_f32))) +float32x4_t __arm_vminnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_x_f16))) +float16x8_t __arm_vminnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_x_f16))) +float16x8_t __arm_vminnmq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_x_f32))) +float32x4_t __arm_vminnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_x_f32))) +float32x4_t __arm_vminnmq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_f16))) +float16x8_t __arm_vmulq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_f16))) +float16x8_t __arm_vmulq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_f32))) +float32x4_t __arm_vmulq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_f32))) +float32x4_t __arm_vmulq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_f16))) +float16x8_t __arm_vmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_f16))) +float16x8_t __arm_vmulq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_f32))) +float32x4_t __arm_vmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_f32))) +float32x4_t __arm_vmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_f16))) +float16x8_t __arm_vmulq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_f16))) +float16x8_t __arm_vmulq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_f32))) +float32x4_t __arm_vmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_f32))) +float32x4_t __arm_vmulq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_f16))) +float16x8_t __arm_vornq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_f16))) +float16x8_t 
__arm_vornq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_f32))) +float32x4_t __arm_vornq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_f32))) +float32x4_t __arm_vornq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_f16))) +float16x8_t __arm_vornq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_f16))) +float16x8_t __arm_vornq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_f32))) +float32x4_t __arm_vornq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_f32))) +float32x4_t __arm_vornq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_f16))) +float16x8_t __arm_vornq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_f16))) +float16x8_t __arm_vornq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_f32))) +float32x4_t __arm_vornq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_f32))) +float32x4_t __arm_vornq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_f16))) +float16x8_t __arm_vorrq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_f16))) +float16x8_t __arm_vorrq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_f32))) +float32x4_t __arm_vorrq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_f32))) +float32x4_t __arm_vorrq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_f16))) +float16x8_t __arm_vorrq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_f16))) +float16x8_t __arm_vorrq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_f32))) +float32x4_t __arm_vorrq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_f32))) +float32x4_t __arm_vorrq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_f16))) +float16x8_t __arm_vorrq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_f16))) +float16x8_t __arm_vorrq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_f32))) +float32x4_t __arm_vorrq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_f32))) +float32x4_t __arm_vorrq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_f16))) +float16x8_t __arm_vpselq_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_f16))) +float16x8_t __arm_vpselq(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_f32))) +float32x4_t __arm_vpselq_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_f32))) +float32x4_t __arm_vpselq(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_f32))) +float16x8_t __arm_vreinterpretq_f16_f32(float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_f32))) +float16x8_t __arm_vreinterpretq_f16(float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s16))) +float16x8_t __arm_vreinterpretq_f16_s16(int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s16))) +float16x8_t __arm_vreinterpretq_f16(int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s32))) +float16x8_t __arm_vreinterpretq_f16_s32(int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s32))) +float16x8_t __arm_vreinterpretq_f16(int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s64))) +float16x8_t __arm_vreinterpretq_f16_s64(int64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s64))) +float16x8_t __arm_vreinterpretq_f16(int64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s8))) +float16x8_t __arm_vreinterpretq_f16_s8(int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s8))) +float16x8_t __arm_vreinterpretq_f16(int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u16))) +float16x8_t __arm_vreinterpretq_f16_u16(uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u16))) +float16x8_t __arm_vreinterpretq_f16(uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u32))) +float16x8_t __arm_vreinterpretq_f16_u32(uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u32))) +float16x8_t __arm_vreinterpretq_f16(uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u64))) +float16x8_t __arm_vreinterpretq_f16_u64(uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u64))) +float16x8_t __arm_vreinterpretq_f16(uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) +float16x8_t __arm_vreinterpretq_f16_u8(uint8x16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) +float16x8_t __arm_vreinterpretq_f16(uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_f16))) +float32x4_t __arm_vreinterpretq_f32_f16(float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_f16))) +float32x4_t __arm_vreinterpretq_f32(float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s16))) +float32x4_t __arm_vreinterpretq_f32_s16(int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s16))) +float32x4_t __arm_vreinterpretq_f32(int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s32))) +float32x4_t __arm_vreinterpretq_f32_s32(int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s32))) +float32x4_t __arm_vreinterpretq_f32(int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s64))) +float32x4_t __arm_vreinterpretq_f32_s64(int64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s64))) +float32x4_t __arm_vreinterpretq_f32(int64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s8))) +float32x4_t __arm_vreinterpretq_f32_s8(int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s8))) +float32x4_t __arm_vreinterpretq_f32(int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u16))) +float32x4_t __arm_vreinterpretq_f32_u16(uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u16))) +float32x4_t __arm_vreinterpretq_f32(uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u32))) +float32x4_t __arm_vreinterpretq_f32_u32(uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u32))) +float32x4_t __arm_vreinterpretq_f32(uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u64))) +float32x4_t __arm_vreinterpretq_f32_u64(uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u64))) +float32x4_t __arm_vreinterpretq_f32(uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) +float32x4_t __arm_vreinterpretq_f32_u8(uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) +float32x4_t __arm_vreinterpretq_f32(uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_f16))) +int16x8_t __arm_vreinterpretq_s16_f16(float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_f16))) +int16x8_t __arm_vreinterpretq_s16(float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_f32))) +int16x8_t __arm_vreinterpretq_s16_f32(float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_f32))) +int16x8_t 
__arm_vreinterpretq_s16(float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_f16))) +int32x4_t __arm_vreinterpretq_s32_f16(float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_f16))) +int32x4_t __arm_vreinterpretq_s32(float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_f32))) +int32x4_t __arm_vreinterpretq_s32_f32(float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_f32))) +int32x4_t __arm_vreinterpretq_s32(float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_f16))) +int64x2_t __arm_vreinterpretq_s64_f16(float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_f16))) +int64x2_t __arm_vreinterpretq_s64(float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_f32))) +int64x2_t __arm_vreinterpretq_s64_f32(float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_f32))) +int64x2_t __arm_vreinterpretq_s64(float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_f16))) +int8x16_t __arm_vreinterpretq_s8_f16(float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_f16))) +int8x16_t __arm_vreinterpretq_s8(float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_f32))) +int8x16_t __arm_vreinterpretq_s8_f32(float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_f32))) +int8x16_t __arm_vreinterpretq_s8(float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_f16))) +uint16x8_t __arm_vreinterpretq_u16_f16(float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_f16))) +uint16x8_t __arm_vreinterpretq_u16(float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_f32))) +uint16x8_t __arm_vreinterpretq_u16_f32(float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_f32))) +uint16x8_t __arm_vreinterpretq_u16(float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_f16))) +uint32x4_t __arm_vreinterpretq_u32_f16(float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_f16))) +uint32x4_t __arm_vreinterpretq_u32(float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_f32))) +uint32x4_t __arm_vreinterpretq_u32_f32(float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_f32))) +uint32x4_t __arm_vreinterpretq_u32(float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_f16))) +uint64x2_t __arm_vreinterpretq_u64_f16(float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_f16))) +uint64x2_t __arm_vreinterpretq_u64(float16x8_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_f32))) +uint64x2_t __arm_vreinterpretq_u64_f32(float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_f32))) +uint64x2_t __arm_vreinterpretq_u64(float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) +uint8x16_t __arm_vreinterpretq_u8_f16(float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) +uint8x16_t __arm_vreinterpretq_u8(float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) +uint8x16_t __arm_vreinterpretq_u8_f32(float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) +uint8x16_t __arm_vreinterpretq_u8(float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_f16))) +float16x8_t __arm_vsetq_lane_f16(float16_t, float16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_f16))) +float16x8_t __arm_vsetq_lane(float16_t, float16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_f32))) +float32x4_t __arm_vsetq_lane_f32(float32_t, float32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_f32))) +float32x4_t __arm_vsetq_lane(float32_t, float32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_f16))) +void __arm_vst1q_f16(float16_t *, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_f16))) +void __arm_vst1q(float16_t *, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_f32))) +void __arm_vst1q_f32(float32_t *, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_f32))) +void __arm_vst1q(float32_t *, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_f16))) +void __arm_vst1q_p_f16(float16_t *, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_f16))) +void __arm_vst1q_p(float16_t *, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_f32))) +void __arm_vst1q_p_f32(float32_t *, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_f32))) +void __arm_vst1q_p(float32_t *, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_f16))) +void __arm_vst2q_f16(float16_t *, float16x8x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_f16))) +void __arm_vst2q(float16_t *, float16x8x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_f32))) +void __arm_vst2q_f32(float32_t *, float32x4x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_f32))) +void __arm_vst2q(float32_t *, float32x4x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_f16))) +void __arm_vst4q_f16(float16_t *, float16x8x4_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vst4q_f16))) +void __arm_vst4q(float16_t *, float16x8x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_f32))) +void __arm_vst4q_f32(float32_t *, float32x4x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_f32))) +void __arm_vst4q(float32_t *, float32x4x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_f16))) +void __arm_vstrhq_f16(float16_t *, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_f16))) +void __arm_vstrhq(float16_t *, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_f16))) +void __arm_vstrhq_p_f16(float16_t *, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_f16))) +void __arm_vstrhq_p(float16_t *, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16))) +void __arm_vstrhq_scatter_offset_f16(float16_t *, uint16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16))) +void __arm_vstrhq_scatter_offset(float16_t *, uint16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16))) +void __arm_vstrhq_scatter_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16))) +void __arm_vstrhq_scatter_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16))) +void __arm_vstrhq_scatter_shifted_offset_f16(float16_t *, uint16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16))) +void __arm_vstrhq_scatter_shifted_offset(float16_t *, uint16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16))) +void __arm_vstrhq_scatter_shifted_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16))) +void __arm_vstrhq_scatter_shifted_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_f32))) +void __arm_vstrwq_f32(float32_t *, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_f32))) +void __arm_vstrwq(float32_t *, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_f32))) +void __arm_vstrwq_p_f32(float32_t *, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_f32))) +void __arm_vstrwq_p(float32_t *, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_f32))) +void __arm_vstrwq_scatter_base_f32(uint32x4_t, int, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_f32))) +void __arm_vstrwq_scatter_base(uint32x4_t, int, 
float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32))) +void __arm_vstrwq_scatter_base_p_f32(uint32x4_t, int, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32))) +void __arm_vstrwq_scatter_base_p(uint32x4_t, int, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32))) +void __arm_vstrwq_scatter_base_wb_f32(uint32x4_t *, int, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32))) +void __arm_vstrwq_scatter_base_wb(uint32x4_t *, int, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32))) +void __arm_vstrwq_scatter_base_wb_p_f32(uint32x4_t *, int, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32))) +void __arm_vstrwq_scatter_base_wb_p(uint32x4_t *, int, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32))) +void __arm_vstrwq_scatter_offset_f32(float32_t *, uint32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32))) +void __arm_vstrwq_scatter_offset(float32_t *, uint32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32))) +void __arm_vstrwq_scatter_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32))) +void __arm_vstrwq_scatter_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32))) +void __arm_vstrwq_scatter_shifted_offset_f32(float32_t *, uint32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32))) +void __arm_vstrwq_scatter_shifted_offset(float32_t *, uint32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32))) +void __arm_vstrwq_scatter_shifted_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32))) +void __arm_vstrwq_scatter_shifted_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_f16))) +float16x8_t __arm_vsubq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_f16))) +float16x8_t __arm_vsubq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_f32))) +float32x4_t __arm_vsubq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_f32))) +float32x4_t __arm_vsubq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_f16))) +float16x8_t __arm_vsubq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static 
__inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_f16))) +float16x8_t __arm_vsubq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_f32))) +float32x4_t __arm_vsubq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_f32))) +float32x4_t __arm_vsubq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_f16))) +float16x8_t __arm_vsubq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_f16))) +float16x8_t __arm_vsubq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_f32))) +float32x4_t __arm_vsubq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_f32))) +float32x4_t __arm_vsubq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_f16))) +float16x8_t __arm_vuninitializedq_f16(); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_f32))) +float32x4_t __arm_vuninitializedq_f32(); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f16))) +float16x8_t __arm_vuninitializedq(float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f32))) +float32x4_t __arm_vuninitializedq(float32x4_t); + +#endif /* (__ARM_FEATURE_MVE & 2) */ + +#if (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE) + +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_asrl))) +int64_t asrl(int64_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_lsll))) +uint64_t lsll(uint64_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_sqrshr))) +int32_t sqrshr(int32_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_sqrshrl))) +int64_t sqrshrl(int64_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_sqrshrl_sat48))) +int64_t sqrshrl_sat48(int64_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_sqshl))) +int32_t sqshl(int32_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_sqshll))) +int64_t sqshll(int64_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_srshr))) +int32_t srshr(int32_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_srshrl))) +int64_t srshrl(int64_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_uqrshl))) +uint32_t uqrshl(uint32_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_uqrshll))) +uint64_t uqrshll(uint64_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_uqrshll_sat48))) +uint64_t uqrshll_sat48(uint64_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_uqshl))) +uint32_t uqshl(uint32_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_uqshll))) 
+uint64_t uqshll(uint64_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_urshr))) +uint32_t urshr(uint32_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_urshrl))) +uint64_t urshrl(uint64_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s16))) +uint32_t vabavq_p_s16(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s16))) +uint32_t vabavq_p(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s32))) +uint32_t vabavq_p_s32(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s32))) +uint32_t vabavq_p(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s8))) +uint32_t vabavq_p_s8(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_s8))) +uint32_t vabavq_p(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u16))) +uint32_t vabavq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u16))) +uint32_t vabavq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u32))) +uint32_t vabavq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u32))) +uint32_t vabavq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u8))) +uint32_t vabavq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_p_u8))) +uint32_t vabavq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_s16))) +uint32_t vabavq_s16(uint32_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_s16))) +uint32_t vabavq(uint32_t, int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_s32))) +uint32_t vabavq_s32(uint32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_s32))) +uint32_t vabavq(uint32_t, int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_s8))) +uint32_t vabavq_s8(uint32_t, int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_s8))) +uint32_t vabavq(uint32_t, int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_u16))) +uint32_t vabavq_u16(uint32_t, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_u16))) +uint32_t vabavq(uint32_t, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_u32))) +uint32_t vabavq_u32(uint32_t, 
uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_u32))) +uint32_t vabavq(uint32_t, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabavq_u8))) +uint32_t vabavq_u8(uint32_t, uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabavq_u8))) +uint32_t vabavq(uint32_t, uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s16))) +int16x8_t vabdq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s16))) +int16x8_t vabdq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s32))) +int32x4_t vabdq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s32))) +int32x4_t vabdq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s8))) +int8x16_t vabdq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_s8))) +int8x16_t vabdq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u16))) +uint16x8_t vabdq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u16))) +uint16x8_t vabdq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u32))) +uint32x4_t vabdq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u32))) +uint32x4_t vabdq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u8))) +uint8x16_t vabdq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_u8))) +uint8x16_t vabdq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_s16))) +int16x8_t vabdq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_s16))) +int16x8_t vabdq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_s32))) +int32x4_t vabdq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_s32))) +int32x4_t vabdq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_s8))) +int8x16_t vabdq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_s8))) +int8x16_t vabdq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_u16))) +uint16x8_t vabdq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_u16))) +uint16x8_t vabdq(uint16x8_t, 
uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_u32))) +uint32x4_t vabdq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_u32))) +uint32x4_t vabdq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_u8))) +uint8x16_t vabdq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_u8))) +uint8x16_t vabdq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s16))) +int16x8_t vabdq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s16))) +int16x8_t vabdq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s32))) +int32x4_t vabdq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s32))) +int32x4_t vabdq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s8))) +int8x16_t vabdq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_s8))) +int8x16_t vabdq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u16))) +uint16x8_t vabdq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u16))) +uint16x8_t vabdq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u32))) +uint32x4_t vabdq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u32))) +uint32x4_t vabdq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u8))) +uint8x16_t vabdq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_u8))) +uint8x16_t vabdq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadciq_m_s32))) +int32x4_t vadciq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadciq_m_s32))) +int32x4_t vadciq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadciq_m_u32))) +uint32x4_t vadciq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadciq_m_u32))) +uint32x4_t vadciq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadciq_s32))) +int32x4_t vadciq_s32(int32x4_t, int32x4_t, unsigned *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadciq_s32))) +int32x4_t vadciq(int32x4_t, int32x4_t, unsigned *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadciq_u32))) +uint32x4_t 
vadciq_u32(uint32x4_t, uint32x4_t, unsigned *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadciq_u32))) +uint32x4_t vadciq(uint32x4_t, uint32x4_t, unsigned *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadcq_m_s32))) +int32x4_t vadcq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadcq_m_s32))) +int32x4_t vadcq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadcq_m_u32))) +uint32x4_t vadcq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadcq_m_u32))) +uint32x4_t vadcq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadcq_s32))) +int32x4_t vadcq_s32(int32x4_t, int32x4_t, unsigned *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadcq_s32))) +int32x4_t vadcq(int32x4_t, int32x4_t, unsigned *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vadcq_u32))) +uint32x4_t vadcq_u32(uint32x4_t, uint32x4_t, unsigned *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vadcq_u32))) +uint32x4_t vadcq(uint32x4_t, uint32x4_t, unsigned *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s16))) +int16x8_t vaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s16))) +int16x8_t vaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s32))) +int32x4_t vaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s32))) +int32x4_t vaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s8))) +int8x16_t vaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_s8))) +int8x16_t vaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u16))) +uint16x8_t vaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u16))) +uint16x8_t vaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u32))) +uint32x4_t vaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u32))) +uint32x4_t vaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u8))) +uint8x16_t vaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_u8))) +uint8x16_t vaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_s16)))
+int16x8_t vaddq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_s16)))
+int16x8_t vaddq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_s32)))
+int32x4_t vaddq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_s32)))
+int32x4_t vaddq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_s8)))
+int8x16_t vaddq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_s8)))
+int8x16_t vaddq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_u16)))
+uint16x8_t vaddq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_u16)))
+uint16x8_t vaddq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_u32)))
+uint32x4_t vaddq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_u32)))
+uint32x4_t vaddq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_u8)))
+uint8x16_t vaddq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_u8)))
+uint8x16_t vaddq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s16)))
+int16x8_t vaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s16)))
+int16x8_t vaddq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s32)))
+int32x4_t vaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s32)))
+int32x4_t vaddq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s8)))
+int8x16_t vaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_s8)))
+int8x16_t vaddq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u16)))
+uint16x8_t vaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u16)))
+uint16x8_t vaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u32)))
+uint32x4_t vaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u32)))
+uint32x4_t vaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u8)))
+uint8x16_t vaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_u8)))
+uint8x16_t vaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s16)))
+int16x8_t vandq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s16)))
+int16x8_t vandq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s32)))
+int32x4_t vandq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s32)))
+int32x4_t vandq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s8)))
+int8x16_t vandq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_s8)))
+int8x16_t vandq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u16)))
+uint16x8_t vandq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u16)))
+uint16x8_t vandq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u32)))
+uint32x4_t vandq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u32)))
+uint32x4_t vandq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u8)))
+uint8x16_t vandq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_u8)))
+uint8x16_t vandq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_s16)))
+int16x8_t vandq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_s16)))
+int16x8_t vandq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_s32)))
+int32x4_t vandq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_s32)))
+int32x4_t vandq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_s8)))
+int8x16_t vandq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_s8)))
+int8x16_t vandq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_u16)))
+uint16x8_t vandq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_u16)))
+uint16x8_t vandq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_u32)))
+uint32x4_t vandq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_u32)))
+uint32x4_t vandq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_u8)))
+uint8x16_t vandq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_u8)))
+uint8x16_t vandq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s16)))
+int16x8_t vandq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s16)))
+int16x8_t vandq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s32)))
+int32x4_t vandq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s32)))
+int32x4_t vandq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s8)))
+int8x16_t vandq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_s8)))
+int8x16_t vandq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u16)))
+uint16x8_t vandq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u16)))
+uint16x8_t vandq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u32)))
+uint32x4_t vandq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u32)))
+uint32x4_t vandq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u8)))
+uint8x16_t vandq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_u8)))
+uint8x16_t vandq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s16)))
+int16x8_t vbicq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s16)))
+int16x8_t vbicq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s32)))
+int32x4_t vbicq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s32)))
+int32x4_t vbicq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s8)))
+int8x16_t vbicq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_s8)))
+int8x16_t vbicq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u16)))
+uint16x8_t vbicq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u16)))
+uint16x8_t vbicq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u32)))
+uint32x4_t vbicq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u32)))
+uint32x4_t vbicq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u8)))
+uint8x16_t vbicq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_u8)))
+uint8x16_t vbicq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_s16)))
+int16x8_t vbicq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_s16)))
+int16x8_t vbicq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_s32)))
+int32x4_t vbicq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_s32)))
+int32x4_t vbicq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_s8)))
+int8x16_t vbicq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_s8)))
+int8x16_t vbicq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_u16)))
+uint16x8_t vbicq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_u16)))
+uint16x8_t vbicq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_u32)))
+uint32x4_t vbicq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_u32)))
+uint32x4_t vbicq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_u8)))
+uint8x16_t vbicq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_u8)))
+uint8x16_t vbicq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s16)))
+int16x8_t vbicq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s16)))
+int16x8_t vbicq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s32)))
+int32x4_t vbicq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s32)))
+int32x4_t vbicq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s8)))
+int8x16_t vbicq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_s8)))
+int8x16_t vbicq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u16)))
+uint16x8_t vbicq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u16)))
+uint16x8_t vbicq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u32)))
+uint32x4_t vbicq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u32)))
+uint32x4_t vbicq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u8)))
+uint8x16_t vbicq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_u8)))
+uint8x16_t vbicq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s16)))
+int16x8_t vcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s16)))
+int16x8_t vcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s32)))
+int32x4_t vcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s32)))
+int32x4_t vcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s8)))
+int8x16_t vcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_s8)))
+int8x16_t vcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u16)))
+uint16x8_t vcaddq_rot270_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u16)))
+uint16x8_t vcaddq_rot270_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u32)))
+uint32x4_t vcaddq_rot270_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u32)))
+uint32x4_t vcaddq_rot270_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u8)))
+uint8x16_t vcaddq_rot270_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_u8)))
+uint8x16_t vcaddq_rot270_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s16)))
+int16x8_t vcaddq_rot270_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s16)))
+int16x8_t vcaddq_rot270(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s32)))
+int32x4_t vcaddq_rot270_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s32)))
+int32x4_t vcaddq_rot270(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s8)))
+int8x16_t vcaddq_rot270_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_s8)))
+int8x16_t vcaddq_rot270(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u16)))
+uint16x8_t vcaddq_rot270_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u16)))
+uint16x8_t vcaddq_rot270(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u32)))
+uint32x4_t vcaddq_rot270_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u32)))
+uint32x4_t vcaddq_rot270(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u8)))
+uint8x16_t vcaddq_rot270_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_u8)))
+uint8x16_t vcaddq_rot270(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s16)))
+int16x8_t vcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s16)))
+int16x8_t vcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s32)))
+int32x4_t vcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s32)))
+int32x4_t vcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s8)))
+int8x16_t vcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_s8)))
+int8x16_t vcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u16)))
+uint16x8_t vcaddq_rot270_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u16)))
+uint16x8_t vcaddq_rot270_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u32)))
+uint32x4_t vcaddq_rot270_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u32)))
+uint32x4_t vcaddq_rot270_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u8)))
+uint8x16_t vcaddq_rot270_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_u8)))
+uint8x16_t vcaddq_rot270_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s16)))
+int16x8_t vcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s16)))
+int16x8_t vcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s32)))
+int32x4_t vcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s32)))
+int32x4_t vcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s8)))
+int8x16_t vcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_s8)))
+int8x16_t vcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u16)))
+uint16x8_t vcaddq_rot90_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u16)))
+uint16x8_t vcaddq_rot90_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u32)))
+uint32x4_t vcaddq_rot90_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u32)))
+uint32x4_t vcaddq_rot90_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u8)))
+uint8x16_t vcaddq_rot90_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_u8)))
+uint8x16_t vcaddq_rot90_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s16)))
+int16x8_t vcaddq_rot90_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s16)))
+int16x8_t vcaddq_rot90(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s32)))
+int32x4_t vcaddq_rot90_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s32)))
+int32x4_t vcaddq_rot90(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s8)))
+int8x16_t vcaddq_rot90_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_s8)))
+int8x16_t vcaddq_rot90(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u16)))
+uint16x8_t vcaddq_rot90_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u16)))
+uint16x8_t vcaddq_rot90(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u32)))
+uint32x4_t vcaddq_rot90_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u32)))
+uint32x4_t vcaddq_rot90(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u8)))
+uint8x16_t vcaddq_rot90_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_u8)))
+uint8x16_t vcaddq_rot90(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s16)))
+int16x8_t vcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s16)))
+int16x8_t vcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s32)))
+int32x4_t vcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s32)))
+int32x4_t vcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s8)))
+int8x16_t vcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_s8)))
+int8x16_t vcaddq_rot90_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u16)))
+uint16x8_t vcaddq_rot90_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u16)))
+uint16x8_t vcaddq_rot90_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u32)))
+uint32x4_t vcaddq_rot90_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u32)))
+uint32x4_t vcaddq_rot90_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u8)))
+uint8x16_t vcaddq_rot90_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_u8)))
+uint8x16_t vcaddq_rot90_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u16)))
+mve_pred16_t vcmpcsq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u16)))
+mve_pred16_t vcmpcsq_m(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u32)))
+mve_pred16_t vcmpcsq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u32)))
+mve_pred16_t vcmpcsq_m(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u8)))
+mve_pred16_t vcmpcsq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_n_u8)))
+mve_pred16_t vcmpcsq_m(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u16)))
+mve_pred16_t vcmpcsq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u16)))
+mve_pred16_t vcmpcsq_m(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u32)))
+mve_pred16_t vcmpcsq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u32)))
+mve_pred16_t vcmpcsq_m(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u8)))
+mve_pred16_t vcmpcsq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_m_u8)))
+mve_pred16_t vcmpcsq_m(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u16)))
+mve_pred16_t vcmpcsq_n_u16(uint16x8_t, uint16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u16)))
+mve_pred16_t vcmpcsq(uint16x8_t, uint16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u32)))
+mve_pred16_t vcmpcsq_n_u32(uint32x4_t, uint32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u32)))
+mve_pred16_t vcmpcsq(uint32x4_t, uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u8)))
+mve_pred16_t vcmpcsq_n_u8(uint8x16_t, uint8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_n_u8)))
+mve_pred16_t vcmpcsq(uint8x16_t, uint8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u16)))
+mve_pred16_t vcmpcsq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u16)))
+mve_pred16_t vcmpcsq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u32)))
+mve_pred16_t vcmpcsq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u32)))
+mve_pred16_t vcmpcsq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u8)))
+mve_pred16_t vcmpcsq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpcsq_u8)))
+mve_pred16_t vcmpcsq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s16)))
+mve_pred16_t vcmpeqq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s16)))
+mve_pred16_t vcmpeqq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s32)))
+mve_pred16_t vcmpeqq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s32)))
+mve_pred16_t vcmpeqq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s8)))
+mve_pred16_t vcmpeqq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_s8)))
+mve_pred16_t vcmpeqq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u16)))
+mve_pred16_t vcmpeqq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u16)))
+mve_pred16_t vcmpeqq_m(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u32)))
+mve_pred16_t vcmpeqq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u32)))
+mve_pred16_t vcmpeqq_m(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u8)))
+mve_pred16_t vcmpeqq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_u8)))
+mve_pred16_t vcmpeqq_m(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s16)))
+mve_pred16_t vcmpeqq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s16)))
+mve_pred16_t vcmpeqq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s32)))
+mve_pred16_t vcmpeqq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s32)))
+mve_pred16_t vcmpeqq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s8)))
+mve_pred16_t vcmpeqq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_s8)))
+mve_pred16_t vcmpeqq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u16)))
+mve_pred16_t vcmpeqq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u16)))
+mve_pred16_t vcmpeqq_m(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u32)))
+mve_pred16_t vcmpeqq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u32)))
+mve_pred16_t vcmpeqq_m(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u8)))
+mve_pred16_t vcmpeqq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_u8)))
+mve_pred16_t vcmpeqq_m(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s16)))
+mve_pred16_t vcmpeqq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s16)))
+mve_pred16_t vcmpeqq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s32)))
+mve_pred16_t vcmpeqq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s32)))
+mve_pred16_t vcmpeqq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s8)))
+mve_pred16_t vcmpeqq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_s8)))
+mve_pred16_t vcmpeqq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u16)))
+mve_pred16_t vcmpeqq_n_u16(uint16x8_t, uint16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u16)))
+mve_pred16_t vcmpeqq(uint16x8_t, uint16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u32)))
+mve_pred16_t vcmpeqq_n_u32(uint32x4_t, uint32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u32)))
+mve_pred16_t vcmpeqq(uint32x4_t, uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u8)))
+mve_pred16_t vcmpeqq_n_u8(uint8x16_t, uint8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_u8)))
+mve_pred16_t vcmpeqq(uint8x16_t, uint8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s16)))
+mve_pred16_t vcmpeqq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s16)))
+mve_pred16_t vcmpeqq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s32)))
+mve_pred16_t vcmpeqq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s32)))
+mve_pred16_t vcmpeqq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s8)))
+mve_pred16_t vcmpeqq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_s8)))
+mve_pred16_t vcmpeqq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u16)))
+mve_pred16_t vcmpeqq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u16)))
+mve_pred16_t vcmpeqq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u32)))
+mve_pred16_t vcmpeqq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u32)))
+mve_pred16_t vcmpeqq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u8)))
+mve_pred16_t vcmpeqq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_u8)))
+mve_pred16_t vcmpeqq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s16)))
+mve_pred16_t vcmpgeq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s16)))
+mve_pred16_t vcmpgeq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s32)))
+mve_pred16_t vcmpgeq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s32)))
+mve_pred16_t vcmpgeq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s8)))
+mve_pred16_t vcmpgeq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_s8)))
+mve_pred16_t vcmpgeq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s16)))
+mve_pred16_t vcmpgeq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s16)))
+mve_pred16_t vcmpgeq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s32)))
+mve_pred16_t vcmpgeq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s32)))
+mve_pred16_t vcmpgeq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s8)))
+mve_pred16_t vcmpgeq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_s8)))
+mve_pred16_t vcmpgeq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s16)))
+mve_pred16_t vcmpgeq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s16)))
+mve_pred16_t vcmpgeq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s32)))
+mve_pred16_t vcmpgeq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s32)))
+mve_pred16_t vcmpgeq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s8)))
+mve_pred16_t vcmpgeq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_s8)))
+mve_pred16_t vcmpgeq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s16)))
+mve_pred16_t vcmpgeq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s16)))
+mve_pred16_t vcmpgeq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s32)))
+mve_pred16_t vcmpgeq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s32)))
+mve_pred16_t vcmpgeq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s8)))
+mve_pred16_t vcmpgeq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_s8)))
+mve_pred16_t vcmpgeq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s16)))
+mve_pred16_t vcmpgtq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s16)))
+mve_pred16_t vcmpgtq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s32)))
+mve_pred16_t vcmpgtq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s32)))
+mve_pred16_t vcmpgtq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s8)))
+mve_pred16_t vcmpgtq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_s8)))
+mve_pred16_t vcmpgtq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s16)))
+mve_pred16_t vcmpgtq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s16)))
+mve_pred16_t vcmpgtq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s32)))
+mve_pred16_t vcmpgtq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s32)))
+mve_pred16_t vcmpgtq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s8)))
+mve_pred16_t vcmpgtq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_s8)))
+mve_pred16_t vcmpgtq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s16)))
+mve_pred16_t vcmpgtq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s16)))
+mve_pred16_t vcmpgtq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s32)))
+mve_pred16_t vcmpgtq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s32)))
+mve_pred16_t vcmpgtq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s8)))
+mve_pred16_t vcmpgtq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_s8)))
+mve_pred16_t vcmpgtq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s16)))
+mve_pred16_t vcmpgtq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s16)))
+mve_pred16_t vcmpgtq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s32)))
+mve_pred16_t vcmpgtq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s32)))
+mve_pred16_t vcmpgtq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s8)))
+mve_pred16_t vcmpgtq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_s8)))
+mve_pred16_t vcmpgtq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u16)))
+mve_pred16_t vcmphiq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u16)))
+mve_pred16_t vcmphiq_m(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u32)))
+mve_pred16_t vcmphiq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u32)))
+mve_pred16_t vcmphiq_m(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u8)))
+mve_pred16_t vcmphiq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_n_u8)))
+mve_pred16_t vcmphiq_m(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u16)))
+mve_pred16_t vcmphiq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u16)))
+mve_pred16_t vcmphiq_m(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u32)))
+mve_pred16_t vcmphiq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u32)))
+mve_pred16_t vcmphiq_m(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u8)))
+mve_pred16_t vcmphiq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_m_u8)))
+mve_pred16_t vcmphiq_m(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u16)))
+mve_pred16_t vcmphiq_n_u16(uint16x8_t, uint16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u16)))
+mve_pred16_t vcmphiq(uint16x8_t, uint16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u32)))
+mve_pred16_t vcmphiq_n_u32(uint32x4_t, uint32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u32)))
+mve_pred16_t vcmphiq(uint32x4_t, uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u8)))
+mve_pred16_t vcmphiq_n_u8(uint8x16_t, uint8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_n_u8)))
+mve_pred16_t vcmphiq(uint8x16_t, uint8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u16)))
+mve_pred16_t vcmphiq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u16)))
+mve_pred16_t vcmphiq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u32)))
+mve_pred16_t vcmphiq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u32)))
+mve_pred16_t vcmphiq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u8)))
+mve_pred16_t vcmphiq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmphiq_u8)))
+mve_pred16_t vcmphiq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s16)))
+mve_pred16_t vcmpleq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s16)))
+mve_pred16_t vcmpleq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s32)))
+mve_pred16_t vcmpleq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s32)))
+mve_pred16_t vcmpleq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s8)))
+mve_pred16_t vcmpleq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_s8)))
+mve_pred16_t vcmpleq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s16)))
+mve_pred16_t vcmpleq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s16)))
+mve_pred16_t vcmpleq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s32)))
+mve_pred16_t vcmpleq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s32)))
+mve_pred16_t vcmpleq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s8)))
+mve_pred16_t vcmpleq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_s8)))
+mve_pred16_t vcmpleq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s16)))
+mve_pred16_t vcmpleq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s16)))
+mve_pred16_t vcmpleq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s32)))
+mve_pred16_t vcmpleq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s32)))
+mve_pred16_t vcmpleq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s8)))
+mve_pred16_t vcmpleq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_s8)))
+mve_pred16_t vcmpleq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s16)))
+mve_pred16_t vcmpleq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s16)))
+mve_pred16_t vcmpleq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s32)))
+mve_pred16_t vcmpleq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s32)))
+mve_pred16_t vcmpleq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s8)))
+mve_pred16_t vcmpleq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_s8)))
+mve_pred16_t vcmpleq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s16)))
+mve_pred16_t vcmpltq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s16)))
+mve_pred16_t vcmpltq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s32)))
+mve_pred16_t vcmpltq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s32)))
+mve_pred16_t vcmpltq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s8)))
+mve_pred16_t vcmpltq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_s8)))
+mve_pred16_t vcmpltq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s16)))
+mve_pred16_t vcmpltq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s16)))
+mve_pred16_t vcmpltq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s32)))
+mve_pred16_t vcmpltq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s32)))
+mve_pred16_t vcmpltq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s8)))
+mve_pred16_t vcmpltq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_s8)))
+mve_pred16_t vcmpltq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s16)))
+mve_pred16_t vcmpltq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s16)))
+mve_pred16_t vcmpltq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s32)))
+mve_pred16_t vcmpltq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s32)))
+mve_pred16_t vcmpltq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s8)))
+mve_pred16_t vcmpltq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_s8)))
+mve_pred16_t vcmpltq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s16)))
+mve_pred16_t vcmpltq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s16)))
+mve_pred16_t vcmpltq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s32)))
+mve_pred16_t vcmpltq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s32)))
+mve_pred16_t vcmpltq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s8)))
+mve_pred16_t vcmpltq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_s8)))
+mve_pred16_t vcmpltq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s16)))
+mve_pred16_t vcmpneq_m_n_s16(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s16)))
+mve_pred16_t vcmpneq_m(int16x8_t, int16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s32)))
+mve_pred16_t vcmpneq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s32)))
+mve_pred16_t vcmpneq_m(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s8)))
+mve_pred16_t vcmpneq_m_n_s8(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_s8)))
+mve_pred16_t vcmpneq_m(int8x16_t, int8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u16)))
+mve_pred16_t vcmpneq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u16)))
+mve_pred16_t vcmpneq_m(uint16x8_t, uint16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u32)))
+mve_pred16_t vcmpneq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u32)))
+mve_pred16_t vcmpneq_m(uint32x4_t, uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u8)))
+mve_pred16_t vcmpneq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_u8)))
+mve_pred16_t vcmpneq_m(uint8x16_t, uint8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s16)))
+mve_pred16_t vcmpneq_m_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s16)))
+mve_pred16_t vcmpneq_m(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s32)))
+mve_pred16_t vcmpneq_m_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s32)))
+mve_pred16_t vcmpneq_m(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s8)))
+mve_pred16_t vcmpneq_m_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_s8)))
+mve_pred16_t vcmpneq_m(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u16)))
+mve_pred16_t vcmpneq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u16)))
+mve_pred16_t vcmpneq_m(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u32)))
+mve_pred16_t vcmpneq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u32)))
+mve_pred16_t vcmpneq_m(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u8)))
+mve_pred16_t vcmpneq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_u8)))
+mve_pred16_t vcmpneq_m(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s16)))
+mve_pred16_t vcmpneq_n_s16(int16x8_t, int16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s16)))
+mve_pred16_t vcmpneq(int16x8_t, int16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s32)))
+mve_pred16_t vcmpneq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s32)))
+mve_pred16_t vcmpneq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s8)))
+mve_pred16_t vcmpneq_n_s8(int8x16_t, int8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_s8)))
+mve_pred16_t vcmpneq(int8x16_t, int8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u16)))
+mve_pred16_t vcmpneq_n_u16(uint16x8_t, uint16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u16)))
+mve_pred16_t vcmpneq(uint16x8_t, uint16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u32)))
+mve_pred16_t vcmpneq_n_u32(uint32x4_t, uint32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u32)))
+mve_pred16_t vcmpneq(uint32x4_t, uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u8)))
+mve_pred16_t vcmpneq_n_u8(uint8x16_t, uint8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_u8)))
+mve_pred16_t vcmpneq(uint8x16_t, uint8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s16)))
+mve_pred16_t vcmpneq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s16)))
+mve_pred16_t vcmpneq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s32)))
+mve_pred16_t vcmpneq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s32)))
+mve_pred16_t vcmpneq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s8)))
+mve_pred16_t vcmpneq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_s8)))
+mve_pred16_t vcmpneq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u16)))
+mve_pred16_t vcmpneq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u16)))
+mve_pred16_t vcmpneq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u32)))
+mve_pred16_t vcmpneq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u32)))
+mve_pred16_t vcmpneq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u8)))
+mve_pred16_t vcmpneq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_u8)))
+mve_pred16_t vcmpneq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_s16)))
+int16x8_t vcreateq_s16(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_s32)))
+int32x4_t vcreateq_s32(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_s64)))
+int64x2_t vcreateq_s64(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_s8)))
+int8x16_t vcreateq_s8(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_u16)))
+uint16x8_t vcreateq_u16(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_u32)))
+uint32x4_t vcreateq_u32(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_u64)))
+uint64x2_t vcreateq_u64(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_u8)))
+uint8x16_t vcreateq_u8(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp16q)))
+mve_pred16_t vctp16q(uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp16q_m)))
+mve_pred16_t vctp16q_m(uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp32q)))
+mve_pred16_t vctp32q(uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp32q_m)))
+mve_pred16_t vctp32q_m(uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp64q)))
+mve_pred16_t vctp64q(uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp64q_m)))
+mve_pred16_t vctp64q_m(uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp8q)))
+mve_pred16_t vctp8q(uint32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vctp8q_m)))
+mve_pred16_t vctp8q_m(uint32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s16)))
+int16x8_t veorq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s16)))
+int16x8_t veorq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s32)))
+int32x4_t veorq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s32)))
+int32x4_t veorq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s8)))
+int8x16_t veorq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_s8)))
+int8x16_t veorq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u16)))
+uint16x8_t veorq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u16)))
+uint16x8_t veorq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u32)))
+uint32x4_t veorq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u32)))
+uint32x4_t veorq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u8)))
+uint8x16_t veorq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_u8)))
+uint8x16_t veorq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_s16)))
+int16x8_t veorq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_s16)))
+int16x8_t veorq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_s32)))
+int32x4_t veorq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_s32)))
+int32x4_t veorq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_s8)))
+int8x16_t veorq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_s8)))
+int8x16_t veorq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_u16)))
+uint16x8_t veorq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_u16)))
+uint16x8_t veorq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_u32)))
+uint32x4_t veorq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_u32)))
+uint32x4_t veorq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_u8)))
+uint8x16_t veorq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_u8)))
+uint8x16_t veorq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s16)))
+int16x8_t veorq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s16)))
+int16x8_t veorq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s32)))
+int32x4_t veorq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s32)))
+int32x4_t veorq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s8)))
+int8x16_t veorq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_s8)))
+int8x16_t veorq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u16)))
+uint16x8_t veorq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u16)))
+uint16x8_t veorq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u32)))
+uint32x4_t veorq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u32)))
+uint32x4_t veorq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u8)))
+uint8x16_t veorq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_u8)))
+uint8x16_t veorq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s16)))
+int16_t vgetq_lane_s16(int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s16)))
+int16_t vgetq_lane(int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s32)))
+int32_t vgetq_lane_s32(int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s32)))
+int32_t vgetq_lane(int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s64)))
+int64_t vgetq_lane_s64(int64x2_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s64)))
+int64_t vgetq_lane(int64x2_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s8)))
+int8_t vgetq_lane_s8(int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_s8)))
+int8_t vgetq_lane(int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u16)))
+uint16_t vgetq_lane_u16(uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u16)))
+uint16_t vgetq_lane(uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u32)))
+uint32_t vgetq_lane_u32(uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u32)))
+uint32_t vgetq_lane(uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u64)))
+uint64_t vgetq_lane_u64(uint64x2_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u64)))
+uint64_t vgetq_lane(uint64x2_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u8)))
+uint8_t vgetq_lane_u8(uint8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_u8)))
+uint8_t vgetq_lane(uint8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s16)))
+int16x8_t vhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s16)))
+int16x8_t vhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s32)))
+int32x4_t vhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s32)))
+int32x4_t vhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s8)))
+int8x16_t vhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_s8)))
+int8x16_t vhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u16)))
+uint16x8_t vhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable,
__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u16))) +uint16x8_t vhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u32))) +uint32x4_t vhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u32))) +uint32x4_t vhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u8))) +uint8x16_t vhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_m_u8))) +uint8x16_t vhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s16))) +int16x8_t vhaddq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s16))) +int16x8_t vhaddq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s32))) +int32x4_t vhaddq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s32))) +int32x4_t vhaddq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s8))) +int8x16_t vhaddq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_s8))) +int8x16_t vhaddq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u16))) +uint16x8_t vhaddq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u16))) +uint16x8_t vhaddq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u32))) +uint32x4_t vhaddq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u32))) +uint32x4_t vhaddq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u8))) +uint8x16_t vhaddq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_u8))) +uint8x16_t vhaddq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s16))) +int16x8_t vhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s16))) +int16x8_t vhaddq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s32))) +int32x4_t vhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s32))) +int32x4_t vhaddq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s8))) +int8x16_t vhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_s8))) +int8x16_t vhaddq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u16))) +uint16x8_t vhaddq_x_u16(uint16x8_t, uint16x8_t, 
mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u16))) +uint16x8_t vhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u32))) +uint32x4_t vhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u32))) +uint32x4_t vhaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u8))) +uint8x16_t vhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhaddq_x_u8))) +uint8x16_t vhaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16))) +int16x8_t vhcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16))) +int16x8_t vhcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32))) +int32x4_t vhcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32))) +int32x4_t vhcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8))) +int8x16_t vhcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8))) +int8x16_t vhcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s16))) +int16x8_t vhcaddq_rot270_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s16))) +int16x8_t vhcaddq_rot270(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s32))) +int32x4_t vhcaddq_rot270_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s32))) +int32x4_t vhcaddq_rot270(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s8))) +int8x16_t vhcaddq_rot270_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_s8))) +int8x16_t vhcaddq_rot270(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16))) +int16x8_t vhcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16))) +int16x8_t vhcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32))) +int32x4_t vhcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32))) +int32x4_t vhcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8))) +int8x16_t vhcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8))) +int8x16_t vhcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16))) +int16x8_t vhcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16))) +int16x8_t vhcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32))) +int32x4_t vhcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32))) +int32x4_t vhcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8))) +int8x16_t vhcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8))) +int8x16_t vhcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s16))) +int16x8_t vhcaddq_rot90_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s16))) +int16x8_t vhcaddq_rot90(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s32))) +int32x4_t vhcaddq_rot90_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s32))) +int32x4_t vhcaddq_rot90(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s8))) +int8x16_t vhcaddq_rot90_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_s8))) +int8x16_t vhcaddq_rot90(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16))) +int16x8_t vhcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16))) +int16x8_t vhcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32))) +int32x4_t vhcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32))) +int32x4_t vhcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8))) +int8x16_t vhcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8))) +int8x16_t vhcaddq_rot90_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s16))) +int16x8_t vhsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s16))) +int16x8_t vhsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s32))) +int32x4_t vhsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s32))) +int32x4_t vhsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s8))) +int8x16_t vhsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_s8))) +int8x16_t vhsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u16))) +uint16x8_t vhsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u16))) +uint16x8_t vhsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u32))) +uint32x4_t vhsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u32))) +uint32x4_t vhsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u8))) +uint8x16_t vhsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_m_u8))) +uint8x16_t vhsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s16))) +int16x8_t vhsubq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s16))) +int16x8_t vhsubq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s32))) +int32x4_t vhsubq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s32))) +int32x4_t vhsubq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s8))) +int8x16_t vhsubq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_s8))) +int8x16_t vhsubq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u16))) +uint16x8_t vhsubq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u16))) +uint16x8_t vhsubq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u32))) +uint32x4_t vhsubq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u32))) +uint32x4_t vhsubq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u8))) +uint8x16_t vhsubq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_u8))) +uint8x16_t vhsubq(uint8x16_t, uint8x16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s16))) +int16x8_t vhsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s16))) +int16x8_t vhsubq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s32))) +int32x4_t vhsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s32))) +int32x4_t vhsubq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s8))) +int8x16_t vhsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_s8))) +int8x16_t vhsubq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u16))) +uint16x8_t vhsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u16))) +uint16x8_t vhsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u32))) +uint32x4_t vhsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u32))) +uint32x4_t vhsubq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u8))) +uint8x16_t vhsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vhsubq_x_u8))) +uint8x16_t vhsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_s16))) +int16x8_t vld1q_s16(const int16_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_s16))) +int16x8_t vld1q(const int16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_s32))) +int32x4_t vld1q_s32(const int32_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_s32))) +int32x4_t vld1q(const int32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_s8))) +int8x16_t vld1q_s8(const int8_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_s8))) +int8x16_t vld1q(const int8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_u16))) +uint16x8_t vld1q_u16(const uint16_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_u16))) +uint16x8_t vld1q(const uint16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_u32))) +uint32x4_t vld1q_u32(const uint32_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_u32))) +uint32x4_t vld1q(const uint32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_u8))) +uint8x16_t vld1q_u8(const uint8_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_u8))) +uint8x16_t vld1q(const uint8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s16))) +int16x8_t 
vld1q_z_s16(const int16_t *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s16))) +int16x8_t vld1q_z(const int16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s32))) +int32x4_t vld1q_z_s32(const int32_t *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s32))) +int32x4_t vld1q_z(const int32_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s8))) +int8x16_t vld1q_z_s8(const int8_t *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_s8))) +int8x16_t vld1q_z(const int8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u16))) +uint16x8_t vld1q_z_u16(const uint16_t *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u16))) +uint16x8_t vld1q_z(const uint16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u32))) +uint32x4_t vld1q_z_u32(const uint32_t *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u32))) +uint32x4_t vld1q_z(const uint32_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u8))) +uint8x16_t vld1q_z_u8(const uint8_t *, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_u8))) +uint8x16_t vld1q_z(const uint8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_s16))) +int16x8x2_t vld2q_s16(const int16_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_s16))) +int16x8x2_t vld2q(const int16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_s32))) +int32x4x2_t vld2q_s32(const int32_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_s32))) +int32x4x2_t vld2q(const int32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_s8))) +int8x16x2_t vld2q_s8(const int8_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_s8))) +int8x16x2_t vld2q(const int8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_u16))) +uint16x8x2_t vld2q_u16(const uint16_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_u16))) +uint16x8x2_t vld2q(const uint16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_u32))) +uint32x4x2_t vld2q_u32(const uint32_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_u32))) +uint32x4x2_t vld2q(const uint32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_u8))) +uint8x16x2_t vld2q_u8(const uint8_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_u8))) +uint8x16x2_t vld2q(const uint8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_s16))) +int16x8x4_t vld4q_s16(const int16_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_s16))) +int16x8x4_t vld4q(const int16_t *); 
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_s32))) +int32x4x4_t vld4q_s32(const int32_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_s32))) +int32x4x4_t vld4q(const int32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_s8))) +int8x16x4_t vld4q_s8(const int8_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_s8))) +int8x16x4_t vld4q(const int8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_u16))) +uint16x8x4_t vld4q_u16(const uint16_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_u16))) +uint16x8x4_t vld4q(const uint16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_u32))) +uint32x4x4_t vld4q_u32(const uint32_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_u32))) +uint32x4x4_t vld4q(const uint32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_u8))) +uint8x16x4_t vld4q_u8(const uint8_t *); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_u8))) +uint8x16x4_t vld4q(const uint8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s16))) +int16x8_t vldrbq_gather_offset_s16(const int8_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s16))) +int16x8_t vldrbq_gather_offset(const int8_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s32))) +int32x4_t vldrbq_gather_offset_s32(const int8_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s32))) +int32x4_t vldrbq_gather_offset(const int8_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s8))) +int8x16_t vldrbq_gather_offset_s8(const int8_t *, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_s8))) +int8x16_t vldrbq_gather_offset(const int8_t *, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u16))) +uint16x8_t vldrbq_gather_offset_u16(const uint8_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u16))) +uint16x8_t vldrbq_gather_offset(const uint8_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u32))) +uint32x4_t vldrbq_gather_offset_u32(const uint8_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u32))) +uint32x4_t vldrbq_gather_offset(const uint8_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) +uint8x16_t vldrbq_gather_offset_u8(const uint8_t *, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) +uint8x16_t vldrbq_gather_offset(const uint8_t *, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) +int16x8_t vldrbq_gather_offset_z_s16(const 
int8_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) +int16x8_t vldrbq_gather_offset_z(const int8_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) +int32x4_t vldrbq_gather_offset_z_s32(const int8_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) +int32x4_t vldrbq_gather_offset_z(const int8_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) +int8x16_t vldrbq_gather_offset_z_s8(const int8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) +int8x16_t vldrbq_gather_offset_z(const int8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) +uint16x8_t vldrbq_gather_offset_z_u16(const uint8_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) +uint16x8_t vldrbq_gather_offset_z(const uint8_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) +uint32x4_t vldrbq_gather_offset_z_u32(const uint8_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) +uint32x4_t vldrbq_gather_offset_z(const uint8_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) +uint8x16_t vldrbq_gather_offset_z_u8(const uint8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) +uint8x16_t vldrbq_gather_offset_z(const uint8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_s16))) +int16x8_t vldrbq_s16(const int8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_s32))) +int32x4_t vldrbq_s32(const int8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_s8))) +int8x16_t vldrbq_s8(const int8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_u16))) +uint16x8_t vldrbq_u16(const uint8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_u32))) +uint32x4_t vldrbq_u32(const uint8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_u8))) +uint8x16_t vldrbq_u8(const uint8_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_s16))) +int16x8_t vldrbq_z_s16(const int8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_s32))) +int32x4_t vldrbq_z_s32(const int8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_s8))) +int8x16_t vldrbq_z_s8(const int8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_u16))) +uint16x8_t vldrbq_z_u16(const uint8_t *, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_u32))) +uint32x4_t vldrbq_z_u32(const uint8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrbq_z_u8))) +uint8x16_t vldrbq_z_u8(const uint8_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_s64))) +int64x2_t vldrdq_gather_base_s64(uint64x2_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_u64))) +uint64x2_t vldrdq_gather_base_u64(uint64x2_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_wb_s64))) +int64x2_t vldrdq_gather_base_wb_s64(uint64x2_t *, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_wb_u64))) +uint64x2_t vldrdq_gather_base_wb_u64(uint64x2_t *, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_s64))) +int64x2_t vldrdq_gather_base_wb_z_s64(uint64x2_t *, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_u64))) +uint64x2_t vldrdq_gather_base_wb_z_u64(uint64x2_t *, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_z_s64))) +int64x2_t vldrdq_gather_base_z_s64(uint64x2_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_base_z_u64))) +uint64x2_t vldrdq_gather_base_z_u64(uint64x2_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) +int64x2_t vldrdq_gather_offset_s64(const int64_t *, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) +int64x2_t vldrdq_gather_offset(const int64_t *, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) +uint64x2_t vldrdq_gather_offset_u64(const uint64_t *, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) +uint64x2_t vldrdq_gather_offset(const uint64_t *, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) +int64x2_t vldrdq_gather_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) +int64x2_t vldrdq_gather_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) +uint64x2_t vldrdq_gather_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) +uint64x2_t vldrdq_gather_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) +int64x2_t vldrdq_gather_shifted_offset_s64(const int64_t *, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) +int64x2_t vldrdq_gather_shifted_offset(const int64_t *, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) 
+uint64x2_t vldrdq_gather_shifted_offset_u64(const uint64_t *, uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) +uint64x2_t vldrdq_gather_shifted_offset(const uint64_t *, uint64x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) +int64x2_t vldrdq_gather_shifted_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) +int64x2_t vldrdq_gather_shifted_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) +uint64x2_t vldrdq_gather_shifted_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) +uint64x2_t vldrdq_gather_shifted_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) +int16x8_t vldrhq_gather_offset_s16(const int16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) +int16x8_t vldrhq_gather_offset(const int16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) +int32x4_t vldrhq_gather_offset_s32(const int16_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) +int32x4_t vldrhq_gather_offset(const int16_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) +uint16x8_t vldrhq_gather_offset_u16(const uint16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) +uint16x8_t vldrhq_gather_offset(const uint16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) +uint32x4_t vldrhq_gather_offset_u32(const uint16_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) +uint32x4_t vldrhq_gather_offset(const uint16_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) +int16x8_t vldrhq_gather_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) +int16x8_t vldrhq_gather_offset_z(const int16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) +int32x4_t vldrhq_gather_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) +int32x4_t vldrhq_gather_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) +uint16x8_t vldrhq_gather_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) +uint16x8_t vldrhq_gather_offset_z(const uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) +uint32x4_t vldrhq_gather_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) +uint32x4_t vldrhq_gather_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) +int16x8_t vldrhq_gather_shifted_offset_s16(const int16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) +int16x8_t vldrhq_gather_shifted_offset(const int16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) +int32x4_t vldrhq_gather_shifted_offset_s32(const int16_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) +int32x4_t vldrhq_gather_shifted_offset(const int16_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) +uint16x8_t vldrhq_gather_shifted_offset_u16(const uint16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) +uint16x8_t vldrhq_gather_shifted_offset(const uint16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) +uint32x4_t vldrhq_gather_shifted_offset_u32(const uint16_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) +uint32x4_t vldrhq_gather_shifted_offset(const uint16_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) +int16x8_t vldrhq_gather_shifted_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) +int16x8_t vldrhq_gather_shifted_offset_z(const int16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) +int32x4_t vldrhq_gather_shifted_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) +int32x4_t vldrhq_gather_shifted_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) +uint16x8_t vldrhq_gather_shifted_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) +uint16x8_t vldrhq_gather_shifted_offset_z(const uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) +uint32x4_t vldrhq_gather_shifted_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) +uint32x4_t vldrhq_gather_shifted_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_s16))) +int16x8_t vldrhq_s16(const int16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_s32))) +int32x4_t vldrhq_s32(const int16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_u16))) +uint16x8_t vldrhq_u16(const uint16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_u32))) +uint32x4_t vldrhq_u32(const uint16_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_z_s16))) +int16x8_t vldrhq_z_s16(const int16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_z_s32))) +int32x4_t vldrhq_z_s32(const int16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_z_u16))) +uint16x8_t vldrhq_z_u16(const uint16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_z_u32))) +uint32x4_t vldrhq_z_u32(const uint16_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_s32))) +int32x4_t vldrwq_gather_base_s32(uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_u32))) +uint32x4_t vldrwq_gather_base_u32(uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_s32))) +int32x4_t vldrwq_gather_base_wb_s32(uint32x4_t *, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_u32))) +uint32x4_t vldrwq_gather_base_wb_u32(uint32x4_t *, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_s32))) +int32x4_t vldrwq_gather_base_wb_z_s32(uint32x4_t *, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_u32))) +uint32x4_t vldrwq_gather_base_wb_z_u32(uint32x4_t *, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_z_s32))) +int32x4_t vldrwq_gather_base_z_s32(uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_z_u32))) +uint32x4_t vldrwq_gather_base_z_u32(uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) +int32x4_t vldrwq_gather_offset_s32(const int32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) +int32x4_t vldrwq_gather_offset(const int32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) +uint32x4_t vldrwq_gather_offset_u32(const uint32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) +uint32x4_t vldrwq_gather_offset(const uint32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) +int32x4_t vldrwq_gather_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) +int32x4_t vldrwq_gather_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) +uint32x4_t vldrwq_gather_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) +uint32x4_t vldrwq_gather_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) +int32x4_t vldrwq_gather_shifted_offset_s32(const int32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) +int32x4_t vldrwq_gather_shifted_offset(const int32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) +uint32x4_t vldrwq_gather_shifted_offset_u32(const uint32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) +uint32x4_t vldrwq_gather_shifted_offset(const uint32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) +int32x4_t vldrwq_gather_shifted_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) +int32x4_t vldrwq_gather_shifted_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) +uint32x4_t vldrwq_gather_shifted_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) +uint32x4_t vldrwq_gather_shifted_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_s32))) +int32x4_t vldrwq_s32(const int32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_u32))) +uint32x4_t vldrwq_u32(const uint32_t *); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_z_s32))) +int32x4_t vldrwq_z_s32(const int32_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_z_u32))) +uint32x4_t vldrwq_z_u32(const uint32_t *, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s16))) +int16x8_t vmaxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s16))) +int16x8_t vmaxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s32))) +int32x4_t vmaxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s32))) +int32x4_t vmaxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s8))) +int8x16_t vmaxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_s8)))
+int8x16_t vmaxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u16)))
+uint16x8_t vmaxq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u16)))
+uint16x8_t vmaxq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u32)))
+uint32x4_t vmaxq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u32)))
+uint32x4_t vmaxq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u8)))
+uint8x16_t vmaxq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_m_u8)))
+uint8x16_t vmaxq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s16)))
+int16x8_t vmaxq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s16)))
+int16x8_t vmaxq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s32)))
+int32x4_t vmaxq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s32)))
+int32x4_t vmaxq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s8)))
+int8x16_t vmaxq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_s8)))
+int8x16_t vmaxq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u16)))
+uint16x8_t vmaxq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u16)))
+uint16x8_t vmaxq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u32)))
+uint32x4_t vmaxq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u32)))
+uint32x4_t vmaxq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u8)))
+uint8x16_t vmaxq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_u8)))
+uint8x16_t vmaxq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s16)))
+int16x8_t vmaxq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s16)))
+int16x8_t vmaxq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s32)))
+int32x4_t vmaxq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s32)))
+int32x4_t vmaxq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s8)))
+int8x16_t vmaxq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_s8)))
+int8x16_t vmaxq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u16)))
+uint16x8_t vmaxq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u16)))
+uint16x8_t vmaxq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u32)))
+uint32x4_t vmaxq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u32)))
+uint32x4_t vmaxq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u8)))
+uint8x16_t vmaxq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxq_x_u8)))
+uint8x16_t vmaxq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s16)))
+int16_t vmaxvq_s16(int16_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s16)))
+int16_t vmaxvq(int16_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s32)))
+int32_t vmaxvq_s32(int32_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s32)))
+int32_t vmaxvq(int32_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s8)))
+int8_t vmaxvq_s8(int8_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_s8)))
+int8_t vmaxvq(int8_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u16)))
+uint16_t vmaxvq_u16(uint16_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u16)))
+uint16_t vmaxvq(uint16_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u32)))
+uint32_t vmaxvq_u32(uint32_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u32)))
+uint32_t vmaxvq(uint32_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u8)))
+uint8_t vmaxvq_u8(uint8_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxvq_u8)))
+uint8_t vmaxvq(uint8_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s16)))
+int16x8_t vminq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s16)))
+int16x8_t vminq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s32)))
+int32x4_t vminq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s32)))
+int32x4_t vminq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s8)))
+int8x16_t vminq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_s8)))
+int8x16_t vminq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u16)))
+uint16x8_t vminq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u16)))
+uint16x8_t vminq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u32)))
+uint32x4_t vminq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u32)))
+uint32x4_t vminq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u8)))
+uint8x16_t vminq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_m_u8)))
+uint8x16_t vminq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_s16)))
+int16x8_t vminq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_s16)))
+int16x8_t vminq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_s32)))
+int32x4_t vminq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_s32)))
+int32x4_t vminq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_s8)))
+int8x16_t vminq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_s8)))
+int8x16_t vminq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_u16)))
+uint16x8_t vminq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_u16)))
+uint16x8_t vminq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_u32)))
+uint32x4_t vminq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_u32)))
+uint32x4_t vminq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_u8)))
+uint8x16_t vminq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_u8)))
+uint8x16_t vminq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s16)))
+int16x8_t vminq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s16)))
+int16x8_t vminq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s32)))
+int32x4_t vminq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s32)))
+int32x4_t vminq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s8)))
+int8x16_t vminq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_s8)))
+int8x16_t vminq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u16)))
+uint16x8_t vminq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u16)))
+uint16x8_t vminq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u32)))
+uint32x4_t vminq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u32)))
+uint32x4_t vminq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u8)))
+uint8x16_t vminq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminq_x_u8)))
+uint8x16_t vminq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_s16)))
+int16_t vminvq_s16(int16_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_s16)))
+int16_t vminvq(int16_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_s32)))
+int32_t vminvq_s32(int32_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_s32)))
+int32_t vminvq(int32_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_s8)))
+int8_t vminvq_s8(int8_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_s8)))
+int8_t vminvq(int8_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_u16)))
+uint16_t vminvq_u16(uint16_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_u16)))
+uint16_t vminvq(uint16_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_u32)))
+uint32_t vminvq_u32(uint32_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_u32)))
+uint32_t vminvq(uint32_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminvq_u8)))
+uint8_t vminvq_u8(uint8_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminvq_u8)))
+uint8_t vminvq(uint8_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s16)))
+int32_t vmladavaq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s16)))
+int32_t vmladavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s32)))
+int32_t vmladavaq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s32)))
+int32_t vmladavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s8)))
+int32_t vmladavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_s8)))
+int32_t vmladavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u16)))
+uint32_t vmladavaq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u16)))
+uint32_t vmladavaq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u32)))
+uint32_t vmladavaq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u32)))
+uint32_t vmladavaq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u8)))
+uint32_t vmladavaq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_p_u8)))
+uint32_t vmladavaq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s16)))
+int32_t vmladavaq_s16(int32_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s16)))
+int32_t vmladavaq(int32_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s32)))
+int32_t vmladavaq_s32(int32_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s32)))
+int32_t vmladavaq(int32_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s8)))
+int32_t vmladavaq_s8(int32_t, int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_s8)))
+int32_t vmladavaq(int32_t, int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u16)))
+uint32_t vmladavaq_u16(uint32_t, uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u16)))
+uint32_t vmladavaq(uint32_t, uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u32)))
+uint32_t vmladavaq_u32(uint32_t, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u32)))
+uint32_t vmladavaq(uint32_t, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u8)))
+uint32_t vmladavaq_u8(uint32_t, uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaq_u8)))
+uint32_t vmladavaq(uint32_t, uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s16)))
+int32_t vmladavaxq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s16)))
+int32_t vmladavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s32)))
+int32_t vmladavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s32)))
+int32_t vmladavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s8)))
+int32_t vmladavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_p_s8)))
+int32_t vmladavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s16)))
+int32_t vmladavaxq_s16(int32_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s16)))
+int32_t vmladavaxq(int32_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s32)))
+int32_t vmladavaxq_s32(int32_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s32)))
+int32_t vmladavaxq(int32_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s8)))
+int32_t vmladavaxq_s8(int32_t, int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavaxq_s8)))
+int32_t vmladavaxq(int32_t, int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s16)))
+int32_t vmladavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s16)))
+int32_t vmladavq_p(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s32)))
+int32_t vmladavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s32)))
+int32_t vmladavq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s8)))
+int32_t vmladavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_s8)))
+int32_t vmladavq_p(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u16)))
+uint32_t vmladavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u16)))
+uint32_t vmladavq_p(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u32)))
+uint32_t vmladavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u32)))
+uint32_t vmladavq_p(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u8)))
+uint32_t vmladavq_p_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_p_u8)))
+uint32_t vmladavq_p(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s16)))
+int32_t vmladavq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s16)))
+int32_t vmladavq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s32)))
+int32_t vmladavq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s32)))
+int32_t vmladavq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s8)))
+int32_t vmladavq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_s8)))
+int32_t vmladavq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u16)))
+uint32_t vmladavq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u16)))
+uint32_t vmladavq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u32)))
+uint32_t vmladavq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u32)))
+uint32_t vmladavq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u8)))
+uint32_t vmladavq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavq_u8)))
+uint32_t vmladavq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s16)))
+int32_t vmladavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s16)))
+int32_t vmladavxq_p(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s32)))
+int32_t vmladavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s32)))
+int32_t vmladavxq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s8)))
+int32_t vmladavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_p_s8)))
+int32_t vmladavxq_p(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s16)))
+int32_t vmladavxq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s16)))
+int32_t vmladavxq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s32)))
+int32_t vmladavxq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s32)))
+int32_t vmladavxq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s8)))
+int32_t vmladavxq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmladavxq_s8)))
+int32_t vmladavxq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_s16)))
+int64_t vmlaldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_s16)))
+int64_t vmlaldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_s32)))
+int64_t vmlaldavaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_s32)))
+int64_t vmlaldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_u16)))
+uint64_t vmlaldavaq_p_u16(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_u16)))
+uint64_t vmlaldavaq_p(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_u32)))
+uint64_t vmlaldavaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_p_u32)))
+uint64_t vmlaldavaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_s16)))
+int64_t vmlaldavaq_s16(int64_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_s16)))
+int64_t vmlaldavaq(int64_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_s32)))
+int64_t vmlaldavaq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_s32)))
+int64_t vmlaldavaq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_u16)))
+uint64_t vmlaldavaq_u16(uint64_t, uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_u16)))
+uint64_t vmlaldavaq(uint64_t, uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_u32)))
+uint64_t vmlaldavaq_u32(uint64_t, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaq_u32)))
+uint64_t vmlaldavaq(uint64_t, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_p_s16)))
+int64_t vmlaldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_p_s16)))
+int64_t vmlaldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_p_s32)))
+int64_t vmlaldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_p_s32)))
+int64_t vmlaldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_s16)))
+int64_t vmlaldavaxq_s16(int64_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_s16)))
+int64_t vmlaldavaxq(int64_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_s32)))
+int64_t vmlaldavaxq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavaxq_s32)))
+int64_t vmlaldavaxq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_s16)))
+int64_t vmlaldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_s16)))
+int64_t vmlaldavq_p(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_s32)))
+int64_t vmlaldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_s32)))
+int64_t vmlaldavq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_u16)))
+uint64_t vmlaldavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_u16)))
+uint64_t vmlaldavq_p(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_u32)))
+uint64_t vmlaldavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_p_u32)))
+uint64_t vmlaldavq_p(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_s16)))
+int64_t vmlaldavq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_s16)))
+int64_t vmlaldavq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_s32)))
+int64_t vmlaldavq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_s32)))
+int64_t vmlaldavq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_u16)))
+uint64_t vmlaldavq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_u16)))
+uint64_t vmlaldavq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_u32)))
+uint64_t vmlaldavq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavq_u32)))
+uint64_t vmlaldavq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_p_s16)))
+int64_t vmlaldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_p_s16)))
+int64_t vmlaldavxq_p(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_p_s32)))
+int64_t vmlaldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_p_s32)))
+int64_t vmlaldavxq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_s16)))
+int64_t vmlaldavxq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_s16)))
+int64_t vmlaldavxq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_s32)))
+int64_t vmlaldavxq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlaldavxq_s32)))
+int64_t vmlaldavxq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s16)))
+int32_t vmlsdavaq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s16)))
+int32_t vmlsdavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s32)))
+int32_t vmlsdavaq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s32)))
+int32_t vmlsdavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s8)))
+int32_t vmlsdavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_p_s8)))
+int32_t vmlsdavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s16)))
+int32_t vmlsdavaq_s16(int32_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s16)))
+int32_t vmlsdavaq(int32_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s32)))
+int32_t vmlsdavaq_s32(int32_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s32)))
+int32_t vmlsdavaq(int32_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s8)))
+int32_t vmlsdavaq_s8(int32_t, int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaq_s8)))
+int32_t vmlsdavaq(int32_t, int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s16)))
+int32_t vmlsdavaxq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s16)))
+int32_t vmlsdavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s32)))
+int32_t vmlsdavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s32)))
+int32_t vmlsdavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s8)))
+int32_t vmlsdavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_p_s8)))
+int32_t vmlsdavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s16)))
+int32_t vmlsdavaxq_s16(int32_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s16)))
+int32_t vmlsdavaxq(int32_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s32)))
+int32_t vmlsdavaxq_s32(int32_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s32)))
+int32_t vmlsdavaxq(int32_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s8)))
+int32_t vmlsdavaxq_s8(int32_t, int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavaxq_s8)))
+int32_t vmlsdavaxq(int32_t, int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s16)))
+int32_t vmlsdavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s16)))
+int32_t vmlsdavq_p(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s32)))
+int32_t vmlsdavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s32)))
+int32_t vmlsdavq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s8)))
+int32_t vmlsdavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_p_s8)))
+int32_t vmlsdavq_p(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s16)))
+int32_t vmlsdavq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s16)))
+int32_t vmlsdavq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s32)))
+int32_t vmlsdavq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s32)))
+int32_t vmlsdavq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s8)))
+int32_t vmlsdavq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavq_s8)))
+int32_t vmlsdavq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s16)))
+int32_t vmlsdavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s16)))
+int32_t vmlsdavxq_p(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s32)))
+int32_t vmlsdavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s32)))
+int32_t vmlsdavxq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s8)))
+int32_t vmlsdavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_p_s8)))
+int32_t vmlsdavxq_p(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s16)))
+int32_t vmlsdavxq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s16)))
+int32_t vmlsdavxq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s32)))
+int32_t vmlsdavxq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s32)))
+int32_t vmlsdavxq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s8)))
+int32_t vmlsdavxq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsdavxq_s8)))
+int32_t vmlsdavxq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_p_s16)))
+int64_t vmlsldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_p_s16)))
+int64_t vmlsldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_p_s32)))
+int64_t vmlsldavaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_p_s32)))
+int64_t vmlsldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_s16)))
+int64_t vmlsldavaq_s16(int64_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_s16)))
+int64_t vmlsldavaq(int64_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_s32)))
+int64_t vmlsldavaq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaq_s32)))
+int64_t vmlsldavaq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_p_s16)))
+int64_t vmlsldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_p_s16)))
+int64_t vmlsldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_p_s32)))
+int64_t vmlsldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_p_s32)))
+int64_t vmlsldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_s16)))
+int64_t vmlsldavaxq_s16(int64_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_s16)))
+int64_t vmlsldavaxq(int64_t, int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_s32)))
+int64_t vmlsldavaxq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavaxq_s32)))
+int64_t vmlsldavaxq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_p_s16)))
+int64_t vmlsldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_p_s16)))
+int64_t vmlsldavq_p(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_p_s32)))
+int64_t vmlsldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_p_s32)))
+int64_t vmlsldavq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_s16)))
+int64_t vmlsldavq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_s16)))
+int64_t vmlsldavq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_s32)))
+int64_t vmlsldavq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavq_s32)))
+int64_t vmlsldavq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_p_s16)))
+int64_t vmlsldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_p_s16)))
+int64_t vmlsldavxq_p(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_p_s32)))
+int64_t vmlsldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_p_s32)))
+int64_t vmlsldavxq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_s16)))
+int64_t vmlsldavxq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_s16)))
+int64_t vmlsldavxq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_s32)))
+int64_t vmlsldavxq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmlsldavxq_s32)))
+int64_t vmlsldavxq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s16)))
+int16x8_t vmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s16)))
+int16x8_t vmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s32)))
+int32x4_t vmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s32)))
+int32x4_t vmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s8)))
+int8x16_t vmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_s8)))
+int8x16_t vmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u16)))
+uint16x8_t vmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u16)))
+uint16x8_t vmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u32)))
+uint32x4_t vmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u32)))
+uint32x4_t vmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u8)))
+uint8x16_t vmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_m_u8)))
+uint8x16_t vmulhq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s16)))
+int16x8_t vmulhq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s16)))
+int16x8_t vmulhq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s32)))
+int32x4_t vmulhq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s32)))
+int32x4_t vmulhq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s8)))
+int8x16_t vmulhq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_s8)))
+int8x16_t vmulhq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u16)))
+uint16x8_t vmulhq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u16)))
+uint16x8_t vmulhq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u32)))
+uint32x4_t vmulhq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u32)))
+uint32x4_t vmulhq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u8)))
+uint8x16_t vmulhq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_u8)))
+uint8x16_t vmulhq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s16)))
+int16x8_t vmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s16)))
+int16x8_t vmulhq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s32)))
+int32x4_t vmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s32)))
+int32x4_t vmulhq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s8)))
+int8x16_t vmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_s8)))
+int8x16_t vmulhq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u16)))
+uint16x8_t vmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u16)))
+uint16x8_t vmulhq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u32)))
+uint32x4_t vmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u32)))
+uint32x4_t vmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u8)))
+uint8x16_t vmulhq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulhq_x_u8)))
+uint8x16_t vmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s16)))
+int32x4_t vmullbq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s16)))
+int32x4_t vmullbq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s32)))
+int64x2_t vmullbq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s32)))
+int64x2_t vmullbq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s8)))
+int16x8_t vmullbq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_s8)))
+int16x8_t vmullbq_int_m(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u16)))
+uint32x4_t vmullbq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u16)))
+uint32x4_t vmullbq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u32)))
+uint64x2_t vmullbq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u32)))
+uint64x2_t vmullbq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u8)))
+uint16x8_t vmullbq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_m_u8)))
+uint16x8_t vmullbq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s16)))
+int32x4_t vmullbq_int_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s16)))
+int32x4_t vmullbq_int(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s32)))
+int64x2_t vmullbq_int_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s32)))
+int64x2_t vmullbq_int(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s8)))
+int16x8_t vmullbq_int_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_s8)))
+int16x8_t vmullbq_int(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u16)))
+uint32x4_t vmullbq_int_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u16)))
+uint32x4_t vmullbq_int(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u32)))
+uint64x2_t vmullbq_int_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u32)))
+uint64x2_t vmullbq_int(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u8)))
+uint16x8_t vmullbq_int_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_u8)))
+uint16x8_t vmullbq_int(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s16)))
+int32x4_t vmullbq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s16)))
+int32x4_t vmullbq_int_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s32)))
+int64x2_t vmullbq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s32)))
+int64x2_t vmullbq_int_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s8)))
+int16x8_t vmullbq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_s8)))
+int16x8_t vmullbq_int_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u16)))
+uint32x4_t vmullbq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u16)))
+uint32x4_t vmullbq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u32)))
+uint64x2_t vmullbq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u32)))
+uint64x2_t vmullbq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u8)))
+uint16x8_t vmullbq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_int_x_u8)))
+uint16x8_t vmullbq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_m_p16)))
+uint32x4_t vmullbq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_m_p16)))
+uint32x4_t vmullbq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_m_p8)))
+uint16x8_t vmullbq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_m_p8)))
+uint16x8_t vmullbq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_p16)))
+uint32x4_t vmullbq_poly_p16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_p16)))
+uint32x4_t vmullbq_poly(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_p8)))
+uint16x8_t vmullbq_poly_p8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_p8)))
+uint16x8_t vmullbq_poly(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_x_p16)))
+uint32x4_t vmullbq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_x_p16)))
+uint32x4_t vmullbq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_x_p8)))
+uint16x8_t vmullbq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmullbq_poly_x_p8)))
+uint16x8_t vmullbq_poly_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s16)))
+int32x4_t vmulltq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s16)))
+int32x4_t vmulltq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s32)))
+int64x2_t vmulltq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s32)))
+int64x2_t vmulltq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s8)))
+int16x8_t vmulltq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_s8)))
+int16x8_t vmulltq_int_m(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u16)))
+uint32x4_t vmulltq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u16)))
+uint32x4_t vmulltq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u32)))
+uint64x2_t vmulltq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u32)))
+uint64x2_t vmulltq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u8)))
+uint16x8_t vmulltq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_m_u8)))
+uint16x8_t vmulltq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s16)))
+int32x4_t vmulltq_int_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s16)))
+int32x4_t vmulltq_int(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s32)))
+int64x2_t vmulltq_int_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s32)))
+int64x2_t vmulltq_int(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s8)))
+int16x8_t vmulltq_int_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_s8)))
+int16x8_t vmulltq_int(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u16)))
+uint32x4_t vmulltq_int_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u16)))
+uint32x4_t vmulltq_int(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u32)))
+uint64x2_t vmulltq_int_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u32)))
+uint64x2_t vmulltq_int(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u8)))
+uint16x8_t vmulltq_int_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_u8)))
+uint16x8_t vmulltq_int(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s16)))
+int32x4_t vmulltq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s16)))
+int32x4_t vmulltq_int_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s32)))
+int64x2_t vmulltq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s32)))
+int64x2_t vmulltq_int_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s8)))
+int16x8_t vmulltq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_s8)))
+int16x8_t vmulltq_int_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u16)))
+uint32x4_t vmulltq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u16)))
+uint32x4_t vmulltq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u32)))
+uint64x2_t vmulltq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u32)))
+uint64x2_t vmulltq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u8)))
+uint16x8_t vmulltq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_int_x_u8)))
+uint16x8_t vmulltq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_m_p16)))
+uint32x4_t vmulltq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_m_p16)))
+uint32x4_t vmulltq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_m_p8)))
+uint16x8_t vmulltq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_m_p8)))
+uint16x8_t vmulltq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_p16)))
+uint32x4_t vmulltq_poly_p16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_p16)))
+uint32x4_t vmulltq_poly(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_p8)))
+uint16x8_t vmulltq_poly_p8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_p8)))
+uint16x8_t vmulltq_poly(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_x_p16)))
+uint32x4_t vmulltq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_x_p16)))
+uint32x4_t vmulltq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_x_p8)))
+uint16x8_t vmulltq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulltq_poly_x_p8)))
+uint16x8_t vmulltq_poly_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s16)))
+int16x8_t vmulq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s16)))
+int16x8_t vmulq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s32)))
+int32x4_t vmulq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s32)))
+int32x4_t vmulq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s8)))
+int8x16_t vmulq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_s8)))
+int8x16_t vmulq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u16)))
+uint16x8_t vmulq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u16)))
+uint16x8_t vmulq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u32)))
+uint32x4_t vmulq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u32)))
+uint32x4_t vmulq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u8)))
+uint8x16_t vmulq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_u8)))
+uint8x16_t vmulq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_s16)))
+int16x8_t vmulq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_s16)))
+int16x8_t vmulq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_s32)))
+int32x4_t vmulq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_s32)))
+int32x4_t vmulq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_s8)))
+int8x16_t vmulq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_s8)))
+int8x16_t vmulq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_u16)))
+uint16x8_t vmulq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_u16)))
+uint16x8_t vmulq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_u32)))
+uint32x4_t vmulq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_u32)))
+uint32x4_t vmulq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_u8)))
+uint8x16_t vmulq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_u8)))
+uint8x16_t vmulq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s16)))
+int16x8_t vmulq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s16)))
+int16x8_t vmulq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s32)))
+int32x4_t vmulq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s32)))
+int32x4_t vmulq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s8)))
+int8x16_t vmulq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_s8)))
+int8x16_t vmulq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u16)))
+uint16x8_t vmulq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u16)))
+uint16x8_t vmulq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u32)))
+uint32x4_t vmulq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u32)))
+uint32x4_t vmulq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u8)))
+uint8x16_t vmulq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_u8)))
+uint8x16_t vmulq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s16)))
+int16x8_t vornq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s16)))
+int16x8_t vornq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s32)))
+int32x4_t vornq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s32)))
+int32x4_t vornq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s8)))
+int8x16_t vornq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_s8)))
+int8x16_t vornq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u16)))
+uint16x8_t vornq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u16)))
+uint16x8_t vornq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u32)))
+uint32x4_t vornq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u32)))
+uint32x4_t vornq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u8)))
+uint8x16_t vornq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_u8)))
+uint8x16_t vornq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_s16)))
+int16x8_t vornq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_s16)))
+int16x8_t vornq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_s32)))
+int32x4_t vornq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_s32)))
+int32x4_t vornq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_s8)))
+int8x16_t vornq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable,
__clang_arm_mve_alias(__builtin_arm_mve_vornq_s8))) +int8x16_t vornq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_u16))) +uint16x8_t vornq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_u16))) +uint16x8_t vornq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_u32))) +uint32x4_t vornq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_u32))) +uint32x4_t vornq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_u8))) +uint8x16_t vornq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_u8))) +uint8x16_t vornq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s16))) +int16x8_t vornq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s16))) +int16x8_t vornq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s32))) +int32x4_t vornq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s32))) +int32x4_t vornq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s8))) +int8x16_t vornq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_s8))) +int8x16_t vornq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u16))) +uint16x8_t vornq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u16))) +uint16x8_t vornq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u32))) +uint32x4_t vornq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u32))) +uint32x4_t vornq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u8))) +uint8x16_t vornq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_u8))) +uint8x16_t vornq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s16))) +int16x8_t vorrq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s16))) +int16x8_t vorrq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s32))) +int32x4_t vorrq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s32))) +int32x4_t vorrq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s8))) +int8x16_t 
vorrq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_s8))) +int8x16_t vorrq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u16))) +uint16x8_t vorrq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u16))) +uint16x8_t vorrq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u32))) +uint32x4_t vorrq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u32))) +uint32x4_t vorrq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u8))) +uint8x16_t vorrq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_u8))) +uint8x16_t vorrq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_s16))) +int16x8_t vorrq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_s16))) +int16x8_t vorrq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_s32))) +int32x4_t vorrq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_s32))) +int32x4_t vorrq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_s8))) +int8x16_t vorrq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_s8))) +int8x16_t vorrq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_u16))) +uint16x8_t vorrq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_u16))) +uint16x8_t vorrq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_u32))) +uint32x4_t vorrq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_u32))) +uint32x4_t vorrq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_u8))) +uint8x16_t vorrq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_u8))) +uint8x16_t vorrq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s16))) +int16x8_t vorrq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s16))) +int16x8_t vorrq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s32))) +int32x4_t vorrq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s32))) +int32x4_t vorrq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s8))) +int8x16_t vorrq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_s8))) +int8x16_t vorrq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u16))) +uint16x8_t vorrq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u16))) +uint16x8_t vorrq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u32))) +uint32x4_t vorrq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u32))) +uint32x4_t vorrq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u8))) +uint8x16_t vorrq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_u8))) +uint8x16_t vorrq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpnot))) +mve_pred16_t vpnot(mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_s16))) +int16x8_t vpselq_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_s16))) +int16x8_t vpselq(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_s32))) +int32x4_t vpselq_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_s32))) +int32x4_t vpselq(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_s64))) +int64x2_t vpselq_s64(int64x2_t, int64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_s64))) +int64x2_t vpselq(int64x2_t, int64x2_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_s8))) +int8x16_t vpselq_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_s8))) +int8x16_t vpselq(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_u16))) +uint16x8_t vpselq_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_u16))) +uint16x8_t vpselq(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_u32))) +uint32x4_t vpselq_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_u32))) +uint32x4_t vpselq(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_u64))) +uint64x2_t vpselq_u64(uint64x2_t, uint64x2_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_u64))) +uint64x2_t vpselq(uint64x2_t, uint64x2_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_u8))) +uint8x16_t vpselq_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_u8))) +uint8x16_t vpselq(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s16))) +int16x8_t vqaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s16))) +int16x8_t vqaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s32))) +int32x4_t vqaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s32))) +int32x4_t vqaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s8))) +int8x16_t vqaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_s8))) +int8x16_t vqaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u16))) +uint16x8_t vqaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u16))) +uint16x8_t vqaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u32))) +uint32x4_t vqaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u32))) +uint32x4_t vqaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u8))) +uint8x16_t vqaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_m_u8))) +uint8x16_t vqaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s16))) +int16x8_t vqaddq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s16))) +int16x8_t vqaddq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s32))) +int32x4_t vqaddq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s32))) +int32x4_t vqaddq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s8))) +int8x16_t vqaddq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_s8))) +int8x16_t vqaddq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u16))) +uint16x8_t vqaddq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u16))) +uint16x8_t vqaddq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u32))) +uint32x4_t vqaddq_u32(uint32x4_t, uint32x4_t); 
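+// Editorial sketch, not part of the upstream clang header: the vqaddq_*
+// declarations in this group are MVE saturating adds, and the unsuffixed
+// vqaddq overloads dispatch on argument type via __clang_arm_mve_alias.
+// A minimal usage sketch, assuming an MVE-enabled target (e.g.
+// -march=armv8.1-m.main+mve) and the types declared in this header:
+//
+//     int16x8_t sum  = vqaddq(a, b);                 // resolves to vqaddq_s16
+//     int16x8_t psum = vqaddq_m(inactive, a, b, p);  // predicated; p is mve_pred16_t
+//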
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u32))) +uint32x4_t vqaddq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u8))) +uint8x16_t vqaddq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqaddq_u8))) +uint8x16_t vqaddq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s16))) +int16x8_t vqdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s16))) +int16x8_t vqdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s32))) +int32x4_t vqdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s32))) +int32x4_t vqdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s8))) +int8x16_t vqdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_m_s8))) +int8x16_t vqdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s16))) +int16x8_t vqdmulhq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s16))) +int16x8_t vqdmulhq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s32))) +int32x4_t vqdmulhq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s32))) +int32x4_t vqdmulhq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s8))) +int8x16_t vqdmulhq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqdmulhq_s8))) +int8x16_t vqdmulhq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) +int16x8_t vqrdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) +int16x8_t vqrdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) +int32x4_t vqrdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) +int32x4_t vqrdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) +int8x16_t vqrdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) +int8x16_t vqrdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s16))) +int16x8_t vqrdmulhq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s16))) +int16x8_t vqrdmulhq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s32))) +int32x4_t vqrdmulhq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s32))) +int32x4_t vqrdmulhq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s8))) +int8x16_t vqrdmulhq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrdmulhq_s8))) +int8x16_t vqrdmulhq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) +int16x8_t vqrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) +int16x8_t vqrshlq_m_n(int16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) +int32x4_t vqrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) +int32x4_t vqrshlq_m_n(int32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) +int8x16_t vqrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) +int8x16_t vqrshlq_m_n(int8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) +uint16x8_t vqrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) +uint16x8_t vqrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) +uint32x4_t vqrshlq_m_n_u32(uint32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) +uint32x4_t vqrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) +uint8x16_t vqrshlq_m_n_u8(uint8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) +uint8x16_t vqrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s16))) +int16x8_t vqrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s16))) +int16x8_t vqrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s32))) +int32x4_t vqrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s32))) +int32x4_t vqrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s8))) +int8x16_t vqrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_s8))) +int8x16_t 
vqrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u16))) +uint16x8_t vqrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u16))) +uint16x8_t vqrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u32))) +uint32x4_t vqrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u32))) +uint32x4_t vqrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u8))) +uint8x16_t vqrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_m_u8))) +uint8x16_t vqrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s16))) +int16x8_t vqrshlq_n_s16(int16x8_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s16))) +int16x8_t vqrshlq(int16x8_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s32))) +int32x4_t vqrshlq_n_s32(int32x4_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s32))) +int32x4_t vqrshlq(int32x4_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s8))) +int8x16_t vqrshlq_n_s8(int8x16_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_s8))) +int8x16_t vqrshlq(int8x16_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u16))) +uint16x8_t vqrshlq_n_u16(uint16x8_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u16))) +uint16x8_t vqrshlq(uint16x8_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u32))) +uint32x4_t vqrshlq_n_u32(uint32x4_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u32))) +uint32x4_t vqrshlq(uint32x4_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u8))) +uint8x16_t vqrshlq_n_u8(uint8x16_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_n_u8))) +uint8x16_t vqrshlq(uint8x16_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s16))) +int16x8_t vqrshlq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s16))) +int16x8_t vqrshlq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s32))) +int32x4_t vqrshlq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s32))) +int32x4_t vqrshlq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s8))) +int8x16_t vqrshlq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_s8))) +int8x16_t vqrshlq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u16))) +uint16x8_t vqrshlq_u16(uint16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u16))) +uint16x8_t vqrshlq(uint16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u32))) +uint32x4_t vqrshlq_u32(uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u32))) +uint32x4_t vqrshlq(uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u8))) +uint8x16_t vqrshlq_u8(uint8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshlq_u8))) +uint8x16_t vqrshlq(uint8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) +int8x16_t vqrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) +int8x16_t vqrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) +int16x8_t vqrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) +int16x8_t vqrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) +uint8x16_t vqrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) +uint8x16_t vqrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) +uint16x8_t vqrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) +uint16x8_t vqrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) +int8x16_t vqrshrnbq_n_s16(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) +int8x16_t vqrshrnbq(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) +int16x8_t vqrshrnbq_n_s32(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) +int16x8_t vqrshrnbq(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) +uint8x16_t vqrshrnbq_n_u16(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) +uint8x16_t vqrshrnbq(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) +uint16x8_t vqrshrnbq_n_u32(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) +uint16x8_t vqrshrnbq(uint16x8_t, uint32x4_t, int); +static 
__inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) +int8x16_t vqrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) +int8x16_t vqrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) +int16x8_t vqrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) +int16x8_t vqrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) +uint8x16_t vqrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) +uint8x16_t vqrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) +uint16x8_t vqrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) +uint16x8_t vqrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_s16))) +int8x16_t vqrshrntq_n_s16(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_s16))) +int8x16_t vqrshrntq(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_s32))) +int16x8_t vqrshrntq_n_s32(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_s32))) +int16x8_t vqrshrntq(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_u16))) +uint8x16_t vqrshrntq_n_u16(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_u16))) +uint8x16_t vqrshrntq(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_u32))) +uint16x8_t vqrshrntq_n_u32(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrntq_n_u32))) +uint16x8_t vqrshrntq(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) +uint8x16_t vqrshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) +uint8x16_t vqrshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) +uint16x8_t vqrshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) +uint16x8_t vqrshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) +uint8x16_t vqrshrunbq_n_s16(uint8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) 
+uint8x16_t vqrshrunbq(uint8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) +uint16x8_t vqrshrunbq_n_s32(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) +uint16x8_t vqrshrunbq(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) +uint8x16_t vqrshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) +uint8x16_t vqrshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) +uint16x8_t vqrshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) +uint16x8_t vqrshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_n_s16))) +uint8x16_t vqrshruntq_n_s16(uint8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_n_s16))) +uint8x16_t vqrshruntq(uint8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_n_s32))) +uint16x8_t vqrshruntq_n_s32(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqrshruntq_n_s32))) +uint16x8_t vqrshruntq(uint16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s16))) +int16x8_t vqshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s16))) +int16x8_t vqshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s32))) +int32x4_t vqshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s32))) +int32x4_t vqshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s8))) +int8x16_t vqshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_s8))) +int8x16_t vqshlq_m_n(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u16))) +uint16x8_t vqshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u16))) +uint16x8_t vqshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u32))) +uint32x4_t vqshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u32))) +uint32x4_t vqshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u8))) +uint8x16_t vqshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_n_u8))) +uint8x16_t vqshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s16))) +int16x8_t vqshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s16))) +int16x8_t vqshlq_m_r(int16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s32))) +int32x4_t vqshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s32))) +int32x4_t vqshlq_m_r(int32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s8))) +int8x16_t vqshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_s8))) +int8x16_t vqshlq_m_r(int8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u16))) +uint16x8_t vqshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u16))) +uint16x8_t vqshlq_m_r(uint16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u32))) +uint32x4_t vqshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u32))) +uint32x4_t vqshlq_m_r(uint32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u8))) +uint8x16_t vqshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_r_u8))) +uint8x16_t vqshlq_m_r(uint8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s16))) +int16x8_t vqshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s16))) +int16x8_t vqshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s32))) +int32x4_t vqshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s32))) +int32x4_t vqshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s8))) +int8x16_t vqshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_s8))) +int8x16_t vqshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u16))) +uint16x8_t vqshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u16))) +uint16x8_t vqshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u32))) +uint32x4_t vqshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u32))) +uint32x4_t vqshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u8))) +uint8x16_t vqshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_m_u8))) +uint8x16_t vqshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s16))) +int16x8_t vqshlq_n_s16(int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s16))) +int16x8_t vqshlq_n(int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s32))) +int32x4_t vqshlq_n_s32(int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s32))) +int32x4_t vqshlq_n(int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s8))) +int8x16_t vqshlq_n_s8(int8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_s8))) +int8x16_t vqshlq_n(int8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u16))) +uint16x8_t vqshlq_n_u16(uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u16))) +uint16x8_t vqshlq_n(uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u32))) +uint32x4_t vqshlq_n_u32(uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u32))) +uint32x4_t vqshlq_n(uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u8))) +uint8x16_t vqshlq_n_u8(uint8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_n_u8))) +uint8x16_t vqshlq_n(uint8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s16))) +int16x8_t vqshlq_r_s16(int16x8_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s16))) +int16x8_t vqshlq_r(int16x8_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s32))) +int32x4_t vqshlq_r_s32(int32x4_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s32))) +int32x4_t vqshlq_r(int32x4_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s8))) +int8x16_t vqshlq_r_s8(int8x16_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_s8))) +int8x16_t vqshlq_r(int8x16_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u16))) +uint16x8_t vqshlq_r_u16(uint16x8_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u16))) +uint16x8_t vqshlq_r(uint16x8_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u32))) +uint32x4_t vqshlq_r_u32(uint32x4_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u32))) +uint32x4_t vqshlq_r(uint32x4_t, int32_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u8))) +uint8x16_t vqshlq_r_u8(uint8x16_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_r_u8))) +uint8x16_t vqshlq_r(uint8x16_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s16))) +int16x8_t vqshlq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s16))) +int16x8_t vqshlq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s32))) +int32x4_t vqshlq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s32))) +int32x4_t vqshlq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s8))) +int8x16_t vqshlq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_s8))) +int8x16_t vqshlq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u16))) +uint16x8_t vqshlq_u16(uint16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u16))) +uint16x8_t vqshlq(uint16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u32))) +uint32x4_t vqshlq_u32(uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u32))) +uint32x4_t vqshlq(uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u8))) +uint8x16_t vqshlq_u8(uint8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshlq_u8))) +uint8x16_t vqshlq(uint8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s16))) +uint16x8_t vqshluq_m_n_s16(uint16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s16))) +uint16x8_t vqshluq_m(uint16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s32))) +uint32x4_t vqshluq_m_n_s32(uint32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s32))) +uint32x4_t vqshluq_m(uint32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s8))) +uint8x16_t vqshluq_m_n_s8(uint8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_m_n_s8))) +uint8x16_t vqshluq_m(uint8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s16))) +uint16x8_t vqshluq_n_s16(int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s16))) +uint16x8_t vqshluq(int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s32))) +uint32x4_t vqshluq_n_s32(int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s32))) +uint32x4_t vqshluq(int32x4_t, int); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s8)))
+uint8x16_t vqshluq_n_s8(int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshluq_n_s8)))
+uint8x16_t vqshluq(int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_s16)))
+int8x16_t vqshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_s16)))
+int8x16_t vqshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_s32)))
+int16x8_t vqshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_s32)))
+int16x8_t vqshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_u16)))
+uint8x16_t vqshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_u16)))
+uint8x16_t vqshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_u32)))
+uint16x8_t vqshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_m_n_u32)))
+uint16x8_t vqshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_s16)))
+int8x16_t vqshrnbq_n_s16(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_s16)))
+int8x16_t vqshrnbq(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_s32)))
+int16x8_t vqshrnbq_n_s32(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_s32)))
+int16x8_t vqshrnbq(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_u16)))
+uint8x16_t vqshrnbq_n_u16(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_u16)))
+uint8x16_t vqshrnbq(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_u32)))
+uint16x8_t vqshrnbq_n_u32(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrnbq_n_u32)))
+uint16x8_t vqshrnbq(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_s16)))
+int8x16_t vqshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_s16)))
+int8x16_t vqshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_s32)))
+int16x8_t vqshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_s32)))
+int16x8_t vqshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_u16)))
+uint8x16_t vqshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_u16)))
+uint8x16_t vqshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_u32)))
+uint16x8_t vqshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_m_n_u32)))
+uint16x8_t vqshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_s16)))
+int8x16_t vqshrntq_n_s16(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_s16)))
+int8x16_t vqshrntq(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_s32)))
+int16x8_t vqshrntq_n_s32(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_s32)))
+int16x8_t vqshrntq(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_u16)))
+uint8x16_t vqshrntq_n_u16(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_u16)))
+uint8x16_t vqshrntq(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_u32)))
+uint16x8_t vqshrntq_n_u32(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrntq_n_u32)))
+uint16x8_t vqshrntq(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_m_n_s16)))
+uint8x16_t vqshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_m_n_s16)))
+uint8x16_t vqshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_m_n_s32)))
+uint16x8_t vqshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_m_n_s32)))
+uint16x8_t vqshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_n_s16)))
+uint8x16_t vqshrunbq_n_s16(uint8x16_t, int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_n_s16)))
+uint8x16_t vqshrunbq(uint8x16_t, int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_n_s32)))
+uint16x8_t vqshrunbq_n_s32(uint16x8_t, int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshrunbq_n_s32)))
+uint16x8_t vqshrunbq(uint16x8_t, int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_m_n_s16)))
+uint8x16_t vqshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_m_n_s16)))
+uint8x16_t vqshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_m_n_s32)))
+uint16x8_t vqshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_m_n_s32)))
+uint16x8_t vqshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_n_s16)))
+uint8x16_t vqshruntq_n_s16(uint8x16_t, int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_n_s16)))
+uint8x16_t vqshruntq(uint8x16_t, int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_n_s32)))
+uint16x8_t vqshruntq_n_s32(uint16x8_t, int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqshruntq_n_s32)))
+uint16x8_t vqshruntq(uint16x8_t, int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s16)))
+int16x8_t vqsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s16)))
+int16x8_t vqsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s32)))
+int32x4_t vqsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s32)))
+int32x4_t vqsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s8)))
+int8x16_t vqsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_s8)))
+int8x16_t vqsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u16)))
+uint16x8_t vqsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u16)))
+uint16x8_t vqsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u32)))
+uint32x4_t vqsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u32)))
+uint32x4_t vqsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u8)))
+uint8x16_t vqsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_m_u8)))
+uint8x16_t vqsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s16)))
+int16x8_t vqsubq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s16)))
+int16x8_t vqsubq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s32)))
+int32x4_t vqsubq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s32)))
+int32x4_t vqsubq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s8)))
+int8x16_t vqsubq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_s8)))
+int8x16_t vqsubq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u16)))
+uint16x8_t vqsubq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u16)))
+uint16x8_t vqsubq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u32)))
+uint32x4_t vqsubq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u32)))
+uint32x4_t vqsubq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u8)))
+uint8x16_t vqsubq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vqsubq_u8)))
+uint8x16_t vqsubq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s32)))
+int16x8_t vreinterpretq_s16_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s32)))
+int16x8_t vreinterpretq_s16(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s64)))
+int16x8_t vreinterpretq_s16_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s64)))
+int16x8_t vreinterpretq_s16(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s8)))
+int16x8_t vreinterpretq_s16_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_s8)))
+int16x8_t vreinterpretq_s16(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u16)))
+int16x8_t vreinterpretq_s16_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u16)))
+int16x8_t vreinterpretq_s16(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u32)))
+int16x8_t vreinterpretq_s16_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u32)))
+int16x8_t vreinterpretq_s16(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u64)))
+int16x8_t vreinterpretq_s16_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u64)))
+int16x8_t vreinterpretq_s16(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u8)))
+int16x8_t vreinterpretq_s16_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_u8)))
+int16x8_t vreinterpretq_s16(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s16)))
+int32x4_t vreinterpretq_s32_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s16)))
+int32x4_t vreinterpretq_s32(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s64)))
+int32x4_t vreinterpretq_s32_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s64)))
+int32x4_t vreinterpretq_s32(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s8)))
+int32x4_t vreinterpretq_s32_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_s8)))
+int32x4_t vreinterpretq_s32(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u16)))
+int32x4_t vreinterpretq_s32_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u16)))
+int32x4_t vreinterpretq_s32(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u32)))
+int32x4_t vreinterpretq_s32_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u32)))
+int32x4_t vreinterpretq_s32(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u64)))
+int32x4_t vreinterpretq_s32_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u64)))
+int32x4_t vreinterpretq_s32(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u8)))
+int32x4_t vreinterpretq_s32_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_u8)))
+int32x4_t vreinterpretq_s32(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s16)))
+int64x2_t vreinterpretq_s64_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s16)))
+int64x2_t vreinterpretq_s64(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s32)))
+int64x2_t vreinterpretq_s64_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s32)))
+int64x2_t vreinterpretq_s64(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s8)))
+int64x2_t vreinterpretq_s64_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_s8)))
+int64x2_t vreinterpretq_s64(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u16)))
+int64x2_t vreinterpretq_s64_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u16)))
+int64x2_t vreinterpretq_s64(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u32)))
+int64x2_t vreinterpretq_s64_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u32)))
+int64x2_t vreinterpretq_s64(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u64)))
+int64x2_t vreinterpretq_s64_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u64)))
+int64x2_t vreinterpretq_s64(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u8)))
+int64x2_t vreinterpretq_s64_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_u8)))
+int64x2_t vreinterpretq_s64(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s16)))
+int8x16_t vreinterpretq_s8_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s16)))
+int8x16_t vreinterpretq_s8(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s32)))
+int8x16_t vreinterpretq_s8_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s32)))
+int8x16_t vreinterpretq_s8(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s64)))
+int8x16_t vreinterpretq_s8_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_s64)))
+int8x16_t vreinterpretq_s8(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u16)))
+int8x16_t vreinterpretq_s8_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u16)))
+int8x16_t vreinterpretq_s8(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u32)))
+int8x16_t vreinterpretq_s8_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u32)))
+int8x16_t vreinterpretq_s8(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u64)))
+int8x16_t vreinterpretq_s8_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u64)))
+int8x16_t vreinterpretq_s8(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u8)))
+int8x16_t vreinterpretq_s8_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_u8)))
+int8x16_t vreinterpretq_s8(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s16)))
+uint16x8_t vreinterpretq_u16_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s16)))
+uint16x8_t vreinterpretq_u16(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s32)))
+uint16x8_t vreinterpretq_u16_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s32)))
+uint16x8_t vreinterpretq_u16(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s64)))
+uint16x8_t vreinterpretq_u16_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s64)))
+uint16x8_t vreinterpretq_u16(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s8)))
+uint16x8_t vreinterpretq_u16_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_s8)))
+uint16x8_t vreinterpretq_u16(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u32)))
+uint16x8_t vreinterpretq_u16_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u32)))
+uint16x8_t vreinterpretq_u16(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u64)))
+uint16x8_t vreinterpretq_u16_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u64)))
+uint16x8_t vreinterpretq_u16(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u8)))
+uint16x8_t vreinterpretq_u16_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_u8)))
+uint16x8_t vreinterpretq_u16(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s16)))
+uint32x4_t vreinterpretq_u32_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s16)))
+uint32x4_t vreinterpretq_u32(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s32)))
+uint32x4_t vreinterpretq_u32_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s32)))
+uint32x4_t vreinterpretq_u32(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s64)))
+uint32x4_t vreinterpretq_u32_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s64)))
+uint32x4_t vreinterpretq_u32(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s8)))
+uint32x4_t vreinterpretq_u32_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_s8)))
+uint32x4_t vreinterpretq_u32(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u16)))
+uint32x4_t vreinterpretq_u32_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u16)))
+uint32x4_t vreinterpretq_u32(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u64)))
+uint32x4_t vreinterpretq_u32_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u64)))
+uint32x4_t vreinterpretq_u32(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u8)))
+uint32x4_t vreinterpretq_u32_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_u8)))
+uint32x4_t vreinterpretq_u32(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s16)))
+uint64x2_t vreinterpretq_u64_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s16)))
+uint64x2_t vreinterpretq_u64(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s32)))
+uint64x2_t vreinterpretq_u64_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s32)))
+uint64x2_t vreinterpretq_u64(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s64)))
+uint64x2_t vreinterpretq_u64_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s64)))
+uint64x2_t vreinterpretq_u64(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s8)))
+uint64x2_t vreinterpretq_u64_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_s8)))
+uint64x2_t vreinterpretq_u64(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u16)))
+uint64x2_t vreinterpretq_u64_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u16)))
+uint64x2_t vreinterpretq_u64(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u32)))
+uint64x2_t vreinterpretq_u64_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u32)))
+uint64x2_t vreinterpretq_u64(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u8)))
+uint64x2_t vreinterpretq_u64_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_u8)))
+uint64x2_t vreinterpretq_u64(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s16)))
+uint8x16_t vreinterpretq_u8_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s16)))
+uint8x16_t vreinterpretq_u8(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s32)))
+uint8x16_t vreinterpretq_u8_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s32)))
+uint8x16_t vreinterpretq_u8(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s64)))
+uint8x16_t vreinterpretq_u8_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s64)))
+uint8x16_t vreinterpretq_u8(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s8)))
+uint8x16_t vreinterpretq_u8_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_s8)))
+uint8x16_t vreinterpretq_u8(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u16)))
+uint8x16_t vreinterpretq_u8_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u16)))
+uint8x16_t vreinterpretq_u8(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u32)))
+uint8x16_t vreinterpretq_u8_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u32)))
+uint8x16_t vreinterpretq_u8(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u64)))
+uint8x16_t vreinterpretq_u8_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_u64)))
+uint8x16_t vreinterpretq_u8(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s16)))
+int16x8_t vrhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s16)))
+int16x8_t vrhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s32)))
+int32x4_t vrhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s32)))
+int32x4_t vrhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s8)))
+int8x16_t vrhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_s8)))
+int8x16_t vrhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u16)))
+uint16x8_t vrhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u16)))
+uint16x8_t vrhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u32)))
+uint32x4_t vrhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u32)))
+uint32x4_t vrhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u8)))
+uint8x16_t vrhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_m_u8)))
+uint8x16_t vrhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s16)))
+int16x8_t vrhaddq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s16)))
+int16x8_t vrhaddq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s32)))
+int32x4_t vrhaddq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s32)))
+int32x4_t vrhaddq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s8)))
+int8x16_t vrhaddq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_s8)))
+int8x16_t vrhaddq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u16)))
+uint16x8_t vrhaddq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u16)))
+uint16x8_t vrhaddq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u32)))
+uint32x4_t vrhaddq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u32)))
+uint32x4_t vrhaddq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u8)))
+uint8x16_t vrhaddq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_u8)))
+uint8x16_t vrhaddq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s16)))
+int16x8_t vrhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s16)))
+int16x8_t vrhaddq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s32)))
+int32x4_t vrhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s32)))
+int32x4_t vrhaddq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s8)))
+int8x16_t vrhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_s8)))
+int8x16_t vrhaddq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u16)))
+uint16x8_t vrhaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u16)))
+uint16x8_t vrhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u32)))
+uint32x4_t vrhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u32)))
+uint32x4_t vrhaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u8)))
+uint8x16_t vrhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrhaddq_x_u8)))
+uint8x16_t vrhaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32)))
+int64_t vrmlaldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32)))
+int64_t vrmlaldavhaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32)))
+uint64_t vrmlaldavhaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32)))
+uint64_t vrmlaldavhaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_s32)))
+int64_t vrmlaldavhaq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_s32)))
+int64_t vrmlaldavhaq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_u32)))
+uint64_t vrmlaldavhaq_u32(uint64_t, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaq_u32)))
+uint64_t vrmlaldavhaq(uint64_t, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32)))
+int64_t vrmlaldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32)))
+int64_t vrmlaldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaxq_s32)))
+int64_t vrmlaldavhaxq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhaxq_s32)))
+int64_t vrmlaldavhaxq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_p_s32)))
+int64_t vrmlaldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_p_s32)))
+int64_t vrmlaldavhq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_p_u32)))
+uint64_t vrmlaldavhq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_p_u32)))
+uint64_t vrmlaldavhq_p(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_s32)))
+int64_t vrmlaldavhq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_s32)))
+int64_t vrmlaldavhq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_u32)))
+uint64_t vrmlaldavhq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhq_u32)))
+uint64_t vrmlaldavhq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32)))
+int64_t vrmlaldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32)))
+int64_t vrmlaldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhxq_s32)))
+int64_t vrmlaldavhxq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlaldavhxq_s32)))
+int64_t vrmlaldavhxq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32)))
+int64_t vrmlsldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32)))
+int64_t vrmlsldavhaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaq_s32)))
+int64_t vrmlsldavhaq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaq_s32)))
+int64_t vrmlsldavhaq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32)))
+int64_t vrmlsldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32)))
+int64_t vrmlsldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaxq_s32)))
+int64_t vrmlsldavhaxq_s32(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhaxq_s32)))
+int64_t vrmlsldavhaxq(int64_t, int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhq_p_s32)))
+int64_t vrmlsldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhq_p_s32)))
+int64_t vrmlsldavhq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhq_s32)))
+int64_t vrmlsldavhq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhq_s32)))
+int64_t vrmlsldavhq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32)))
+int64_t vrmlsldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32)))
+int64_t vrmlsldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhxq_s32)))
+int64_t vrmlsldavhxq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmlsldavhxq_s32)))
+int64_t vrmlsldavhxq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s16)))
+int16x8_t vrmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s16)))
+int16x8_t vrmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s32)))
+int32x4_t vrmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s32)))
+int32x4_t vrmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s8)))
+int8x16_t vrmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_s8)))
+int8x16_t vrmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u16)))
+uint16x8_t vrmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u16)))
+uint16x8_t vrmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u32)))
+uint32x4_t vrmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u32)))
+uint32x4_t vrmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u8)))
+uint8x16_t vrmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_m_u8)))
+uint8x16_t vrmulhq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s16)))
+int16x8_t vrmulhq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s16)))
+int16x8_t vrmulhq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s32)))
+int32x4_t vrmulhq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s32)))
+int32x4_t vrmulhq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s8)))
+int8x16_t vrmulhq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_s8)))
+int8x16_t vrmulhq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u16)))
+uint16x8_t vrmulhq_u16(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u16)))
+uint16x8_t vrmulhq(uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u32)))
+uint32x4_t vrmulhq_u32(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u32)))
+uint32x4_t vrmulhq(uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u8)))
+uint8x16_t vrmulhq_u8(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_u8)))
+uint8x16_t vrmulhq(uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s16)))
+int16x8_t vrmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s16)))
+int16x8_t vrmulhq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s32)))
+int32x4_t vrmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s32)))
+int32x4_t vrmulhq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s8)))
+int8x16_t vrmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_s8)))
+int8x16_t vrmulhq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u16)))
+uint16x8_t vrmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u16)))
+uint16x8_t vrmulhq_x(uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u32)))
+uint32x4_t vrmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u32)))
+uint32x4_t vrmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u8)))
+uint8x16_t vrmulhq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrmulhq_x_u8)))
+uint8x16_t vrmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s16)))
+int16x8_t vrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s16)))
+int16x8_t vrshlq_m_n(int16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s32)))
+int32x4_t vrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s32)))
+int32x4_t vrshlq_m_n(int32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s8)))
+int8x16_t vrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_s8)))
+int8x16_t vrshlq_m_n(int8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u16)))
+uint16x8_t vrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u16)))
+uint16x8_t vrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u32)))
+uint32x4_t vrshlq_m_n_u32(uint32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u32)))
+uint32x4_t vrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u8)))
+uint8x16_t vrshlq_m_n_u8(uint8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_n_u8)))
+uint8x16_t vrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s16)))
+int16x8_t vrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s16)))
+int16x8_t vrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s32)))
+int32x4_t vrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s32)))
+int32x4_t vrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s8)))
+int8x16_t vrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_s8)))
+int8x16_t vrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u16)))
+uint16x8_t vrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u16)))
+uint16x8_t vrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u32)))
+uint32x4_t vrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u32)))
+uint32x4_t vrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u8)))
+uint8x16_t vrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_m_u8)))
+uint8x16_t vrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s16)))
+int16x8_t vrshlq_n_s16(int16x8_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s16)))
+int16x8_t vrshlq(int16x8_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s32)))
+int32x4_t vrshlq_n_s32(int32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s32)))
+int32x4_t vrshlq(int32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s8)))
+int8x16_t vrshlq_n_s8(int8x16_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_s8)))
+int8x16_t vrshlq(int8x16_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u16)))
+uint16x8_t vrshlq_n_u16(uint16x8_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u16)))
+uint16x8_t vrshlq(uint16x8_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u32)))
+uint32x4_t vrshlq_n_u32(uint32x4_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u32)))
+uint32x4_t vrshlq(uint32x4_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u8)))
+uint8x16_t vrshlq_n_u8(uint8x16_t, int32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_n_u8)))
+uint8x16_t vrshlq(uint8x16_t, int32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s16)))
+int16x8_t vrshlq_s16(int16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s16)))
+int16x8_t vrshlq(int16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s32)))
+int32x4_t vrshlq_s32(int32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s32)))
+int32x4_t vrshlq(int32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s8)))
+int8x16_t vrshlq_s8(int8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_s8)))
+int8x16_t vrshlq(int8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u16)))
+uint16x8_t vrshlq_u16(uint16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u16)))
+uint16x8_t vrshlq(uint16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u32)))
+uint32x4_t vrshlq_u32(uint32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u32)))
+uint32x4_t vrshlq(uint32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u8)))
+uint8x16_t vrshlq_u8(uint8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_u8)))
+uint8x16_t vrshlq(uint8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s16)))
+int16x8_t vrshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s16)))
+int16x8_t vrshlq_x(int16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s32)))
+int32x4_t vrshlq_x_s32(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s32)))
+int32x4_t vrshlq_x(int32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s8)))
+int8x16_t vrshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_s8)))
+int8x16_t vrshlq_x(int8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u16)))
+uint16x8_t vrshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u16)))
+uint16x8_t vrshlq_x(uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u32)))
+uint32x4_t vrshlq_x_u32(uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u32)))
+uint32x4_t vrshlq_x(uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u8)))
+uint8x16_t vrshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshlq_x_u8)))
+uint8x16_t vrshlq_x(uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_s16)))
+int8x16_t vrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_s16)))
+int8x16_t vrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_s32)))
+int16x8_t vrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_s32)))
+int16x8_t vrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_u16)))
+uint8x16_t vrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_u16)))
+uint8x16_t vrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_u32)))
+uint16x8_t vrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_m_n_u32)))
+uint16x8_t vrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_s16)))
+int8x16_t vrshrnbq_n_s16(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_s16)))
+int8x16_t vrshrnbq(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_s32)))
+int16x8_t vrshrnbq_n_s32(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_s32)))
+int16x8_t vrshrnbq(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_u16)))
+uint8x16_t vrshrnbq_n_u16(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_u16)))
+uint8x16_t vrshrnbq(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_u32)))
+uint16x8_t vrshrnbq_n_u32(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrnbq_n_u32)))
+uint16x8_t vrshrnbq(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_s16)))
+int8x16_t vrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_s16)))
+int8x16_t vrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_s32)))
+int16x8_t vrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_s32)))
+int16x8_t vrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_u16)))
+uint8x16_t vrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_u16)))
+uint8x16_t vrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_u32)))
+uint16x8_t vrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_m_n_u32)))
+uint16x8_t vrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_s16)))
+int8x16_t vrshrntq_n_s16(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_s16)))
+int8x16_t vrshrntq(int8x16_t, int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_s32)))
+int16x8_t vrshrntq_n_s32(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_s32)))
+int16x8_t vrshrntq(int16x8_t, int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_u16)))
+uint8x16_t vrshrntq_n_u16(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_u16)))
+uint8x16_t vrshrntq(uint8x16_t, uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_u32)))
+uint16x8_t vrshrntq_n_u32(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrntq_n_u32)))
+uint16x8_t vrshrntq(uint16x8_t, uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s16)))
+int16x8_t vrshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s16)))
+int16x8_t vrshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s32)))
+int32x4_t vrshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s32)))
+int32x4_t vrshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s8)))
+int8x16_t vrshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_s8)))
+int8x16_t vrshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u16)))
+uint16x8_t vrshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u16)))
+uint16x8_t vrshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u32)))
+uint32x4_t vrshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u32)))
+uint32x4_t vrshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u8)))
+uint8x16_t vrshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_m_n_u8)))
+uint8x16_t vrshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s16)))
+int16x8_t vrshrq_n_s16(int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s16)))
+int16x8_t vrshrq(int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s32)))
+int32x4_t vrshrq_n_s32(int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s32)))
+int32x4_t vrshrq(int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s8)))
+int8x16_t vrshrq_n_s8(int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_s8)))
+int8x16_t vrshrq(int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u16)))
+uint16x8_t vrshrq_n_u16(uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u16)))
+uint16x8_t vrshrq(uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u32)))
+uint32x4_t vrshrq_n_u32(uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u32)))
+uint32x4_t vrshrq(uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u8)))
+uint8x16_t vrshrq_n_u8(uint8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_n_u8)))
+uint8x16_t vrshrq(uint8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s16)))
+int16x8_t vrshrq_x_n_s16(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s16)))
+int16x8_t vrshrq_x(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s32)))
+int32x4_t vrshrq_x_n_s32(int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s32)))
+int32x4_t vrshrq_x(int32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s8)))
+int8x16_t vrshrq_x_n_s8(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_s8)))
+int8x16_t vrshrq_x(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u16)))
+uint16x8_t vrshrq_x_n_u16(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u16)))
+uint16x8_t vrshrq_x(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u32)))
+uint32x4_t vrshrq_x_n_u32(uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u32)))
+uint32x4_t vrshrq_x(uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u8)))
+uint8x16_t vrshrq_x_n_u8(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vrshrq_x_n_u8)))
+uint8x16_t vrshrq_x(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s16)))
+int16x8_t vsetq_lane_s16(int16_t, int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s16)))
+int16x8_t vsetq_lane(int16_t, int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s32)))
+int32x4_t vsetq_lane_s32(int32_t, int32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s32)))
+int32x4_t vsetq_lane(int32_t, int32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s64)))
+int64x2_t vsetq_lane_s64(int64_t, int64x2_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s64)))
+int64x2_t vsetq_lane(int64_t, int64x2_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s8)))
+int8x16_t vsetq_lane_s8(int8_t, int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_s8)))
+int8x16_t vsetq_lane(int8_t, int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u16)))
+uint16x8_t vsetq_lane_u16(uint16_t, uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u16)))
+uint16x8_t vsetq_lane(uint16_t, uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u32)))
+uint32x4_t vsetq_lane_u32(uint32_t, uint32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u32)))
+uint32x4_t vsetq_lane(uint32_t, uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u64)))
+uint64x2_t vsetq_lane_u64(uint64_t, uint64x2_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u64)))
+uint64x2_t vsetq_lane(uint64_t, uint64x2_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u8)))
+uint8x16_t vsetq_lane_u8(uint8_t, uint8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_u8)))
+uint8x16_t vsetq_lane(uint8_t, uint8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_s16)))
+int32x4_t vshllbq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_s16)))
+int32x4_t vshllbq_m(int32x4_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_s8)))
+int16x8_t vshllbq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_s8)))
+int16x8_t vshllbq_m(int16x8_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_u16)))
+uint32x4_t vshllbq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_u16)))
+uint32x4_t vshllbq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_u8)))
+uint16x8_t vshllbq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_m_n_u8)))
+uint16x8_t vshllbq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_s16)))
+int32x4_t vshllbq_n_s16(int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_s16)))
+int32x4_t vshllbq(int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_s8)))
+int16x8_t vshllbq_n_s8(int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_s8)))
+int16x8_t vshllbq(int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_u16)))
+uint32x4_t vshllbq_n_u16(uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_u16)))
+uint32x4_t vshllbq(uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_u8)))
+uint16x8_t vshllbq_n_u8(uint8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_n_u8)))
+uint16x8_t vshllbq(uint8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_s16)))
+int32x4_t vshllbq_x_n_s16(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_s16)))
+int32x4_t vshllbq_x(int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_s8)))
+int16x8_t vshllbq_x_n_s8(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_s8)))
+int16x8_t vshllbq_x(int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_u16)))
+uint32x4_t vshllbq_x_n_u16(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_u16)))
+uint32x4_t vshllbq_x(uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_u8)))
+uint16x8_t vshllbq_x_n_u8(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshllbq_x_n_u8)))
+uint16x8_t vshllbq_x(uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_s16)))
+int32x4_t vshlltq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_s16)))
+int32x4_t vshlltq_m(int32x4_t, int16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_s8)))
+int16x8_t vshlltq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_s8)))
+int16x8_t vshlltq_m(int16x8_t, int8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_u16)))
+uint32x4_t vshlltq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_u16)))
+uint32x4_t vshlltq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_u8)))
+uint16x8_t vshlltq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_m_n_u8)))
+uint16x8_t vshlltq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_s16)))
+int32x4_t vshlltq_n_s16(int16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_s16)))
+int32x4_t vshlltq(int16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_s8)))
+int16x8_t vshlltq_n_s8(int8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_s8)))
+int16x8_t vshlltq(int8x16_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_u16)))
+uint32x4_t vshlltq_n_u16(uint16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_u16)))
+uint32x4_t vshlltq(uint16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_u8)))
+uint16x8_t vshlltq_n_u8(uint8x16_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_u8)))
+uint16x8_t vshlltq(uint8x16_t, int);
+static __inline__
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_n_u8))) +uint16x8_t vshlltq(uint8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_s16))) +int32x4_t vshlltq_x_n_s16(int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_s16))) +int32x4_t vshlltq_x(int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_s8))) +int16x8_t vshlltq_x_n_s8(int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_s8))) +int16x8_t vshlltq_x(int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_u16))) +uint32x4_t vshlltq_x_n_u16(uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_u16))) +uint32x4_t vshlltq_x(uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_u8))) +uint16x8_t vshlltq_x_n_u8(uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlltq_x_n_u8))) +uint16x8_t vshlltq_x(uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s16))) +int16x8_t vshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s16))) +int16x8_t vshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s32))) +int32x4_t vshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s32))) +int32x4_t vshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s8))) +int8x16_t vshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_s8))) +int8x16_t vshlq_m_n(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u16))) +uint16x8_t vshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u16))) +uint16x8_t vshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u32))) +uint32x4_t vshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u32))) +uint32x4_t vshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u8))) +uint8x16_t vshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_n_u8))) +uint8x16_t vshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s16))) +int16x8_t vshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s16))) +int16x8_t vshlq_m_r(int16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s32))) +int32x4_t vshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s32))) +int32x4_t vshlq_m_r(int32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s8))) +int8x16_t vshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_s8))) +int8x16_t vshlq_m_r(int8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u16))) +uint16x8_t vshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u16))) +uint16x8_t vshlq_m_r(uint16x8_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u32))) +uint32x4_t vshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u32))) +uint32x4_t vshlq_m_r(uint32x4_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u8))) +uint8x16_t vshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_r_u8))) +uint8x16_t vshlq_m_r(uint8x16_t, int32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s16))) +int16x8_t vshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s16))) +int16x8_t vshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s32))) +int32x4_t vshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s32))) +int32x4_t vshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s8))) +int8x16_t vshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_s8))) +int8x16_t vshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u16))) +uint16x8_t vshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u16))) +uint16x8_t vshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u32))) +uint32x4_t vshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u32))) +uint32x4_t vshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u8))) +uint8x16_t vshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vshlq_m_u8))) +uint8x16_t vshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s16))) +int16x8_t vshlq_n_s16(int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s16))) +int16x8_t vshlq_n(int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s32))) +int32x4_t vshlq_n_s32(int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s32))) +int32x4_t vshlq_n(int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s8))) +int8x16_t vshlq_n_s8(int8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_s8))) +int8x16_t vshlq_n(int8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u16))) +uint16x8_t vshlq_n_u16(uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u16))) +uint16x8_t vshlq_n(uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u32))) +uint32x4_t vshlq_n_u32(uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u32))) +uint32x4_t vshlq_n(uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u8))) +uint8x16_t vshlq_n_u8(uint8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_n_u8))) +uint8x16_t vshlq_n(uint8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s16))) +int16x8_t vshlq_r_s16(int16x8_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s16))) +int16x8_t vshlq_r(int16x8_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s32))) +int32x4_t vshlq_r_s32(int32x4_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s32))) +int32x4_t vshlq_r(int32x4_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s8))) +int8x16_t vshlq_r_s8(int8x16_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_s8))) +int8x16_t vshlq_r(int8x16_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u16))) +uint16x8_t vshlq_r_u16(uint16x8_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u16))) +uint16x8_t vshlq_r(uint16x8_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u32))) +uint32x4_t vshlq_r_u32(uint32x4_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u32))) +uint32x4_t vshlq_r(uint32x4_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u8))) +uint8x16_t vshlq_r_u8(uint8x16_t, int32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_r_u8))) +uint8x16_t vshlq_r(uint8x16_t, int32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_s16))) +int16x8_t vshlq_s16(int16x8_t, int16x8_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_s16))) +int16x8_t vshlq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_s32))) +int32x4_t vshlq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_s32))) +int32x4_t vshlq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_s8))) +int8x16_t vshlq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_s8))) +int8x16_t vshlq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_u16))) +uint16x8_t vshlq_u16(uint16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_u16))) +uint16x8_t vshlq(uint16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_u32))) +uint32x4_t vshlq_u32(uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_u32))) +uint32x4_t vshlq(uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_u8))) +uint8x16_t vshlq_u8(uint8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_u8))) +uint8x16_t vshlq(uint8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s16))) +int16x8_t vshlq_x_n_s16(int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s16))) +int16x8_t vshlq_x_n(int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s32))) +int32x4_t vshlq_x_n_s32(int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s32))) +int32x4_t vshlq_x_n(int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s8))) +int8x16_t vshlq_x_n_s8(int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_s8))) +int8x16_t vshlq_x_n(int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u16))) +uint16x8_t vshlq_x_n_u16(uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u16))) +uint16x8_t vshlq_x_n(uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u32))) +uint32x4_t vshlq_x_n_u32(uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u32))) +uint32x4_t vshlq_x_n(uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u8))) +uint8x16_t vshlq_x_n_u8(uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_n_u8))) +uint8x16_t vshlq_x_n(uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s16))) +int16x8_t vshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, 
__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s16))) +int16x8_t vshlq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s32))) +int32x4_t vshlq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s32))) +int32x4_t vshlq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s8))) +int8x16_t vshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_s8))) +int8x16_t vshlq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u16))) +uint16x8_t vshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u16))) +uint16x8_t vshlq_x(uint16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u32))) +uint32x4_t vshlq_x_u32(uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u32))) +uint32x4_t vshlq_x(uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u8))) +uint8x16_t vshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshlq_x_u8))) +uint8x16_t vshlq_x(uint8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) +int8x16_t vshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) +int8x16_t vshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) +int16x8_t vshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) +int16x8_t vshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) +uint8x16_t vshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) +uint8x16_t vshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) +uint16x8_t vshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) +uint16x8_t vshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_s16))) +int8x16_t vshrnbq_n_s16(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_s16))) +int8x16_t vshrnbq(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_s32))) +int16x8_t vshrnbq_n_s32(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_s32))) +int16x8_t 
vshrnbq(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_u16))) +uint8x16_t vshrnbq_n_u16(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_u16))) +uint8x16_t vshrnbq(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_u32))) +uint16x8_t vshrnbq_n_u32(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrnbq_n_u32))) +uint16x8_t vshrnbq(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_s16))) +int8x16_t vshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_s16))) +int8x16_t vshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_s32))) +int16x8_t vshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_s32))) +int16x8_t vshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_u16))) +uint8x16_t vshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_u16))) +uint8x16_t vshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_u32))) +uint16x8_t vshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_m_n_u32))) +uint16x8_t vshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_s16))) +int8x16_t vshrntq_n_s16(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_s16))) +int8x16_t vshrntq(int8x16_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_s32))) +int16x8_t vshrntq_n_s32(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_s32))) +int16x8_t vshrntq(int16x8_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_u16))) +uint8x16_t vshrntq_n_u16(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_u16))) +uint8x16_t vshrntq(uint8x16_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_u32))) +uint16x8_t vshrntq_n_u32(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrntq_n_u32))) +uint16x8_t vshrntq(uint16x8_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s16))) +int16x8_t vshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s16))) +int16x8_t vshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s32))) +int32x4_t vshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s32))) +int32x4_t vshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s8))) +int8x16_t vshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_s8))) +int8x16_t vshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u16))) +uint16x8_t vshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u16))) +uint16x8_t vshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u32))) +uint32x4_t vshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u32))) +uint32x4_t vshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u8))) +uint8x16_t vshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_m_n_u8))) +uint8x16_t vshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s16))) +int16x8_t vshrq_n_s16(int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s16))) +int16x8_t vshrq(int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s32))) +int32x4_t vshrq_n_s32(int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s32))) +int32x4_t vshrq(int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s8))) +int8x16_t vshrq_n_s8(int8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_s8))) +int8x16_t vshrq(int8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u16))) +uint16x8_t vshrq_n_u16(uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u16))) +uint16x8_t vshrq(uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u32))) +uint32x4_t vshrq_n_u32(uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u32))) +uint32x4_t vshrq(uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u8))) +uint8x16_t vshrq_n_u8(uint8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_n_u8))) +uint8x16_t vshrq(uint8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s16))) +int16x8_t vshrq_x_n_s16(int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s16))) +int16x8_t vshrq_x(int16x8_t, int, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s32))) +int32x4_t vshrq_x_n_s32(int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s32))) +int32x4_t vshrq_x(int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s8))) +int8x16_t vshrq_x_n_s8(int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_s8))) +int8x16_t vshrq_x(int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u16))) +uint16x8_t vshrq_x_n_u16(uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u16))) +uint16x8_t vshrq_x(uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u32))) +uint32x4_t vshrq_x_n_u32(uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u32))) +uint32x4_t vshrq_x(uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u8))) +uint8x16_t vshrq_x_n_u8(uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vshrq_x_n_u8))) +uint8x16_t vshrq_x(uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s16))) +int16x8_t vsliq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s16))) +int16x8_t vsliq_m(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s32))) +int32x4_t vsliq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s32))) +int32x4_t vsliq_m(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s8))) +int8x16_t vsliq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_s8))) +int8x16_t vsliq_m(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u16))) +uint16x8_t vsliq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u16))) +uint16x8_t vsliq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u32))) +uint32x4_t vsliq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u32))) +uint32x4_t vsliq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u8))) +uint8x16_t vsliq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_m_n_u8))) +uint8x16_t vsliq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s16))) +int16x8_t 
vsliq_n_s16(int16x8_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s16))) +int16x8_t vsliq(int16x8_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s32))) +int32x4_t vsliq_n_s32(int32x4_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s32))) +int32x4_t vsliq(int32x4_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s8))) +int8x16_t vsliq_n_s8(int8x16_t, int8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_s8))) +int8x16_t vsliq(int8x16_t, int8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u16))) +uint16x8_t vsliq_n_u16(uint16x8_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u16))) +uint16x8_t vsliq(uint16x8_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u32))) +uint32x4_t vsliq_n_u32(uint32x4_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u32))) +uint32x4_t vsliq(uint32x4_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u8))) +uint8x16_t vsliq_n_u8(uint8x16_t, uint8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsliq_n_u8))) +uint8x16_t vsliq(uint8x16_t, uint8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s16))) +int16x8_t vsriq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s16))) +int16x8_t vsriq_m(int16x8_t, int16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s32))) +int32x4_t vsriq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s32))) +int32x4_t vsriq_m(int32x4_t, int32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s8))) +int8x16_t vsriq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_s8))) +int8x16_t vsriq_m(int8x16_t, int8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u16))) +uint16x8_t vsriq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u16))) +uint16x8_t vsriq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u32))) +uint32x4_t vsriq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u32))) +uint32x4_t vsriq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u8))) +uint8x16_t vsriq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_m_n_u8))) +uint8x16_t 
vsriq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s16))) +int16x8_t vsriq_n_s16(int16x8_t, int16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s16))) +int16x8_t vsriq(int16x8_t, int16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s32))) +int32x4_t vsriq_n_s32(int32x4_t, int32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s32))) +int32x4_t vsriq(int32x4_t, int32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s8))) +int8x16_t vsriq_n_s8(int8x16_t, int8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_s8))) +int8x16_t vsriq(int8x16_t, int8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u16))) +uint16x8_t vsriq_n_u16(uint16x8_t, uint16x8_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u16))) +uint16x8_t vsriq(uint16x8_t, uint16x8_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u32))) +uint32x4_t vsriq_n_u32(uint32x4_t, uint32x4_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u32))) +uint32x4_t vsriq(uint32x4_t, uint32x4_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u8))) +uint8x16_t vsriq_n_u8(uint8x16_t, uint8x16_t, int); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsriq_n_u8))) +uint8x16_t vsriq(uint8x16_t, uint8x16_t, int); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s16))) +void vst1q_p_s16(int16_t *, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s16))) +void vst1q_p(int16_t *, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s32))) +void vst1q_p_s32(int32_t *, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s32))) +void vst1q_p(int32_t *, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s8))) +void vst1q_p_s8(int8_t *, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_s8))) +void vst1q_p(int8_t *, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u16))) +void vst1q_p_u16(uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u16))) +void vst1q_p(uint16_t *, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u32))) +void vst1q_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u32))) +void vst1q_p(uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u8))) +void vst1q_p_u8(uint8_t *, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_u8))) +void vst1q_p(uint8_t *, 
uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_s16))) +void vst1q_s16(int16_t *, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_s16))) +void vst1q(int16_t *, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_s32))) +void vst1q_s32(int32_t *, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_s32))) +void vst1q(int32_t *, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_s8))) +void vst1q_s8(int8_t *, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_s8))) +void vst1q(int8_t *, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_u16))) +void vst1q_u16(uint16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_u16))) +void vst1q(uint16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_u32))) +void vst1q_u32(uint32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_u32))) +void vst1q(uint32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_u8))) +void vst1q_u8(uint8_t *, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_u8))) +void vst1q(uint8_t *, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_s16))) +void vst2q_s16(int16_t *, int16x8x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_s16))) +void vst2q(int16_t *, int16x8x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_s32))) +void vst2q_s32(int32_t *, int32x4x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_s32))) +void vst2q(int32_t *, int32x4x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_s8))) +void vst2q_s8(int8_t *, int8x16x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_s8))) +void vst2q(int8_t *, int8x16x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_u16))) +void vst2q_u16(uint16_t *, uint16x8x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_u16))) +void vst2q(uint16_t *, uint16x8x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_u32))) +void vst2q_u32(uint32_t *, uint32x4x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_u32))) +void vst2q(uint32_t *, uint32x4x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_u8))) +void vst2q_u8(uint8_t *, uint8x16x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_u8))) +void vst2q(uint8_t *, uint8x16x2_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_s16))) +void vst4q_s16(int16_t *, int16x8x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_s16))) +void vst4q(int16_t *, int16x8x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_s32))) 
+void vst4q_s32(int32_t *, int32x4x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_s32)))
+void vst4q(int32_t *, int32x4x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_s8)))
+void vst4q_s8(int8_t *, int8x16x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_s8)))
+void vst4q(int8_t *, int8x16x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_u16)))
+void vst4q_u16(uint16_t *, uint16x8x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_u16)))
+void vst4q(uint16_t *, uint16x8x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_u32)))
+void vst4q_u32(uint32_t *, uint32x4x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_u32)))
+void vst4q(uint32_t *, uint32x4x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_u8)))
+void vst4q_u8(uint8_t *, uint8x16x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_u8)))
+void vst4q(uint8_t *, uint8x16x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s16)))
+void vstrbq_p_s16(int8_t *, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s16)))
+void vstrbq_p(int8_t *, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s32)))
+void vstrbq_p_s32(int8_t *, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s32)))
+void vstrbq_p(int8_t *, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s8)))
+void vstrbq_p_s8(int8_t *, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_s8)))
+void vstrbq_p(int8_t *, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u16)))
+void vstrbq_p_u16(uint8_t *, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u16)))
+void vstrbq_p(uint8_t *, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u32)))
+void vstrbq_p_u32(uint8_t *, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u32)))
+void vstrbq_p(uint8_t *, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u8)))
+void vstrbq_p_u8(uint8_t *, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_p_u8)))
+void vstrbq_p(uint8_t *, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s16)))
+void vstrbq_s16(int8_t *, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s16)))
+void vstrbq(int8_t *, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s32)))
+void vstrbq_s32(int8_t *, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s32)))
+void vstrbq(int8_t *, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s8)))
+void vstrbq_s8(int8_t *, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_s8)))
+void vstrbq(int8_t *, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16)))
+void vstrbq_scatter_offset_p_s16(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16)))
+void vstrbq_scatter_offset_p(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32)))
+void vstrbq_scatter_offset_p_s32(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32)))
+void vstrbq_scatter_offset_p(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8)))
+void vstrbq_scatter_offset_p_s8(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8)))
+void vstrbq_scatter_offset_p(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16)))
+void vstrbq_scatter_offset_p_u16(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16)))
+void vstrbq_scatter_offset_p(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32)))
+void vstrbq_scatter_offset_p_u32(uint8_t *, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32)))
+void vstrbq_scatter_offset_p(uint8_t *, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8)))
+void vstrbq_scatter_offset_p_u8(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8)))
+void vstrbq_scatter_offset_p(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16)))
+void vstrbq_scatter_offset_s16(int8_t *, uint16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16)))
+void vstrbq_scatter_offset(int8_t *, uint16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32)))
+void vstrbq_scatter_offset_s32(int8_t *, uint32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32)))
+void vstrbq_scatter_offset(int8_t *, uint32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8)))
+void vstrbq_scatter_offset_s8(int8_t *, uint8x16_t, int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8)))
+void vstrbq_scatter_offset(int8_t *, uint8x16_t, int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16)))
+void vstrbq_scatter_offset_u16(uint8_t *, uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16)))
+void vstrbq_scatter_offset(uint8_t *, uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32)))
+void vstrbq_scatter_offset_u32(uint8_t *, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32)))
+void vstrbq_scatter_offset(uint8_t *, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8)))
+void vstrbq_scatter_offset_u8(uint8_t *, uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8)))
+void vstrbq_scatter_offset(uint8_t *, uint8x16_t, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u16)))
+void vstrbq_u16(uint8_t *, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u16)))
+void vstrbq(uint8_t *, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u32)))
+void vstrbq_u32(uint8_t *, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u32)))
+void vstrbq(uint8_t *, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u8)))
+void vstrbq_u8(uint8_t *, uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrbq_u8)))
+void vstrbq(uint8_t *, uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64)))
+void vstrdq_scatter_base_p_s64(uint64x2_t, int, int64x2_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64)))
+void vstrdq_scatter_base_p(uint64x2_t, int, int64x2_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64)))
+void vstrdq_scatter_base_p_u64(uint64x2_t, int, uint64x2_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64)))
+void vstrdq_scatter_base_p(uint64x2_t, int, uint64x2_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_s64)))
+void vstrdq_scatter_base_s64(uint64x2_t, int, int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_s64)))
+void vstrdq_scatter_base(uint64x2_t, int, int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_u64)))
+void vstrdq_scatter_base_u64(uint64x2_t, int, uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_u64)))
+void vstrdq_scatter_base(uint64x2_t, int, uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64)))
+void vstrdq_scatter_base_wb_p_s64(uint64x2_t *, int, int64x2_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64)))
+void vstrdq_scatter_base_wb_p(uint64x2_t *, int, int64x2_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64)))
+void vstrdq_scatter_base_wb_p_u64(uint64x2_t *, int, uint64x2_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64)))
+void vstrdq_scatter_base_wb_p(uint64x2_t *, int, uint64x2_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64)))
+void vstrdq_scatter_base_wb_s64(uint64x2_t *, int, int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64)))
+void vstrdq_scatter_base_wb(uint64x2_t *, int, int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64)))
+void vstrdq_scatter_base_wb_u64(uint64x2_t *, int, uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64)))
+void vstrdq_scatter_base_wb(uint64x2_t *, int, uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64)))
+void vstrdq_scatter_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64)))
+void vstrdq_scatter_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64)))
+void vstrdq_scatter_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64)))
+void vstrdq_scatter_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64)))
+void vstrdq_scatter_offset_s64(int64_t *, uint64x2_t, int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64)))
+void vstrdq_scatter_offset(int64_t *, uint64x2_t, int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64)))
+void vstrdq_scatter_offset_u64(uint64_t *, uint64x2_t, uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64)))
+void vstrdq_scatter_offset(uint64_t *, uint64x2_t, uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64)))
+void vstrdq_scatter_shifted_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64)))
+void vstrdq_scatter_shifted_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64)))
+void vstrdq_scatter_shifted_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64)))
+void vstrdq_scatter_shifted_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64)))
+void vstrdq_scatter_shifted_offset_s64(int64_t *, uint64x2_t, int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64)))
+void vstrdq_scatter_shifted_offset(int64_t *, uint64x2_t, int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64)))
+void vstrdq_scatter_shifted_offset_u64(uint64_t *, uint64x2_t, uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64)))
+void vstrdq_scatter_shifted_offset(uint64_t *, uint64x2_t, uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_s16)))
+void vstrhq_p_s16(int16_t *, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_s16)))
+void vstrhq_p(int16_t *, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_s32)))
+void vstrhq_p_s32(int16_t *, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_s32)))
+void vstrhq_p(int16_t *, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_u16)))
+void vstrhq_p_u16(uint16_t *, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_u16)))
+void vstrhq_p(uint16_t *, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_u32)))
+void vstrhq_p_u32(uint16_t *, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_u32)))
+void vstrhq_p(uint16_t *, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_s16)))
+void vstrhq_s16(int16_t *, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_s16)))
+void vstrhq(int16_t *, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_s32)))
+void vstrhq_s32(int16_t *, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_s32)))
+void vstrhq(int16_t *, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16)))
+void vstrhq_scatter_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16)))
+void vstrhq_scatter_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32)))
+void vstrhq_scatter_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32)))
+void vstrhq_scatter_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16)))
+void vstrhq_scatter_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16)))
+void vstrhq_scatter_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32)))
+void vstrhq_scatter_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32)))
+void vstrhq_scatter_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16)))
+void vstrhq_scatter_offset_s16(int16_t *, uint16x8_t, int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16)))
+void vstrhq_scatter_offset(int16_t *, uint16x8_t, int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32)))
+void vstrhq_scatter_offset_s32(int16_t *, uint32x4_t, int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32)))
+void vstrhq_scatter_offset(int16_t *, uint32x4_t, int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16)))
+void vstrhq_scatter_offset_u16(uint16_t *, uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16)))
+void vstrhq_scatter_offset(uint16_t *, uint16x8_t, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32)))
+void vstrhq_scatter_offset_u32(uint16_t *, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32)))
+void vstrhq_scatter_offset(uint16_t *, uint32x4_t, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16)))
+void vstrhq_scatter_shifted_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16)))
+void vstrhq_scatter_shifted_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32)))
+void vstrhq_scatter_shifted_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32)))
+void vstrhq_scatter_shifted_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16)))
+void vstrhq_scatter_shifted_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16)))
+void vstrhq_scatter_shifted_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32)))
+void vstrhq_scatter_shifted_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable,
__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32))) +void vstrhq_scatter_shifted_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) +void vstrhq_scatter_shifted_offset_s16(int16_t *, uint16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) +void vstrhq_scatter_shifted_offset(int16_t *, uint16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) +void vstrhq_scatter_shifted_offset_s32(int16_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) +void vstrhq_scatter_shifted_offset(int16_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) +void vstrhq_scatter_shifted_offset_u16(uint16_t *, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) +void vstrhq_scatter_shifted_offset(uint16_t *, uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) +void vstrhq_scatter_shifted_offset_u32(uint16_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) +void vstrhq_scatter_shifted_offset(uint16_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_u16))) +void vstrhq_u16(uint16_t *, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_u16))) +void vstrhq(uint16_t *, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_u32))) +void vstrhq_u32(uint16_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_u32))) +void vstrhq(uint16_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_s32))) +void vstrwq_p_s32(int32_t *, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_s32))) +void vstrwq_p(int32_t *, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_u32))) +void vstrwq_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_u32))) +void vstrwq_p(uint32_t *, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_s32))) +void vstrwq_s32(int32_t *, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_s32))) +void vstrwq(int32_t *, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) +void vstrwq_scatter_base_p_s32(uint32x4_t, int, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) +void vstrwq_scatter_base_p(uint32x4_t, int, int32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) +void vstrwq_scatter_base_p_u32(uint32x4_t, int, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) +void vstrwq_scatter_base_p(uint32x4_t, int, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) +void vstrwq_scatter_base_s32(uint32x4_t, int, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) +void vstrwq_scatter_base(uint32x4_t, int, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) +void vstrwq_scatter_base_u32(uint32x4_t, int, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) +void vstrwq_scatter_base(uint32x4_t, int, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) +void vstrwq_scatter_base_wb_p_s32(uint32x4_t *, int, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) +void vstrwq_scatter_base_wb_p(uint32x4_t *, int, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) +void vstrwq_scatter_base_wb_p_u32(uint32x4_t *, int, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) +void vstrwq_scatter_base_wb_p(uint32x4_t *, int, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) +void vstrwq_scatter_base_wb_s32(uint32x4_t *, int, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) +void vstrwq_scatter_base_wb(uint32x4_t *, int, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) +void vstrwq_scatter_base_wb_u32(uint32x4_t *, int, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) +void vstrwq_scatter_base_wb(uint32x4_t *, int, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) +void vstrwq_scatter_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) +void vstrwq_scatter_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) +void vstrwq_scatter_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) +void vstrwq_scatter_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) +void vstrwq_scatter_offset_s32(int32_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) +void 
vstrwq_scatter_offset(int32_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) +void vstrwq_scatter_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) +void vstrwq_scatter_offset(uint32_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) +void vstrwq_scatter_shifted_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) +void vstrwq_scatter_shifted_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) +void vstrwq_scatter_shifted_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) +void vstrwq_scatter_shifted_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) +void vstrwq_scatter_shifted_offset_s32(int32_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) +void vstrwq_scatter_shifted_offset(int32_t *, uint32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) +void vstrwq_scatter_shifted_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) +void vstrwq_scatter_shifted_offset(uint32_t *, uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_u32))) +void vstrwq_u32(uint32_t *, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_u32))) +void vstrwq(uint32_t *, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s16))) +int16x8_t vsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s16))) +int16x8_t vsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s32))) +int32x4_t vsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s32))) +int32x4_t vsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s8))) +int8x16_t vsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_s8))) +int8x16_t vsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u16))) +uint16x8_t vsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u16))) +uint16x8_t 
vsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u32))) +uint32x4_t vsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u32))) +uint32x4_t vsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u8))) +uint8x16_t vsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_u8))) +uint8x16_t vsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_s16))) +int16x8_t vsubq_s16(int16x8_t, int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_s16))) +int16x8_t vsubq(int16x8_t, int16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_s32))) +int32x4_t vsubq_s32(int32x4_t, int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_s32))) +int32x4_t vsubq(int32x4_t, int32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_s8))) +int8x16_t vsubq_s8(int8x16_t, int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_s8))) +int8x16_t vsubq(int8x16_t, int8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_u16))) +uint16x8_t vsubq_u16(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_u16))) +uint16x8_t vsubq(uint16x8_t, uint16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_u32))) +uint32x4_t vsubq_u32(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_u32))) +uint32x4_t vsubq(uint32x4_t, uint32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_u8))) +uint8x16_t vsubq_u8(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_u8))) +uint8x16_t vsubq(uint8x16_t, uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s16))) +int16x8_t vsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s16))) +int16x8_t vsubq_x(int16x8_t, int16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s32))) +int32x4_t vsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s32))) +int32x4_t vsubq_x(int32x4_t, int32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s8))) +int8x16_t vsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_s8))) +int8x16_t vsubq_x(int8x16_t, int8x16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u16))) +uint16x8_t vsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u16))) 
+uint16x8_t vsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u32))) +uint32x4_t vsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u32))) +uint32x4_t vsubq_x(uint32x4_t, uint32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u8))) +uint8x16_t vsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_u8))) +uint8x16_t vsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s16))) +int16x8_t vuninitializedq(int16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s32))) +int32x4_t vuninitializedq(int32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s64))) +int64x2_t vuninitializedq(int64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s8))) +int8x16_t vuninitializedq(int8x16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u16))) +uint16x8_t vuninitializedq(uint16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u32))) +uint32x4_t vuninitializedq(uint32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u64))) +uint64x2_t vuninitializedq(uint64x2_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u8))) +uint8x16_t vuninitializedq(uint8x16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_s16))) +int16x8_t vuninitializedq_s16(); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_s32))) +int32x4_t vuninitializedq_s32(); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_s64))) +int64x2_t vuninitializedq_s64(); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_s8))) +int8x16_t vuninitializedq_s8(); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_u16))) +uint16x8_t vuninitializedq_u16(); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_u32))) +uint32x4_t vuninitializedq_u32(); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_u64))) +uint64x2_t vuninitializedq_u64(); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_u8))) +uint8x16_t vuninitializedq_u8(); + +#endif /* (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE) */ + +#if (__ARM_FEATURE_MVE & 2) && (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE) + +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_f16))) +float16x8_t vabdq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_f16))) +float16x8_t vabdq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_f32))) 
+float32x4_t vabdq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_f32))) +float32x4_t vabdq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_f16))) +float16x8_t vabdq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_f16))) +float16x8_t vabdq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_f32))) +float32x4_t vabdq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_m_f32))) +float32x4_t vabdq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_f16))) +float16x8_t vabdq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_f16))) +float16x8_t vabdq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_f32))) +float32x4_t vabdq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vabdq_x_f32))) +float32x4_t vabdq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_f16))) +float16x8_t vaddq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_f16))) +float16x8_t vaddq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_f32))) +float32x4_t vaddq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_f32))) +float32x4_t vaddq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_f16))) +float16x8_t vaddq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_f16))) +float16x8_t vaddq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_f32))) +float32x4_t vaddq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_m_f32))) +float32x4_t vaddq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_f16))) +float16x8_t vaddq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_f16))) +float16x8_t vaddq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_f32))) +float32x4_t vaddq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vaddq_x_f32))) +float32x4_t vaddq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_f16))) +float16x8_t vandq_f16(float16x8_t, 
float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_f16))) +float16x8_t vandq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_f32))) +float32x4_t vandq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_f32))) +float32x4_t vandq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_f16))) +float16x8_t vandq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_f16))) +float16x8_t vandq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_m_f32))) +float32x4_t vandq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_m_f32))) +float32x4_t vandq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_f16))) +float16x8_t vandq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_f16))) +float16x8_t vandq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vandq_x_f32))) +float32x4_t vandq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vandq_x_f32))) +float32x4_t vandq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_f16))) +float16x8_t vbicq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_f16))) +float16x8_t vbicq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_f32))) +float32x4_t vbicq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_f32))) +float32x4_t vbicq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_f16))) +float16x8_t vbicq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_f16))) +float16x8_t vbicq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_f32))) +float32x4_t vbicq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_m_f32))) +float32x4_t vbicq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_f16))) +float16x8_t vbicq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_f16))) +float16x8_t vbicq_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_f32))) +float32x4_t vbicq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vbicq_x_f32))) +float32x4_t vbicq_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_f16))) +float16x8_t vcaddq_rot270_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_f16))) +float16x8_t vcaddq_rot270(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_f32))) +float32x4_t vcaddq_rot270_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_f32))) +float32x4_t vcaddq_rot270(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) +float16x8_t vcaddq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) +float16x8_t vcaddq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) +float32x4_t vcaddq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) +float32x4_t vcaddq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) +float16x8_t vcaddq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) +float16x8_t vcaddq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) +float32x4_t vcaddq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) +float32x4_t vcaddq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_f16))) +float16x8_t vcaddq_rot90_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_f16))) +float16x8_t vcaddq_rot90(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_f32))) +float32x4_t vcaddq_rot90_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_f32))) +float32x4_t vcaddq_rot90(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) +float16x8_t vcaddq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) +float16x8_t vcaddq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) +float32x4_t vcaddq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) +float32x4_t 
vcaddq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) +float16x8_t vcaddq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) +float16x8_t vcaddq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) +float32x4_t vcaddq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) +float32x4_t vcaddq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_f16))) +float16x8_t vcmlaq_f16(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_f16))) +float16x8_t vcmlaq(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_f32))) +float32x4_t vcmlaq_f32(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_f32))) +float32x4_t vcmlaq(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_m_f16))) +float16x8_t vcmlaq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_m_f16))) +float16x8_t vcmlaq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_m_f32))) +float32x4_t vcmlaq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_m_f32))) +float32x4_t vcmlaq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) +float16x8_t vcmlaq_rot180_f16(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) +float16x8_t vcmlaq_rot180(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) +float32x4_t vcmlaq_rot180_f32(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) +float32x4_t vcmlaq_rot180(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) +float16x8_t vcmlaq_rot180_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) +float16x8_t vcmlaq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) +float32x4_t vcmlaq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) +float32x4_t vcmlaq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) +float16x8_t vcmlaq_rot270_f16(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) +float16x8_t vcmlaq_rot270(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) +float32x4_t vcmlaq_rot270_f32(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) +float32x4_t vcmlaq_rot270(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) +float16x8_t vcmlaq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) +float16x8_t vcmlaq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) +float32x4_t vcmlaq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) +float32x4_t vcmlaq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) +float16x8_t vcmlaq_rot90_f16(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) +float16x8_t vcmlaq_rot90(float16x8_t, float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) +float32x4_t vcmlaq_rot90_f32(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) +float32x4_t vcmlaq_rot90(float32x4_t, float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) +float16x8_t vcmlaq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) +float16x8_t vcmlaq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) +float32x4_t vcmlaq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) +float32x4_t vcmlaq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_f16))) +mve_pred16_t vcmpeqq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_f16))) +mve_pred16_t vcmpeqq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_f32))) +mve_pred16_t vcmpeqq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_f32))) +mve_pred16_t vcmpeqq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_f16))) +mve_pred16_t 
vcmpeqq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_f16))) +mve_pred16_t vcmpeqq_m(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_f32))) +mve_pred16_t vcmpeqq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_f32))) +mve_pred16_t vcmpeqq_m(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) +mve_pred16_t vcmpeqq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) +mve_pred16_t vcmpeqq_m(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) +mve_pred16_t vcmpeqq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) +mve_pred16_t vcmpeqq_m(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_f16))) +mve_pred16_t vcmpeqq_n_f16(float16x8_t, float16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_f16))) +mve_pred16_t vcmpeqq(float16x8_t, float16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_f32))) +mve_pred16_t vcmpeqq_n_f32(float32x4_t, float32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpeqq_n_f32))) +mve_pred16_t vcmpeqq(float32x4_t, float32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_f16))) +mve_pred16_t vcmpgeq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_f16))) +mve_pred16_t vcmpgeq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_f32))) +mve_pred16_t vcmpgeq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_f32))) +mve_pred16_t vcmpgeq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_f16))) +mve_pred16_t vcmpgeq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_f16))) +mve_pred16_t vcmpgeq_m(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_f32))) +mve_pred16_t vcmpgeq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_f32))) +mve_pred16_t vcmpgeq_m(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) +mve_pred16_t vcmpgeq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) +mve_pred16_t vcmpgeq_m(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) +mve_pred16_t vcmpgeq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); 
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) +mve_pred16_t vcmpgeq_m(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_f16))) +mve_pred16_t vcmpgeq_n_f16(float16x8_t, float16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_f16))) +mve_pred16_t vcmpgeq(float16x8_t, float16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_f32))) +mve_pred16_t vcmpgeq_n_f32(float32x4_t, float32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgeq_n_f32))) +mve_pred16_t vcmpgeq(float32x4_t, float32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_f16))) +mve_pred16_t vcmpgtq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_f16))) +mve_pred16_t vcmpgtq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_f32))) +mve_pred16_t vcmpgtq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_f32))) +mve_pred16_t vcmpgtq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_f16))) +mve_pred16_t vcmpgtq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_f16))) +mve_pred16_t vcmpgtq_m(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_f32))) +mve_pred16_t vcmpgtq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_f32))) +mve_pred16_t vcmpgtq_m(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) +mve_pred16_t vcmpgtq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) +mve_pred16_t vcmpgtq_m(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) +mve_pred16_t vcmpgtq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) +mve_pred16_t vcmpgtq_m(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_f16))) +mve_pred16_t vcmpgtq_n_f16(float16x8_t, float16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_f16))) +mve_pred16_t vcmpgtq(float16x8_t, float16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_f32))) +mve_pred16_t vcmpgtq_n_f32(float32x4_t, float32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpgtq_n_f32))) +mve_pred16_t vcmpgtq(float32x4_t, float32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_f16))) +mve_pred16_t vcmpleq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_f16))) +mve_pred16_t vcmpleq(float16x8_t, 
float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_f32))) +mve_pred16_t vcmpleq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_f32))) +mve_pred16_t vcmpleq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_f16))) +mve_pred16_t vcmpleq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_f16))) +mve_pred16_t vcmpleq_m(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_f32))) +mve_pred16_t vcmpleq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_f32))) +mve_pred16_t vcmpleq_m(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) +mve_pred16_t vcmpleq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) +mve_pred16_t vcmpleq_m(float16x8_t, float16_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) +mve_pred16_t vcmpleq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) +mve_pred16_t vcmpleq_m(float32x4_t, float32_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_f16))) +mve_pred16_t vcmpleq_n_f16(float16x8_t, float16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_f16))) +mve_pred16_t vcmpleq(float16x8_t, float16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_f32))) +mve_pred16_t vcmpleq_n_f32(float32x4_t, float32_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpleq_n_f32))) +mve_pred16_t vcmpleq(float32x4_t, float32_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_f16))) +mve_pred16_t vcmpltq_f16(float16x8_t, float16x8_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_f16))) +mve_pred16_t vcmpltq(float16x8_t, float16x8_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_f32))) +mve_pred16_t vcmpltq_f32(float32x4_t, float32x4_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_f32))) +mve_pred16_t vcmpltq(float32x4_t, float32x4_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_f16))) +mve_pred16_t vcmpltq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_f16))) +mve_pred16_t vcmpltq_m(float16x8_t, float16x8_t, mve_pred16_t); +static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_f32))) +mve_pred16_t vcmpltq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_f32))) +mve_pred16_t vcmpltq_m(float32x4_t, float32x4_t, mve_pred16_t); +static __inline__ 
__attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_f16)))
+mve_pred16_t vcmpltq_m_n_f16(float16x8_t, float16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_f16)))
+mve_pred16_t vcmpltq_m(float16x8_t, float16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_f32)))
+mve_pred16_t vcmpltq_m_n_f32(float32x4_t, float32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_m_n_f32)))
+mve_pred16_t vcmpltq_m(float32x4_t, float32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_f16)))
+mve_pred16_t vcmpltq_n_f16(float16x8_t, float16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_f16)))
+mve_pred16_t vcmpltq(float16x8_t, float16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_f32)))
+mve_pred16_t vcmpltq_n_f32(float32x4_t, float32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpltq_n_f32)))
+mve_pred16_t vcmpltq(float32x4_t, float32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_f16)))
+mve_pred16_t vcmpneq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_f16)))
+mve_pred16_t vcmpneq(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_f32)))
+mve_pred16_t vcmpneq_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_f32)))
+mve_pred16_t vcmpneq(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_f16)))
+mve_pred16_t vcmpneq_m_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_f16)))
+mve_pred16_t vcmpneq_m(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_f32)))
+mve_pred16_t vcmpneq_m_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_f32)))
+mve_pred16_t vcmpneq_m(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_f16)))
+mve_pred16_t vcmpneq_m_n_f16(float16x8_t, float16_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_f16)))
+mve_pred16_t vcmpneq_m(float16x8_t, float16_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_f32)))
+mve_pred16_t vcmpneq_m_n_f32(float32x4_t, float32_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_m_n_f32)))
+mve_pred16_t vcmpneq_m(float32x4_t, float32_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_f16)))
+mve_pred16_t vcmpneq_n_f16(float16x8_t, float16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_f16)))
+mve_pred16_t vcmpneq(float16x8_t, float16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_f32)))
+mve_pred16_t vcmpneq_n_f32(float32x4_t, float32_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmpneq_n_f32)))
+mve_pred16_t vcmpneq(float32x4_t, float32_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_f16)))
+float16x8_t vcmulq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_f16)))
+float16x8_t vcmulq(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_f32)))
+float32x4_t vcmulq_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_f32)))
+float32x4_t vcmulq(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_m_f16)))
+float16x8_t vcmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_m_f16)))
+float16x8_t vcmulq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_m_f32)))
+float32x4_t vcmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_m_f32)))
+float32x4_t vcmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_f16)))
+float16x8_t vcmulq_rot180_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_f16)))
+float16x8_t vcmulq_rot180(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_f32)))
+float32x4_t vcmulq_rot180_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_f32)))
+float32x4_t vcmulq_rot180(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_m_f16)))
+float16x8_t vcmulq_rot180_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_m_f16)))
+float16x8_t vcmulq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_m_f32)))
+float32x4_t vcmulq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_m_f32)))
+float32x4_t vcmulq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_x_f16)))
+float16x8_t vcmulq_rot180_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_x_f16)))
+float16x8_t vcmulq_rot180_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_x_f32)))
+float32x4_t vcmulq_rot180_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot180_x_f32)))
+float32x4_t vcmulq_rot180_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_f16)))
+float16x8_t vcmulq_rot270_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_f16)))
+float16x8_t vcmulq_rot270(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_f32)))
+float32x4_t vcmulq_rot270_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_f32)))
+float32x4_t vcmulq_rot270(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_m_f16)))
+float16x8_t vcmulq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_m_f16)))
+float16x8_t vcmulq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_m_f32)))
+float32x4_t vcmulq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_m_f32)))
+float32x4_t vcmulq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_x_f16)))
+float16x8_t vcmulq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_x_f16)))
+float16x8_t vcmulq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_x_f32)))
+float32x4_t vcmulq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot270_x_f32)))
+float32x4_t vcmulq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_f16)))
+float16x8_t vcmulq_rot90_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_f16)))
+float16x8_t vcmulq_rot90(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_f32)))
+float32x4_t vcmulq_rot90_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_f32)))
+float32x4_t vcmulq_rot90(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_m_f16)))
+float16x8_t vcmulq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_m_f16)))
+float16x8_t vcmulq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_m_f32)))
+float32x4_t vcmulq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_m_f32)))
+float32x4_t vcmulq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_x_f16)))
+float16x8_t vcmulq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_x_f16)))
+float16x8_t vcmulq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_x_f32)))
+float32x4_t vcmulq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_rot90_x_f32)))
+float32x4_t vcmulq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_x_f16)))
+float16x8_t vcmulq_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_x_f16)))
+float16x8_t vcmulq_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcmulq_x_f32)))
+float32x4_t vcmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vcmulq_x_f32)))
+float32x4_t vcmulq_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_f16)))
+float16x8_t vcreateq_f16(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcreateq_f32)))
+float32x4_t vcreateq_f32(uint64_t, uint64_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcvtbq_f16_f32)))
+float16x8_t vcvtbq_f16_f32(float16x8_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcvtbq_m_f16_f32)))
+float16x8_t vcvtbq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcvttq_f16_f32)))
+float16x8_t vcvttq_f16_f32(float16x8_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vcvttq_m_f16_f32)))
+float16x8_t vcvttq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_f16)))
+float16x8_t veorq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_f16)))
+float16x8_t veorq(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_f32)))
+float32x4_t veorq_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_f32)))
+float32x4_t veorq(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_f16)))
+float16x8_t veorq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_f16)))
+float16x8_t veorq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_m_f32)))
+float32x4_t veorq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_m_f32)))
+float32x4_t veorq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_f16)))
+float16x8_t veorq_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_f16)))
+float16x8_t veorq_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_veorq_x_f32)))
+float32x4_t veorq_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_veorq_x_f32)))
+float32x4_t veorq_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_f16)))
+float16_t vgetq_lane_f16(float16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_f16)))
+float16_t vgetq_lane(float16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_f32)))
+float32_t vgetq_lane_f32(float32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vgetq_lane_f32)))
+float32_t vgetq_lane(float32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_f16)))
+float16x8_t vld1q_f16(const float16_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_f16)))
+float16x8_t vld1q(const float16_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_f32)))
+float32x4_t vld1q_f32(const float32_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_f32)))
+float32x4_t vld1q(const float32_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_f16)))
+float16x8_t vld1q_z_f16(const float16_t *, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_f16)))
+float16x8_t vld1q_z(const float16_t *, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_f32)))
+float32x4_t vld1q_z_f32(const float32_t *, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld1q_z_f32)))
+float32x4_t vld1q_z(const float32_t *, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_f16)))
+float16x8x2_t vld2q_f16(const float16_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_f16)))
+float16x8x2_t vld2q(const float16_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld2q_f32)))
+float32x4x2_t vld2q_f32(const float32_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld2q_f32)))
+float32x4x2_t vld2q(const float32_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_f16)))
+float16x8x4_t vld4q_f16(const float16_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_f16)))
+float16x8x4_t vld4q(const float16_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vld4q_f32)))
+float32x4x4_t vld4q_f32(const float32_t *);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vld4q_f32)))
+float32x4x4_t vld4q(const float32_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_f16)))
+float16x8_t vldrhq_f16(const float16_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_f16)))
+float16x8_t vldrhq_gather_offset_f16(const float16_t *, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_f16)))
+float16x8_t vldrhq_gather_offset(const float16_t *, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16)))
+float16x8_t vldrhq_gather_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16)))
+float16x8_t vldrhq_gather_offset_z(const float16_t *, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16)))
+float16x8_t vldrhq_gather_shifted_offset_f16(const float16_t *, uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16)))
+float16x8_t vldrhq_gather_shifted_offset(const float16_t *, uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16)))
+float16x8_t vldrhq_gather_shifted_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16)))
+float16x8_t vldrhq_gather_shifted_offset_z(const float16_t *, uint16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrhq_z_f16)))
+float16x8_t vldrhq_z_f16(const float16_t *, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_f32)))
+float32x4_t vldrwq_f32(const float32_t *);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_f32)))
+float32x4_t vldrwq_gather_base_f32(uint32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_f32)))
+float32x4_t vldrwq_gather_base_wb_f32(uint32x4_t *, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_f32)))
+float32x4_t vldrwq_gather_base_wb_z_f32(uint32x4_t *, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_base_z_f32)))
+float32x4_t vldrwq_gather_base_z_f32(uint32x4_t, int, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_f32)))
+float32x4_t vldrwq_gather_offset_f32(const float32_t *, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_f32)))
+float32x4_t vldrwq_gather_offset(const float32_t *, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32)))
+float32x4_t vldrwq_gather_offset_z_f32(const float32_t *, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32)))
+float32x4_t vldrwq_gather_offset_z(const float32_t *, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32)))
+float32x4_t vldrwq_gather_shifted_offset_f32(const float32_t *, uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32)))
+float32x4_t vldrwq_gather_shifted_offset(const float32_t *, uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32)))
+float32x4_t vldrwq_gather_shifted_offset_z_f32(const float32_t *, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32)))
+float32x4_t vldrwq_gather_shifted_offset_z(const float32_t *, uint32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vldrwq_z_f32)))
+float32x4_t vldrwq_z_f32(const float32_t *, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_f16)))
+float16x8_t vmaxnmq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_f16)))
+float16x8_t vmaxnmq(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_f32)))
+float32x4_t vmaxnmq_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_f32)))
+float32x4_t vmaxnmq(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_m_f16)))
+float16x8_t vmaxnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_m_f16)))
+float16x8_t vmaxnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_m_f32)))
+float32x4_t vmaxnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_m_f32)))
+float32x4_t vmaxnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_x_f16)))
+float16x8_t vmaxnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_x_f16)))
+float16x8_t vmaxnmq_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_x_f32)))
+float32x4_t vmaxnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmaxnmq_x_f32)))
+float32x4_t vmaxnmq_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_f16)))
+float16x8_t vminnmq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_f16)))
+float16x8_t vminnmq(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_f32)))
+float32x4_t vminnmq_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_f32)))
+float32x4_t vminnmq(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_m_f16)))
+float16x8_t vminnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_m_f16)))
+float16x8_t vminnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_m_f32)))
+float32x4_t vminnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_m_f32)))
+float32x4_t vminnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_x_f16)))
+float16x8_t vminnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_x_f16)))
+float16x8_t vminnmq_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vminnmq_x_f32)))
+float32x4_t vminnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vminnmq_x_f32)))
+float32x4_t vminnmq_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_f16)))
+float16x8_t vmulq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_f16)))
+float16x8_t vmulq(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_f32)))
+float32x4_t vmulq_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_f32)))
+float32x4_t vmulq(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_f16)))
+float16x8_t vmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_f16)))
+float16x8_t vmulq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_f32)))
+float32x4_t vmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_m_f32)))
+float32x4_t vmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_f16)))
+float16x8_t vmulq_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_f16)))
+float16x8_t vmulq_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_f32)))
+float32x4_t vmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vmulq_x_f32)))
+float32x4_t vmulq_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_f16)))
+float16x8_t vornq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_f16)))
+float16x8_t vornq(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_f32)))
+float32x4_t vornq_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_f32)))
+float32x4_t vornq(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_f16)))
+float16x8_t vornq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_f16)))
+float16x8_t vornq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_m_f32)))
+float32x4_t vornq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_m_f32)))
+float32x4_t vornq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_f16)))
+float16x8_t vornq_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_f16)))
+float16x8_t vornq_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vornq_x_f32)))
+float32x4_t vornq_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vornq_x_f32)))
+float32x4_t vornq_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_f16)))
+float16x8_t vorrq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_f16)))
+float16x8_t vorrq(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_f32)))
+float32x4_t vorrq_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_f32)))
+float32x4_t vorrq(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_f16)))
+float16x8_t vorrq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_f16)))
+float16x8_t vorrq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_f32)))
+float32x4_t vorrq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_m_f32)))
+float32x4_t vorrq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_f16)))
+float16x8_t vorrq_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_f16)))
+float16x8_t vorrq_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_f32)))
+float32x4_t vorrq_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vorrq_x_f32)))
+float32x4_t vorrq_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_f16)))
+float16x8_t vpselq_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_f16)))
+float16x8_t vpselq(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vpselq_f32)))
+float32x4_t vpselq_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vpselq_f32)))
+float32x4_t vpselq(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_f32)))
+float16x8_t vreinterpretq_f16_f32(float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_f32)))
+float16x8_t vreinterpretq_f16(float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s16)))
+float16x8_t vreinterpretq_f16_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s16)))
+float16x8_t vreinterpretq_f16(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s32)))
+float16x8_t vreinterpretq_f16_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s32)))
+float16x8_t vreinterpretq_f16(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s64)))
+float16x8_t vreinterpretq_f16_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s64)))
+float16x8_t vreinterpretq_f16(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s8)))
+float16x8_t vreinterpretq_f16_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_s8)))
+float16x8_t vreinterpretq_f16(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u16)))
+float16x8_t vreinterpretq_f16_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u16)))
+float16x8_t vreinterpretq_f16(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u32)))
+float16x8_t vreinterpretq_f16_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u32)))
+float16x8_t vreinterpretq_f16(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u64)))
+float16x8_t vreinterpretq_f16_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u64)))
+float16x8_t vreinterpretq_f16(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u8)))
+float16x8_t vreinterpretq_f16_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f16_u8)))
+float16x8_t vreinterpretq_f16(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_f16)))
+float32x4_t vreinterpretq_f32_f16(float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_f16)))
+float32x4_t vreinterpretq_f32(float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s16)))
+float32x4_t vreinterpretq_f32_s16(int16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s16)))
+float32x4_t vreinterpretq_f32(int16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s32)))
+float32x4_t vreinterpretq_f32_s32(int32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s32)))
+float32x4_t vreinterpretq_f32(int32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s64)))
+float32x4_t vreinterpretq_f32_s64(int64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s64)))
+float32x4_t vreinterpretq_f32(int64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s8)))
+float32x4_t vreinterpretq_f32_s8(int8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_s8)))
+float32x4_t vreinterpretq_f32(int8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u16)))
+float32x4_t vreinterpretq_f32_u16(uint16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u16)))
+float32x4_t vreinterpretq_f32(uint16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u32)))
+float32x4_t vreinterpretq_f32_u32(uint32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u32)))
+float32x4_t vreinterpretq_f32(uint32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u64)))
+float32x4_t vreinterpretq_f32_u64(uint64x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u64)))
+float32x4_t vreinterpretq_f32(uint64x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u8)))
+float32x4_t vreinterpretq_f32_u8(uint8x16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_f32_u8)))
+float32x4_t vreinterpretq_f32(uint8x16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_f16)))
+int16x8_t vreinterpretq_s16_f16(float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_f16)))
+int16x8_t vreinterpretq_s16(float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_f32)))
+int16x8_t vreinterpretq_s16_f32(float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s16_f32)))
+int16x8_t vreinterpretq_s16(float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_f16)))
+int32x4_t vreinterpretq_s32_f16(float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_f16)))
+int32x4_t vreinterpretq_s32(float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_f32)))
+int32x4_t vreinterpretq_s32_f32(float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s32_f32)))
+int32x4_t vreinterpretq_s32(float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_f16)))
+int64x2_t vreinterpretq_s64_f16(float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_f16)))
+int64x2_t vreinterpretq_s64(float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_f32)))
+int64x2_t vreinterpretq_s64_f32(float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s64_f32)))
+int64x2_t vreinterpretq_s64(float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_f16)))
+int8x16_t vreinterpretq_s8_f16(float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_f16)))
+int8x16_t vreinterpretq_s8(float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_f32)))
+int8x16_t vreinterpretq_s8_f32(float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_s8_f32)))
+int8x16_t vreinterpretq_s8(float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_f16)))
+uint16x8_t vreinterpretq_u16_f16(float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_f16)))
+uint16x8_t vreinterpretq_u16(float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_f32)))
+uint16x8_t vreinterpretq_u16_f32(float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u16_f32)))
+uint16x8_t vreinterpretq_u16(float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_f16)))
+uint32x4_t vreinterpretq_u32_f16(float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_f16)))
+uint32x4_t vreinterpretq_u32(float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_f32)))
+uint32x4_t vreinterpretq_u32_f32(float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u32_f32)))
+uint32x4_t vreinterpretq_u32(float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_f16)))
+uint64x2_t vreinterpretq_u64_f16(float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_f16)))
+uint64x2_t vreinterpretq_u64(float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_f32)))
+uint64x2_t vreinterpretq_u64_f32(float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u64_f32)))
+uint64x2_t vreinterpretq_u64(float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_f16)))
+uint8x16_t vreinterpretq_u8_f16(float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_f16)))
+uint8x16_t vreinterpretq_u8(float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_f32)))
+uint8x16_t vreinterpretq_u8_f32(float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vreinterpretq_u8_f32)))
+uint8x16_t vreinterpretq_u8(float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_f16)))
+float16x8_t vsetq_lane_f16(float16_t, float16x8_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_f16)))
+float16x8_t vsetq_lane(float16_t, float16x8_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_f32)))
+float32x4_t vsetq_lane_f32(float32_t, float32x4_t, int);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsetq_lane_f32)))
+float32x4_t vsetq_lane(float32_t, float32x4_t, int);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_f16)))
+void vst1q_f16(float16_t *, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_f16)))
+void vst1q(float16_t *, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_f32)))
+void vst1q_f32(float32_t *, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_f32)))
+void vst1q(float32_t *, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_f16)))
+void vst1q_p_f16(float16_t *, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_f16)))
+void vst1q_p(float16_t *, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_f32)))
+void vst1q_p_f32(float32_t *, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst1q_p_f32)))
+void vst1q_p(float32_t *, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_f16)))
+void vst2q_f16(float16_t *, float16x8x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_f16)))
+void vst2q(float16_t *, float16x8x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst2q_f32)))
+void vst2q_f32(float32_t *, float32x4x2_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst2q_f32)))
+void vst2q(float32_t *, float32x4x2_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_f16)))
+void vst4q_f16(float16_t *, float16x8x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_f16)))
+void vst4q(float16_t *, float16x8x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vst4q_f32)))
+void vst4q_f32(float32_t *, float32x4x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vst4q_f32)))
+void vst4q(float32_t *, float32x4x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_f16)))
+void vstrhq_f16(float16_t *, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_f16)))
+void vstrhq(float16_t *, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_f16)))
+void vstrhq_p_f16(float16_t *, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_p_f16)))
+void vstrhq_p(float16_t *, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16)))
+void vstrhq_scatter_offset_f16(float16_t *, uint16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16)))
+void vstrhq_scatter_offset(float16_t *, uint16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16)))
+void vstrhq_scatter_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16)))
+void vstrhq_scatter_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16)))
+void vstrhq_scatter_shifted_offset_f16(float16_t *, uint16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16)))
+void vstrhq_scatter_shifted_offset(float16_t *, uint16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16)))
+void vstrhq_scatter_shifted_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16)))
+void vstrhq_scatter_shifted_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_f32)))
+void vstrwq_f32(float32_t *, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_f32)))
+void vstrwq(float32_t *, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_f32)))
+void vstrwq_p_f32(float32_t *, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_p_f32)))
+void vstrwq_p(float32_t *, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_f32)))
+void vstrwq_scatter_base_f32(uint32x4_t, int, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_f32)))
+void vstrwq_scatter_base(uint32x4_t, int, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32)))
+void vstrwq_scatter_base_p_f32(uint32x4_t, int, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32)))
+void vstrwq_scatter_base_p(uint32x4_t, int, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32)))
+void vstrwq_scatter_base_wb_f32(uint32x4_t *, int, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32)))
+void vstrwq_scatter_base_wb(uint32x4_t *, int, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32)))
+void vstrwq_scatter_base_wb_p_f32(uint32x4_t *, int, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32)))
+void vstrwq_scatter_base_wb_p(uint32x4_t *, int, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32)))
+void vstrwq_scatter_offset_f32(float32_t *, uint32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32)))
+void vstrwq_scatter_offset(float32_t *, uint32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32)))
+void vstrwq_scatter_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32)))
+void vstrwq_scatter_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32)))
+void vstrwq_scatter_shifted_offset_f32(float32_t *, uint32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32)))
+void vstrwq_scatter_shifted_offset(float32_t *, uint32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32)))
+void vstrwq_scatter_shifted_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32)))
+void vstrwq_scatter_shifted_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_f16)))
+float16x8_t vsubq_f16(float16x8_t, float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_f16)))
+float16x8_t vsubq(float16x8_t, float16x8_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_f32)))
+float32x4_t vsubq_f32(float32x4_t, float32x4_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_f32)))
+float32x4_t vsubq(float32x4_t, float32x4_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_f16)))
+float16x8_t vsubq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_f16)))
+float16x8_t vsubq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_f32)))
+float32x4_t vsubq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_m_f32)))
+float32x4_t vsubq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_f16)))
+float16x8_t vsubq_x_f16(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_f16)))
+float16x8_t vsubq_x(float16x8_t, float16x8_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_f32)))
+float32x4_t vsubq_x_f32(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vsubq_x_f32)))
+float32x4_t vsubq_x(float32x4_t, float32x4_t, mve_pred16_t);
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_f16)))
+float16x8_t vuninitializedq_f16();
+static __inline__ __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_f32)))
+float32x4_t vuninitializedq_f32();
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f16)))
+float16x8_t vuninitializedq(float16x8_t);
+static __inline__ __attribute__((overloadable, __clang_arm_mve_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f32)))
+float32x4_t vuninitializedq(float32x4_t);
+
+#endif /* (__ARM_FEATURE_MVE & 2) && (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE) */
+
+#endif /* __ARM_MVE_H */
diff --git a/lib/include/arm_neon.h b/lib/include/arm_neon.h
index 694bdfc9ce..fd9573d271 100644
--- a/lib/include/arm_neon.h
+++ b/lib/include/arm_neon.h
@@ -1042,20 +1042,11 @@ __ai uint32x2_t vadd_u32(uint32x2_t __p0, uint32x2_t __p1) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vadd_u64(uint64x1_t __p0, uint64x1_t __p1) {
   uint64x1_t __ret;
   __ret = __p0 + __p1;
   return __ret;
 }
-#else
-__ai uint64x1_t vadd_u64(uint64x1_t __p0, uint64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = __p0 + __p1;
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vadd_u16(uint16x4_t __p0, uint16x4_t __p1) {
   uint16x4_t __ret;
@@ -1124,20 +1115,11 @@ __ai int32x2_t vadd_s32(int32x2_t __p0, int32x2_t __p1) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai int64x1_t vadd_s64(int64x1_t __p0, int64x1_t __p1) {
   int64x1_t __ret;
   __ret = __p0 + __p1;
   return __ret;
 }
-#else
-__ai int64x1_t vadd_s64(int64x1_t __p0, int64x1_t __p1) {
-  int64x1_t __ret;
-  __ret = __p0 + __p1;
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai int16x4_t vadd_s16(int16x4_t __p0, int16x4_t __p1) {
   int16x4_t __ret;
@@ -1457,20 +1439,11 @@ __ai uint32x2_t vand_u32(uint32x2_t __p0, uint32x2_t __p1) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vand_u64(uint64x1_t __p0, uint64x1_t __p1) {
   uint64x1_t __ret;
   __ret = __p0 & __p1;
   return __ret;
 }
-#else
-__ai uint64x1_t vand_u64(uint64x1_t __p0, uint64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = __p0 & __p1;
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vand_u16(uint16x4_t __p0, uint16x4_t __p1) {
   uint16x4_t __ret;
@@ -1522,20 +1495,11 @@ __ai int32x2_t vand_s32(int32x2_t __p0, int32x2_t __p1) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai int64x1_t vand_s64(int64x1_t __p0, int64x1_t __p1) {
   int64x1_t __ret;
   __ret = __p0 & __p1;
   return __ret;
 }
-#else
-__ai int64x1_t vand_s64(int64x1_t __p0, int64x1_t __p1) {
-  int64x1_t __ret;
-  __ret = __p0 & __p1;
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai int16x4_t vand_s16(int16x4_t __p0, int16x4_t __p1) {
   int16x4_t __ret;
@@ -1723,20 +1687,11 @@ __ai uint32x2_t vbic_u32(uint32x2_t __p0, uint32x2_t __p1) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vbic_u64(uint64x1_t __p0, uint64x1_t __p1) {
   uint64x1_t __ret;
   __ret = __p0 & ~__p1;
   return __ret;
 }
-#else
-__ai uint64x1_t vbic_u64(uint64x1_t __p0, uint64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = __p0 & ~__p1;
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vbic_u16(uint16x4_t __p0, uint16x4_t __p1) {
   uint16x4_t __ret;
@@ -1788,20 +1743,11 @@ __ai int32x2_t vbic_s32(int32x2_t __p0, int32x2_t __p1) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai int64x1_t vbic_s64(int64x1_t __p0, int64x1_t __p1) {
   int64x1_t __ret;
   __ret = __p0 & ~__p1;
   return __ret;
 }
-#else
-__ai int64x1_t vbic_s64(int64x1_t __p0, int64x1_t __p1) {
-  int64x1_t __ret;
-  __ret = __p0 & ~__p1;
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai int16x4_t vbic_s16(int16x4_t __p0, int16x4_t __p1) {
   int16x4_t __ret;
@@ -2089,20 +2035,11 @@ __ai uint32x2_t vbsl_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vbsl_u64(uint64x1_t __p0, uint64x1_t __p1, uint64x1_t __p2) {
   uint64x1_t __ret;
   __ret = (uint64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 19);
   return __ret;
 }
-#else
-__ai uint64x1_t vbsl_u64(uint64x1_t __p0, uint64x1_t __p1, uint64x1_t __p2) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 19);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vbsl_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) {
   uint16x4_t __ret;
@@ -2175,20 +2112,11 @@ __ai int32x2_t vbsl_s32(uint32x2_t __p0, int32x2_t __p1, int32x2_t __p2) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai int64x1_t vbsl_s64(uint64x1_t __p0, int64x1_t __p1, int64x1_t __p2) {
   int64x1_t __ret;
   __ret = (int64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 3);
   return __ret;
 }
-#else
-__ai int64x1_t vbsl_s64(uint64x1_t __p0, int64x1_t __p1, int64x1_t __p2) {
-  int64x1_t __ret;
-  __ret = (int64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 3);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai int16x4_t vbsl_s16(uint16x4_t __p0, int16x4_t __p1, int16x4_t __p2) {
   int16x4_t __ret;
@@ -4191,174 +4119,78 @@ __ai int16x8_t __noswap_vcombine_s16(int16x4_t __p0, int16x4_t __p1) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
-__ai poly8x8_t vcreate_p8(uint64_t __p0) {
-  poly8x8_t __ret;
-  __ret = (poly8x8_t)(__p0);
-  return __ret;
-}
-#else
-__ai poly8x8_t vcreate_p8(uint64_t __p0) {
-  poly8x8_t __ret;
-  __ret = (poly8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai poly16x4_t vcreate_p16(uint64_t __p0) {
-  poly16x4_t __ret;
-  __ret = (poly16x4_t)(__p0);
-  return __ret;
-}
-#else
-__ai poly16x4_t vcreate_p16(uint64_t __p0) {
-  poly16x4_t __ret;
-  __ret = (poly16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint8x8_t vcreate_u8(uint64_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#else
-__ai uint8x8_t vcreate_u8(uint64_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x2_t vcreate_u32(uint64_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#else
-__ai uint32x2_t vcreate_u32(uint64_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint64x1_t vcreate_u64(uint64_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#else
-__ai uint64x1_t vcreate_u64(uint64_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint16x4_t vcreate_u16(uint64_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#else
-__ai uint16x4_t vcreate_u16(uint64_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int8x8_t vcreate_s8(uint64_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#else
-__ai int8x8_t vcreate_s8(uint64_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai float32x2_t vcreate_f32(uint64_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#else
-__ai float32x2_t vcreate_f32(uint64_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai float16x4_t vcreate_f16(uint64_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#else
-__ai float16x4_t vcreate_f16(uint64_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int32x2_t vcreate_s32(uint64_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#else
-__ai int32x2_t vcreate_s32(uint64_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int64x1_t vcreate_s64(uint64_t __p0) {
-  int64x1_t __ret;
-  __ret = (int64x1_t)(__p0);
-  return __ret;
-}
-#else
-__ai int64x1_t vcreate_s64(uint64_t __p0) {
-  int64x1_t __ret;
-  __ret = (int64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai int16x4_t vcreate_s16(uint64_t __p0) {
-  int16x4_t __ret;
-  __ret = (int16x4_t)(__p0);
-  return __ret;
-}
-#else
-__ai int16x4_t vcreate_s16(uint64_t __p0) {
-  int16x4_t __ret;
-  __ret = (int16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
+#define vcreate_p8(__p0) __extension__ ({ \
+  poly8x8_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (poly8x8_t)(__promote); \
+  __ret; \
+})
+#define vcreate_p16(__p0) __extension__ ({ \
+  poly16x4_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (poly16x4_t)(__promote); \
+  __ret; \
+})
+#define vcreate_u8(__p0) __extension__ ({ \
+  uint8x8_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (uint8x8_t)(__promote); \
+  __ret; \
+})
+#define vcreate_u32(__p0) __extension__ ({ \
+  uint32x2_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (uint32x2_t)(__promote); \
+  __ret; \
+})
+#define vcreate_u64(__p0) __extension__ ({ \
+  uint64x1_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (uint64x1_t)(__promote); \
+  __ret; \
+})
+#define vcreate_u16(__p0) __extension__ ({ \
+  uint16x4_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (uint16x4_t)(__promote); \
+  __ret; \
+})
+#define vcreate_s8(__p0) __extension__ ({ \
+  int8x8_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (int8x8_t)(__promote); \
+  __ret; \
+})
+#define vcreate_f32(__p0) __extension__ ({ \
+  float32x2_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (float32x2_t)(__promote); \
+  __ret; \
+})
+#define vcreate_f16(__p0) __extension__ ({ \
+  float16x4_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (float16x4_t)(__promote); \
+  __ret; \
+})
+#define vcreate_s32(__p0) __extension__ ({ \
+  int32x2_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (int32x2_t)(__promote); \
+  __ret; \
+})
+#define vcreate_s64(__p0) __extension__ ({ \
+  int64x1_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (int64x1_t)(__promote); \
+  __ret; \
+})
+#define vcreate_s16(__p0) __extension__ ({ \
+  int16x4_t __ret; \
+  uint64_t __promote = __p0; \
+  __ret = (int16x4_t)(__promote); \
+  __ret; \
+})
 #ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vcvtq_f32_u32(uint32x4_t __p0) {
   float32x4_t __ret;
@@ -4899,22 +4731,12 @@ __ai uint32x2_t vcvt_u32_f32(float32x2_t __p0) {
 })
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 #define vdup_lane_u64(__p0, __p1) __extension__ ({ \
   uint64x1_t __s0 = __p0; \
   uint64x1_t __ret; \
   __ret = __builtin_shufflevector(__s0, __s0, __p1); \
   __ret; \
 })
-#else
-#define vdup_lane_u64(__p0, __p1) __extension__ ({ \
-  uint64x1_t __s0 = __p0; \
-  uint64x1_t __ret; \
-  __ret = __builtin_shufflevector(__s0, __s0, __p1); \
-  __ret; \
-})
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 #define vdup_lane_u16(__p0, __p1) __extension__ ({ \
   uint16x4_t __s0 = __p0; \
@@ -4987,22 +4809,12 @@
 })
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 #define vdup_lane_s64(__p0, __p1) __extension__ ({ \
   int64x1_t __s0 = __p0; \
   int64x1_t __ret; \
   __ret = __builtin_shufflevector(__s0, __s0, __p1); \
   __ret; \
 })
-#else
-#define vdup_lane_s64(__p0, __p1) __extension__ ({ \
-  int64x1_t __s0 = __p0; \
-  int64x1_t __ret; \
-  __ret = __builtin_shufflevector(__s0, __s0, __p1); \
-  __ret; \
-})
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 #define vdup_lane_s16(__p0, __p1) __extension__ ({ \
   int16x4_t __s0 = __p0; \
@@ -5263,20 +5075,11 @@ __ai uint32x2_t vdup_n_u32(uint32_t __p0) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vdup_n_u64(uint64_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t) {__p0};
  return __ret;
 }
-#else
-__ai uint64x1_t vdup_n_u64(uint64_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t) {__p0};
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vdup_n_u16(uint16_t __p0) {
   uint16x4_t __ret;
@@ -5354,20 +5157,11 @@ __ai int32x2_t vdup_n_s32(int32_t __p0) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai int64x1_t vdup_n_s64(int64_t __p0) {
   int64x1_t __ret;
   __ret = (int64x1_t) {__p0};
   return __ret;
 }
-#else
-__ai int64x1_t vdup_n_s64(int64_t __p0) {
-  int64x1_t __ret;
-  __ret = (int64x1_t) {__p0};
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai int16x4_t vdup_n_s16(int16_t __p0) {
   int16x4_t __ret;
@@ -5553,20 +5347,11 @@ __ai uint32x2_t veor_u32(uint32x2_t __p0, uint32x2_t __p1) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t veor_u64(uint64x1_t __p0, uint64x1_t __p1) {
   uint64x1_t __ret;
   __ret = __p0 ^ __p1;
   return __ret;
 }
-#else
-__ai uint64x1_t veor_u64(uint64x1_t __p0, uint64x1_t __p1) {
-  uint64x1_t __ret;
-  __ret = __p0 ^ __p1;
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t veor_u16(uint16x4_t __p0, uint16x4_t __p1) {
   uint16x4_t __ret;
@@ -5618,20 +5403,11 @@ __ai int32x2_t veor_s32(int32x2_t __p0, int32x2_t __p1) {
 }
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 __ai int64x1_t veor_s64(int64x1_t __p0, int64x1_t __p1) {
   int64x1_t __ret;
   __ret = __p0 ^ __p1;
   return __ret;
 }
-#else
-__ai int64x1_t veor_s64(int64x1_t __p0, int64x1_t __p1) {
-  int64x1_t __ret;
-  __ret = __p0 ^ __p1;
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai int16x4_t veor_s16(int16x4_t __p0, int16x4_t __p1) {
   int16x4_t __ret;
@@ -5964,7 +5740,6 @@ __ai int16x4_t veor_s16(int16x4_t __p0, int16x4_t __p1) {
 })
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 #define vext_u64(__p0, __p1, __p2) __extension__ ({ \
   uint64x1_t __s0 = __p0; \
   uint64x1_t __s1 = __p1; \
@@ -5972,16 +5747,6 @@ __ai int16x4_t veor_s16(int16x4_t __p0, int16x4_t __p1) {
   __ret = (uint64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \
   __ret; \
 })
-#else
-#define vext_u64(__p0, __p1, __p2) __extension__ ({ \
-  uint64x1_t __s0 = __p0; \
-  uint64x1_t __s1 = __p1; \
-  uint64x1_t __ret; \
-  __ret = (uint64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \
-  __ret; \
-})
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 #define vext_u16(__p0, __p1, __p2) __extension__ ({ \
   uint16x4_t __s0 = __p0; \
@@ -6066,7 +5831,6 @@ __ai int16x4_t veor_s16(int16x4_t __p0, int16x4_t __p1) {
 })
 #endif
 
-#ifdef __LITTLE_ENDIAN__
 #define vext_s64(__p0, __p1, __p2) __extension__ ({ \
   int64x1_t __s0 = __p0; \
   int64x1_t __s1 = __p1; \
@@ -6074,16 +5838,6 @@ __ai int16x4_t veor_s16(int16x4_t __p0, int16x4_t __p1) {
   __ret = (int64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \
   __ret; \
 })
-#else
-#define vext_s64(__p0, __p1, __p2) __extension__ ({ \
-  int64x1_t __s0 = __p0; \
-  int64x1_t __s1 = __p1; \
-  int64x1_t __ret; \
-  __ret = (int64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \
-  __ret; \
-})
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 #define vext_s16(__p0, __p1, __p2) __extension__ ({ \
   int16x4_t __s0 = __p0; \
@@ -6344,7 +6098,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
 #define vget_lane_p8(__p0, __p1) __extension__ ({ \
   poly8x8_t __s0 = __p0; \
   poly8_t __ret; \
-  __ret = (poly8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \
+  __ret = (poly8_t) __builtin_neon_vget_lane_i8((poly8x8_t)__s0, __p1); \
   __ret; \
 })
 #else
@@ -6352,13 +6106,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
   poly8x8_t __s0 = __p0; \
   poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   poly8_t __ret; \
-  __ret = (poly8_t) __builtin_neon_vget_lane_i8((int8x8_t)__rev0, __p1); \
+  __ret = (poly8_t) __builtin_neon_vget_lane_i8((poly8x8_t)__rev0, __p1); \
   __ret; \
 })
 #define __noswap_vget_lane_p8(__p0, __p1) __extension__ ({ \
   poly8x8_t __s0 = __p0; \
   poly8_t __ret; \
-  __ret = (poly8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \
+  __ret = (poly8_t) __builtin_neon_vget_lane_i8((poly8x8_t)__s0, __p1); \
   __ret; \
 })
 #endif
@@ -6367,7 +6121,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
 #define vget_lane_p16(__p0, __p1) __extension__ ({ \
   poly16x4_t __s0 = __p0; \
   poly16_t __ret; \
-  __ret = (poly16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \
+  __ret = (poly16_t) __builtin_neon_vget_lane_i16((poly16x4_t)__s0, __p1); \
  __ret; \
 })
 #else
@@ -6375,13 +6129,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
   poly16x4_t __s0 = __p0; \
   poly16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   poly16_t __ret; \
-  __ret = (poly16_t) __builtin_neon_vget_lane_i16((int8x8_t)__rev0, __p1); \
+  __ret = (poly16_t) __builtin_neon_vget_lane_i16((poly16x4_t)__rev0, __p1); \
   __ret; \
 })
 #define __noswap_vget_lane_p16(__p0, __p1) __extension__ ({ \
   poly16x4_t __s0 = __p0; \
   poly16_t __ret; \
-  __ret = (poly16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \
+  __ret = (poly16_t) __builtin_neon_vget_lane_i16((poly16x4_t)__s0, __p1); \
   __ret; \
 })
 #endif
@@ -6390,7 +6144,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
 #define vgetq_lane_p8(__p0, __p1) __extension__ ({ \
   poly8x16_t __s0 = __p0; \
   poly8_t __ret; \
-  __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \
+  __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((poly8x16_t)__s0, __p1); \
   __ret; \
 })
 #else
@@ -6398,13 +6152,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
   poly8x16_t __s0 = __p0; \
   poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
   poly8_t __ret; \
-  __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__rev0, __p1); \
+  __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((poly8x16_t)__rev0, __p1); \
   __ret; \
 })
 #define __noswap_vgetq_lane_p8(__p0, __p1) __extension__ ({ \
   poly8x16_t __s0 = __p0; \
   poly8_t __ret; \
-  __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \
+  __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((poly8x16_t)__s0, __p1); \
   __ret; \
 })
 #endif
@@ -6413,7 +6167,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
 #define vgetq_lane_p16(__p0, __p1) __extension__ ({ \
   poly16x8_t __s0 = __p0; \
   poly16_t __ret; \
-  __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \
+  __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((poly16x8_t)__s0, __p1); \
   __ret; \
 })
 #else
@@ -6421,13 +6175,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
   poly16x8_t __s0 = __p0; \
   poly16x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   poly16_t __ret; \
-  __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__rev0, __p1); \
+  __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((poly16x8_t)__rev0, __p1); \
   __ret; \
 })
 #define __noswap_vgetq_lane_p16(__p0, __p1) __extension__ ({ \
   poly16x8_t __s0 = __p0; \
   poly16_t __ret; \
-  __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \
+  __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((poly16x8_t)__s0, __p1); \
   __ret; \
 })
 #endif
@@ -6459,7 +6213,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
 #define vgetq_lane_u32(__p0, __p1) __extension__ ({ \
   uint32x4_t __s0 = __p0; \
   uint32_t __ret; \
-  __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__s0, __p1); \
+  __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \
   __ret; \
 })
 #else
@@ -6467,13 +6221,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
   uint32x4_t __s0 = __p0; \
   uint32x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   uint32_t __ret; \
-  __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__rev0, __p1); \
+  __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__rev0, __p1); \
   __ret; \
 })
 #define __noswap_vgetq_lane_u32(__p0, __p1) __extension__ ({ \
   uint32x4_t __s0 = __p0; \
   uint32_t __ret; \
-  __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__s0, __p1); \
+  __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \
   __ret; \
 })
 #endif
@@ -6482,7 +6236,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
 #define vgetq_lane_u64(__p0, __p1) __extension__ ({ \
   uint64x2_t __s0 = __p0; \
   uint64_t __ret; \
-  __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \
+  __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \
   __ret; \
 })
 #else
@@ -6490,13 +6244,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
   uint64x2_t __s0 = __p0; \
   uint64x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
   uint64_t __ret; \
-  __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__rev0, __p1); \
+  __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__rev0, __p1); \
   __ret; \
 })
 #define __noswap_vgetq_lane_u64(__p0, __p1) __extension__ ({ \
   uint64x2_t __s0 = __p0; \
   uint64_t __ret; \
-  __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \
+  __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \
   __ret; \
 })
 #endif
@@ -6505,7 +6259,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
 #define vgetq_lane_u16(__p0, __p1) __extension__ ({ \
   uint16x8_t __s0 = __p0; \
   uint16_t __ret; \
-  __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \
+  __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \
   __ret; \
 })
 #else
@@ -6513,13 +6267,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) {
   uint16x8_t __s0 = __p0; \
   uint16x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   uint16_t __ret; \
-  __ret = 
(uint16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__rev0, __p1); \ + __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s0 = __p0; \ uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \ + __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \ __ret; \ }) #endif @@ -6551,7 +6305,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { #define vgetq_lane_f32(__p0, __p1) __extension__ ({ \ float32x4_t __s0 = __p0; \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vgetq_lane_f32((int8x16_t)__s0, __p1); \ + __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, __p1); \ __ret; \ }) #else @@ -6559,13 +6313,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { float32x4_t __s0 = __p0; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vgetq_lane_f32((int8x16_t)__rev0, __p1); \ + __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_f32(__p0, __p1) __extension__ ({ \ float32x4_t __s0 = __p0; \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vgetq_lane_f32((int8x16_t)__s0, __p1); \ + __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, __p1); \ __ret; \ }) #endif @@ -6574,7 +6328,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { #define vgetq_lane_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s0 = __p0; \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__s0, __p1); \ + __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \ __ret; \ }) #else @@ -6582,13 +6336,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__rev0, __p1); \ + __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s0 = __p0; \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int8x16_t)__s0, __p1); \ + __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \ __ret; \ }) #endif @@ -6597,7 +6351,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { #define vgetq_lane_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s0 = __p0; \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \ + __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \ __ret; \ }) #else @@ -6605,13 +6359,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__rev0, __p1); \ + __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s0 = __p0; \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \ + __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \ __ret; \ }) #endif @@ -6620,7 +6374,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { #define vgetq_lane_s16(__p0, __p1) 
__extension__ ({ \ int16x8_t __s0 = __p0; \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \ + __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \ __ret; \ }) #else @@ -6628,13 +6382,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__rev0, __p1); \ + __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s0 = __p0; \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int8x16_t)__s0, __p1); \ + __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \ __ret; \ }) #endif @@ -6666,7 +6420,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { #define vget_lane_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __s0 = __p0; \ uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vget_lane_i32((int8x8_t)__s0, __p1); \ + __ret = (uint32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #else @@ -6674,44 +6428,28 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vget_lane_i32((int8x8_t)__rev0, __p1); \ + __ret = (uint32_t) __builtin_neon_vget_lane_i32((int32x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __s0 = __p0; \ uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vget_lane_i32((int8x8_t)__s0, __p1); \ + __ret = (uint32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vget_lane_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s0 = __p0; \ uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ + __ret = (uint64_t) __builtin_neon_vget_lane_i64((int64x1_t)__s0, __p1); \ __ret; \ }) -#else -#define vget_lane_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#define __noswap_vget_lane_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vget_lane_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s0 = __p0; \ uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \ + __ret = (uint16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #else @@ -6719,13 +6457,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { uint16x4_t __s0 = __p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vget_lane_i16((int8x8_t)__rev0, __p1); \ + __ret = (uint16_t) __builtin_neon_vget_lane_i16((int16x4_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s0 = __p0; \ uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \ + __ret = (uint16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #endif @@ -6757,7 +6495,7 @@ __ai int16x4_t 
__noswap_vget_high_s16(int16x8_t __p0) { #define vget_lane_f32(__p0, __p1) __extension__ ({ \ float32x2_t __s0 = __p0; \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vget_lane_f32((int8x8_t)__s0, __p1); \ + __ret = (float32_t) __builtin_neon_vget_lane_f32((float32x2_t)__s0, __p1); \ __ret; \ }) #else @@ -6765,13 +6503,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { float32x2_t __s0 = __p0; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vget_lane_f32((int8x8_t)__rev0, __p1); \ + __ret = (float32_t) __builtin_neon_vget_lane_f32((float32x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_f32(__p0, __p1) __extension__ ({ \ float32x2_t __s0 = __p0; \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vget_lane_f32((int8x8_t)__s0, __p1); \ + __ret = (float32_t) __builtin_neon_vget_lane_f32((float32x2_t)__s0, __p1); \ __ret; \ }) #endif @@ -6780,7 +6518,7 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { #define vget_lane_s32(__p0, __p1) __extension__ ({ \ int32x2_t __s0 = __p0; \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vget_lane_i32((int8x8_t)__s0, __p1); \ + __ret = (int32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #else @@ -6788,44 +6526,28 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vget_lane_i32((int8x8_t)__rev0, __p1); \ + __ret = (int32_t) __builtin_neon_vget_lane_i32((int32x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_s32(__p0, __p1) __extension__ ({ \ int32x2_t __s0 = __p0; \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vget_lane_i32((int8x8_t)__s0, __p1); \ + __ret = (int32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vget_lane_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s0 = __p0; \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ + __ret = (int64_t) __builtin_neon_vget_lane_i64((int64x1_t)__s0, __p1); \ __ret; \ }) -#else -#define vget_lane_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#define __noswap_vget_lane_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vget_lane_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s0 = __p0; \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \ + __ret = (int16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #else @@ -6833,13 +6555,13 @@ __ai int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vget_lane_i16((int8x8_t)__rev0, __p1); \ + __ret = (int16_t) __builtin_neon_vget_lane_i16((int16x4_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vget_lane_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s0 = __p0; \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vget_lane_i16((int8x8_t)__s0, __p1); \ + __ret = (int16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #endif @@ 
-7667,20 +7389,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_u64(__p0) __extension__ ({ \ uint64x1_t __ret; \ __ret = (uint64x1_t) __builtin_neon_vld1_v(__p0, 19); \ __ret; \ }) -#else -#define vld1_u64(__p0) __extension__ ({ \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vld1_v(__p0, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ @@ -7741,20 +7454,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_s64(__p0) __extension__ ({ \ int64x1_t __ret; \ __ret = (int64x1_t) __builtin_neon_vld1_v(__p0, 3); \ __ret; \ }) -#else -#define vld1_s64(__p0) __extension__ ({ \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vld1_v(__p0, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ @@ -7995,20 +7699,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_dup_u64(__p0) __extension__ ({ \ uint64x1_t __ret; \ __ret = (uint64x1_t) __builtin_neon_vld1_dup_v(__p0, 19); \ __ret; \ }) -#else -#define vld1_dup_u64(__p0) __extension__ ({ \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vld1_dup_v(__p0, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_dup_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ @@ -8069,20 +7764,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_dup_s64(__p0) __extension__ ({ \ int64x1_t __ret; \ __ret = (int64x1_t) __builtin_neon_vld1_dup_v(__p0, 3); \ __ret; \ }) -#else -#define vld1_dup_s64(__p0) __extension__ ({ \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vld1_dup_v(__p0, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_dup_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ @@ -8368,22 +8054,12 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s1 = __p1; \ uint64x1_t __ret; \ __ret = (uint64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ __ret; \ }) -#else -#define vld1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __s1 = __p1; \ @@ -8456,22 +8132,12 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __s1 = __p1; \ int64x1_t __ret; \ __ret = (int64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) -#else -#define vld1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s1 = __p1; \ @@ -8745,20 +8411,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_u64_x2(__p0) __extension__ ({ \ uint64x1x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 19); \ __ret; \ }) -#else -#define 
vld1_u64_x2(__p0) __extension__ ({ \ - uint64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_u16_x2(__p0) __extension__ ({ \ uint16x4x2_t __ret; \ @@ -8827,20 +8484,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_s64_x2(__p0) __extension__ ({ \ int64x1x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 3); \ __ret; \ }) -#else -#define vld1_s64_x2(__p0) __extension__ ({ \ - int64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_s16_x2(__p0) __extension__ ({ \ int16x4x2_t __ret; \ @@ -9128,20 +8776,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_u64_x3(__p0) __extension__ ({ \ uint64x1x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 19); \ __ret; \ }) -#else -#define vld1_u64_x3(__p0) __extension__ ({ \ - uint64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_u16_x3(__p0) __extension__ ({ \ uint16x4x3_t __ret; \ @@ -9214,20 +8853,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_s64_x3(__p0) __extension__ ({ \ int64x1x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 3); \ __ret; \ }) -#else -#define vld1_s64_x3(__p0) __extension__ ({ \ - int64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_s16_x3(__p0) __extension__ ({ \ int16x4x3_t __ret; \ @@ -9531,20 +9161,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_u64_x4(__p0) __extension__ ({ \ uint64x1x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 19); \ __ret; \ }) -#else -#define vld1_u64_x4(__p0) __extension__ ({ \ - uint64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_u16_x4(__p0) __extension__ ({ \ uint16x4x4_t __ret; \ @@ -9621,20 +9242,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_s64_x4(__p0) __extension__ ({ \ int64x1x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 3); \ __ret; \ }) -#else -#define vld1_s64_x4(__p0) __extension__ ({ \ - int64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1_s16_x4(__p0) __extension__ ({ \ int16x4x4_t __ret; \ @@ -9875,20 +9487,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld2_u64(__p0) __extension__ ({ \ uint64x1x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 19); \ __ret; \ }) -#else -#define vld2_u64(__p0) __extension__ ({ \ - uint64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld2_u16(__p0) __extension__ ({ \ uint16x4x2_t __ret; \ @@ -9957,20 +9560,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld2_s64(__p0) __extension__ ({ \ int64x1x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 3); \ __ret; \ }) -#else -#define vld2_s64(__p0) __extension__ ({ \ - int64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld2_s16(__p0) __extension__ ({ 
\ int16x4x2_t __ret; \ @@ -10243,20 +9837,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld2_dup_u64(__p0) __extension__ ({ \ uint64x1x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 19); \ __ret; \ }) -#else -#define vld2_dup_u64(__p0) __extension__ ({ \ - uint64x1x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld2_dup_u16(__p0) __extension__ ({ \ uint16x4x2_t __ret; \ @@ -10325,20 +9910,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld2_dup_s64(__p0) __extension__ ({ \ int64x1x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 3); \ __ret; \ }) -#else -#define vld2_dup_s64(__p0) __extension__ ({ \ - int64x1x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld2_dup_s16(__p0) __extension__ ({ \ int16x4x2_t __ret; \ @@ -10470,7 +10046,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ float32x4x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 41); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 41); \ __ret; \ }) #else @@ -10480,7 +10056,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ float32x4x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 41); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -10492,7 +10068,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ int32x4x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 34); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 34); \ __ret; \ }) #else @@ -10502,7 +10078,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ int32x4x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 34); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -10514,7 +10090,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ int16x8x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 33); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 33); \ __ret; \ }) #else @@ -10524,7 +10100,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 33); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -10624,7 +10200,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ float32x2x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 9); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 9); \ __ret; \ }) #else @@ -10634,7 +10210,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ float32x2x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 9); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -10646,7 +10222,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ int32x2x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 2); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 2); \ __ret; \ }) #else @@ -10656,7 +10232,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ int32x2x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 2); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -10668,7 +10244,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ int16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 1); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 1); \ __ret; \ }) #else @@ -10678,7 +10254,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ int16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 1); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], 
__ret.val[1], 3, 2, 1, 0); \ @@ -10920,20 +10496,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld3_u64(__p0) __extension__ ({ \ uint64x1x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 19); \ __ret; \ }) -#else -#define vld3_u64(__p0) __extension__ ({ \ - uint64x1x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld3_u16(__p0) __extension__ ({ \ uint16x4x3_t __ret; \ @@ -11006,20 +10573,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld3_s64(__p0) __extension__ ({ \ int64x1x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 3); \ __ret; \ }) -#else -#define vld3_s64(__p0) __extension__ ({ \ - int64x1x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld3_s16(__p0) __extension__ ({ \ int16x4x3_t __ret; \ @@ -11308,20 +10866,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld3_dup_u64(__p0) __extension__ ({ \ uint64x1x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 19); \ __ret; \ }) -#else -#define vld3_dup_u64(__p0) __extension__ ({ \ - uint64x1x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld3_dup_u16(__p0) __extension__ ({ \ uint16x4x3_t __ret; \ @@ -11394,20 +10943,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld3_dup_s64(__p0) __extension__ ({ \ int64x1x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 3); \ __ret; \ }) -#else -#define vld3_dup_s64(__p0) __extension__ ({ \ - int64x1x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld3_dup_s16(__p0) __extension__ ({ \ int16x4x3_t __ret; \ @@ -11550,7 +11090,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ float32x4x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 41); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 41); \ __ret; \ }) #else @@ -11561,7 +11101,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ float32x4x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 41); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -11574,7 +11114,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ int32x4x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 34); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 34); \ __ret; \ }) #else @@ -11585,7 +11125,7 @@ __ai int16x4_t vhsub_s16(int16x4_t 
__p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ int32x4x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 34); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -11598,7 +11138,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ int16x8x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 33); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 33); \ __ret; \ }) #else @@ -11609,7 +11149,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 33); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -11718,7 +11258,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ float32x2x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 9); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 9); \ __ret; \ }) #else @@ -11729,7 +11269,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ float32x2x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 9); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -11742,7 +11282,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ int32x2x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 2); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 2); \ __ret; \ }) #else @@ -11753,7 +11293,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ int32x2x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, 
__rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 2); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -11766,7 +11306,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ int16x4x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 1); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 1); \ __ret; \ }) #else @@ -11777,7 +11317,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ int16x4x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 1); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -12033,20 +11573,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld4_u64(__p0) __extension__ ({ \ uint64x1x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 19); \ __ret; \ }) -#else -#define vld4_u64(__p0) __extension__ ({ \ - uint64x1x4_t __ret; \ - __builtin_neon_vld4_v(&__ret, __p0, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld4_u16(__p0) __extension__ ({ \ uint16x4x4_t __ret; \ @@ -12123,20 +11654,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld4_s64(__p0) __extension__ ({ \ int64x1x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 3); \ __ret; \ }) -#else -#define vld4_s64(__p0) __extension__ ({ \ - int64x1x4_t __ret; \ - __builtin_neon_vld4_v(&__ret, __p0, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld4_s16(__p0) __extension__ ({ \ int16x4x4_t __ret; \ @@ -12441,20 +11963,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld4_dup_u64(__p0) __extension__ ({ \ uint64x1x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 19); \ __ret; \ }) -#else -#define vld4_dup_u64(__p0) __extension__ ({ \ - uint64x1x4_t __ret; \ - __builtin_neon_vld4_dup_v(&__ret, __p0, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld4_dup_u16(__p0) __extension__ ({ \ uint16x4x4_t __ret; \ @@ -12531,20 +12044,11 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld4_dup_s64(__p0) __extension__ ({ \ int64x1x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 3); \ __ret; \ }) -#else -#define vld4_dup_s64(__p0) __extension__ ({ \ - int64x1x4_t __ret; \ - __builtin_neon_vld4_dup_v(&__ret, __p0, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld4_dup_s16(__p0) __extension__ ({ \ int16x4x4_t __ret; \ @@ -12698,7 +12202,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ float32x4x4_t __ret; \ - 
__builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 41); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 41); \ __ret; \ }) #else @@ -12710,7 +12214,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ float32x4x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 41); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 41); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -12724,7 +12228,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ int32x4x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 34); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 34); \ __ret; \ }) #else @@ -12736,7 +12240,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ int32x4x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 34); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 34); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -12750,7 +12254,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ int16x8x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 33); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 33); \ __ret; \ }) #else @@ -12762,7 +12266,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 33); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 33); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -12880,7 +12384,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ 
float32x2x4_t __s1 = __p1; \ float32x2x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 9); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 9); \ __ret; \ }) #else @@ -12892,7 +12396,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ float32x2x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 9); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 9); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -12906,7 +12410,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ int32x2x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 2); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 2); \ __ret; \ }) #else @@ -12918,7 +12422,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ int32x2x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 2); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 2); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -12932,7 +12436,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { #define vld4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ int16x4x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 1); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 1); \ __ret; \ }) #else @@ -12944,7 +12448,7 @@ __ai int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ int16x4x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 1); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 1); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -14996,20 +14500,11 @@ __ai uint32x2_t vmov_n_u32(uint32_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vmov_n_u64(uint64_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) {__p0}; return __ret; } -#else -__ai uint64x1_t 
vmov_n_u64(uint64_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) {__p0}; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vmov_n_u16(uint16_t __p0) { uint16x4_t __ret; @@ -15087,20 +14582,11 @@ __ai int32x2_t vmov_n_s32(int32_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vmov_n_s64(int64_t __p0) { int64x1_t __ret; __ret = (int64x1_t) {__p0}; return __ret; } -#else -__ai int64x1_t vmov_n_s64(int64_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) {__p0}; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vmov_n_s16(int16_t __p0) { int16x4_t __ret; @@ -16251,20 +15737,20 @@ __ai int32x4_t __noswap_vmull_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vmull_n_u32(uint32x2_t __p0, uint32_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)(uint32x2_t) {__p1, __p1}, 51); + __ret = vmull_u32(__p0, (uint32x2_t) {__p1, __p1}); return __ret; } #else __ai uint64x2_t vmull_n_u32(uint32x2_t __p0, uint32_t __p1) { uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)(uint32x2_t) {__p1, __p1}, 51); + __ret = __noswap_vmull_u32(__rev0, (uint32x2_t) {__p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai uint64x2_t __noswap_vmull_n_u32(uint32x2_t __p0, uint32_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)(uint32x2_t) {__p1, __p1}, 51); + __ret = __noswap_vmull_u32(__p0, (uint32x2_t) {__p1, __p1}); return __ret; } #endif @@ -16272,20 +15758,20 @@ __ai uint64x2_t __noswap_vmull_n_u32(uint32x2_t __p0, uint32_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vmull_n_u16(uint16x4_t __p0, uint16_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)(uint16x4_t) {__p1, __p1, __p1, __p1}, 50); + __ret = vmull_u16(__p0, (uint16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai uint32x4_t vmull_n_u16(uint16x4_t __p0, uint16_t __p1) { uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)(uint16x4_t) {__p1, __p1, __p1, __p1}, 50); + __ret = __noswap_vmull_u16(__rev0, (uint16x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai uint32x4_t __noswap_vmull_n_u16(uint16x4_t __p0, uint16_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)(uint16x4_t) {__p1, __p1, __p1, __p1}, 50); + __ret = __noswap_vmull_u16(__p0, (uint16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #endif @@ -16293,20 +15779,20 @@ __ai uint32x4_t __noswap_vmull_n_u16(uint16x4_t __p0, uint16_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)(int32x2_t) {__p1, __p1}, 35); + __ret = vmull_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #else __ai int64x2_t vmull_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)(int32x2_t) {__p1, __p1}, 35); + __ret = __noswap_vmull_s32(__rev0, (int32x2_t) {__p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; 
} __ai int64x2_t __noswap_vmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)(int32x2_t) {__p1, __p1}, 35); + __ret = __noswap_vmull_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #endif @@ -16314,20 +15800,20 @@ __ai int64x2_t __noswap_vmull_n_s32(int32x2_t __p0, int32_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)(int16x4_t) {__p1, __p1, __p1, __p1}, 34); + __ret = vmull_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int32x4_t vmull_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)(int16x4_t) {__p1, __p1, __p1, __p1}, 34); + __ret = __noswap_vmull_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)(int16x4_t) {__p1, __p1, __p1, __p1}, 34); + __ret = __noswap_vmull_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #endif @@ -16854,20 +16340,11 @@ __ai uint32x2_t vorn_u32(uint32x2_t __p0, uint32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vorn_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 | ~__p1; return __ret; } -#else -__ai uint64x1_t vorn_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = __p0 | ~__p1; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vorn_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; @@ -16919,20 +16396,11 @@ __ai int32x2_t vorn_s32(int32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vorn_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 | ~__p1; return __ret; } -#else -__ai int64x1_t vorn_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = __p0 | ~__p1; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vorn_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; @@ -17120,20 +16588,11 @@ __ai uint32x2_t vorr_u32(uint32x2_t __p0, uint32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vorr_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 | __p1; return __ret; } -#else -__ai uint64x1_t vorr_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = __p0 | __p1; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vorr_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; @@ -17185,20 +16644,11 @@ __ai int32x2_t vorr_s32(int32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vorr_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 | __p1; return __ret; } -#else -__ai int64x1_t vorr_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = __p0 | __p1; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vorr_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; @@ -18239,20 +17689,11 @@ __ai uint32x2_t vqadd_u32(uint32x2_t __p0, uint32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vqadd_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqadd_v((int8x8_t)__p0, 
(int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vqadd_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vqadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; @@ -18309,20 +17750,11 @@ __ai int32x2_t __noswap_vqadd_s32(int32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vqadd_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } -#else -__ai int64x1_t vqadd_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; @@ -18442,7 +17874,7 @@ __ai int32x4_t __noswap_vqdmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __ #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)(int32x2_t) {__p2, __p2}, 35); + __ret = vqdmlal_s32(__p0, __p1, (int32x2_t) {__p2, __p2}); return __ret; } #else @@ -18450,13 +17882,13 @@ __ai int64x2_t vqdmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmlal_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)(int32x2_t) {__p2, __p2}, 35); + __ret = __noswap_vqdmlal_s32(__rev0, __rev1, (int32x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vqdmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)(int32x2_t) {__p2, __p2}, 35); + __ret = __noswap_vqdmlal_s32(__p0, __p1, (int32x2_t) {__p2, __p2}); return __ret; } #endif @@ -18464,7 +17896,7 @@ __ai int64x2_t __noswap_vqdmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __ #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)(int16x4_t) {__p2, __p2, __p2, __p2}, 34); + __ret = vqdmlal_s16(__p0, __p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #else @@ -18472,13 +17904,13 @@ __ai int32x4_t vqdmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmlal_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)(int16x4_t) {__p2, __p2, __p2, __p2}, 34); + __ret = __noswap_vqdmlal_s16(__rev0, __rev1, (int16x4_t) {__p2, __p2, __p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vqdmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)(int16x4_t) {__p2, __p2, __p2, __p2}, 34); + __ret = __noswap_vqdmlal_s16(__p0, __p1, (int16x4_t) {__p2, __p2, __p2, __p2}); 
return __ret; } #endif @@ -18580,7 +18012,7 @@ __ai int32x4_t __noswap_vqdmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __ #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)(int32x2_t) {__p2, __p2}, 35); + __ret = vqdmlsl_s32(__p0, __p1, (int32x2_t) {__p2, __p2}); return __ret; } #else @@ -18588,13 +18020,13 @@ __ai int64x2_t vqdmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmlsl_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)(int32x2_t) {__p2, __p2}, 35); + __ret = __noswap_vqdmlsl_s32(__rev0, __rev1, (int32x2_t) {__p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vqdmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)(int32x2_t) {__p2, __p2}, 35); + __ret = __noswap_vqdmlsl_s32(__p0, __p1, (int32x2_t) {__p2, __p2}); return __ret; } #endif @@ -18602,7 +18034,7 @@ __ai int64x2_t __noswap_vqdmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __ #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)(int16x4_t) {__p2, __p2, __p2, __p2}, 34); + __ret = vqdmlsl_s16(__p0, __p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #else @@ -18610,13 +18042,13 @@ __ai int32x4_t vqdmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmlsl_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)(int16x4_t) {__p2, __p2, __p2, __p2}, 34); + __ret = __noswap_vqdmlsl_s16(__rev0, __rev1, (int16x4_t) {__p2, __p2, __p2, __p2}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vqdmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)(int16x4_t) {__p2, __p2, __p2, __p2}, 34); + __ret = __noswap_vqdmlsl_s16(__p0, __p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } #endif @@ -18796,14 +18228,14 @@ __ai int16x4_t __noswap_vqdmulh_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)(int32x4_t) {__p1, __p1, __p1, __p1}, 34); + __ret = vqdmulhq_s32(__p0, (int32x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int32x4_t vqdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmulhq_v((int8x16_t)__rev0, (int8x16_t)(int32x4_t) {__p1, __p1, __p1, __p1}, 34); + __ret = __noswap_vqdmulhq_s32(__rev0, (int32x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } @@ -18812,14 +18244,14 @@ __ai int32x4_t 
vqdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)(int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}, 33); + __ret = vqdmulhq_s16(__p0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); return __ret; } #else __ai int16x8_t vqdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqdmulhq_v((int8x16_t)__rev0, (int8x16_t)(int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}, 33); + __ret = __noswap_vqdmulhq_s16(__rev0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } @@ -18828,14 +18260,14 @@ __ai int16x8_t vqdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)(int32x2_t) {__p1, __p1}, 2); + __ret = vqdmulh_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #else __ai int32x2_t vqdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqdmulh_v((int8x8_t)__rev0, (int8x8_t)(int32x2_t) {__p1, __p1}, 2); + __ret = __noswap_vqdmulh_s32(__rev0, (int32x2_t) {__p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } @@ -18844,14 +18276,14 @@ __ai int32x2_t vqdmulh_n_s32(int32x2_t __p0, int32_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)(int16x4_t) {__p1, __p1, __p1, __p1}, 1); + __ret = vqdmulh_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int16x4_t vqdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqdmulh_v((int8x8_t)__rev0, (int8x8_t)(int16x4_t) {__p1, __p1, __p1, __p1}, 1); + __ret = __noswap_vqdmulh_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } @@ -18946,20 +18378,20 @@ __ai int32x4_t __noswap_vqdmull_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)(int32x2_t) {__p1, __p1}, 35); + __ret = vqdmull_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #else __ai int64x2_t vqdmull_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmull_v((int8x8_t)__rev0, (int8x8_t)(int32x2_t) {__p1, __p1}, 35); + __ret = __noswap_vqdmull_s32(__rev0, (int32x2_t) {__p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } __ai int64x2_t __noswap_vqdmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)(int32x2_t) {__p1, __p1}, 35); + __ret = __noswap_vqdmull_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #endif @@ -18967,20 
+18399,20 @@ __ai int64x2_t __noswap_vqdmull_n_s32(int32x2_t __p0, int32_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqdmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)(int16x4_t) {__p1, __p1, __p1, __p1}, 34); + __ret = vqdmull_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int32x4_t vqdmull_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmull_v((int8x8_t)__rev0, (int8x8_t)(int16x4_t) {__p1, __p1, __p1, __p1}, 34); + __ret = __noswap_vqdmull_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai int32x4_t __noswap_vqdmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)(int16x4_t) {__p1, __p1, __p1, __p1}, 34); + __ret = __noswap_vqdmull_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #endif @@ -19445,14 +18877,14 @@ __ai int16x4_t __noswap_vqrdmulh_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int32x4_t vqrdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)(int32x4_t) {__p1, __p1, __p1, __p1}, 34); + __ret = vqrdmulhq_s32(__p0, (int32x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int32x4_t vqrdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__rev0, (int8x16_t)(int32x4_t) {__p1, __p1, __p1, __p1}, 34); + __ret = __noswap_vqrdmulhq_s32(__rev0, (int32x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } @@ -19461,14 +18893,14 @@ __ai int32x4_t vqrdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int16x8_t vqrdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)(int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}, 33); + __ret = vqrdmulhq_s16(__p0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); return __ret; } #else __ai int16x8_t vqrdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__rev0, (int8x16_t)(int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}, 33); + __ret = __noswap_vqrdmulhq_s16(__rev0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } @@ -19477,14 +18909,14 @@ __ai int16x8_t vqrdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int32x2_t vqrdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)(int32x2_t) {__p1, __p1}, 2); + __ret = vqrdmulh_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } #else __ai int32x2_t vqrdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqrdmulh_v((int8x8_t)__rev0, (int8x8_t)(int32x2_t) {__p1, __p1}, 2); + 
__ret = __noswap_vqrdmulh_s32(__rev0, (int32x2_t) {__p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } @@ -19493,14 +18925,14 @@ __ai int32x2_t vqrdmulh_n_s32(int32x2_t __p0, int32_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqrdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)(int16x4_t) {__p1, __p1, __p1, __p1}, 1); + __ret = vqrdmulh_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } #else __ai int16x4_t vqrdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqrdmulh_v((int8x8_t)__rev0, (int8x8_t)(int16x4_t) {__p1, __p1, __p1, __p1}, 1); + __ret = __noswap_vqrdmulh_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } @@ -19676,20 +19108,11 @@ __ai uint32x2_t vqrshl_u32(uint32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vqrshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vqrshl_u64(uint64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vqrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; @@ -19741,20 +19164,11 @@ __ai int32x2_t vqrshl_s32(int32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vqrshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } -#else -__ai int64x1_t vqrshl_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqrshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; @@ -20158,20 +19572,11 @@ __ai uint32x2_t vqshl_u32(uint32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vqshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vqshl_u64(uint64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vqshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; @@ -20223,20 +19628,11 @@ __ai int32x2_t vqshl_s32(int32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vqshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } -#else -__ai int64x1_t vqshl_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; @@ -20434,22 +19830,12 @@ __ai int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vqshl_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s0 = __p0; \ 
uint64x1_t __ret; \ __ret = (uint64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) -#else -#define vqshl_n_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vqshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s0 = __p0; \ @@ -20504,22 +19890,12 @@ __ai int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vqshl_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s0 = __p0; \ int64x1_t __ret; \ __ret = (int64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) -#else -#define vqshl_n_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vqshl_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s0 = __p0; \ @@ -20646,22 +20022,12 @@ __ai int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vqshlu_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s0 = __p0; \ uint64x1_t __ret; \ __ret = (uint64x1_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) -#else -#define vqshlu_n_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vqshlu_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s0 = __p0; \ @@ -21076,20 +20442,11 @@ __ai uint32x2_t vqsub_u32(uint32x2_t __p0, uint32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vqsub_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vqsub_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vqsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; @@ -21146,20 +20503,11 @@ __ai int32x2_t __noswap_vqsub_s32(int32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vqsub_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } -#else -__ai int64x1_t vqsub_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vqsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; @@ -22362,20 +21710,11 @@ __ai uint32x2_t vrshl_u32(uint32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vrshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vrshl_u64(uint64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; @@ -22427,20 +21766,11 @@ __ai int32x2_t vrshl_s32(int32x2_t __p0, int32x2_t __p1) { } #endif 
-#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vrshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } -#else -__ai int64x1_t vrshl_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; @@ -22638,22 +21968,12 @@ __ai int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vrshr_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s0 = __p0; \ uint64x1_t __ret; \ __ret = (uint64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) -#else -#define vrshr_n_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vrshr_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s0 = __p0; \ @@ -22708,22 +22028,12 @@ __ai int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vrshr_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s0 = __p0; \ int64x1_t __ret; \ __ret = (int64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) -#else -#define vrshr_n_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vrshr_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s0 = __p0; \ @@ -23194,7 +22504,6 @@ __ai float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vrsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ @@ -23202,16 +22511,6 @@ __ai float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { __ret = (uint64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ __ret; \ }) -#else -#define vrsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vrsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __s0 = __p0; \ @@ -23275,7 +22574,6 @@ __ai float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vrsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ @@ -23283,16 +22581,6 @@ __ai float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { __ret = (int64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) -#else -#define vrsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vrsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s0 = __p0; \ @@ -23451,7 +22739,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ + 
__ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (poly8x8_t)__s1, __p2); \ __ret; \ }) #else @@ -23460,7 +22748,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly8x8_t __s1 = __p1; \ poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__rev1, __p2); \ + __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (poly8x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) @@ -23468,7 +22756,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ + __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (poly8x8_t)__s1, __p2); \ __ret; \ }) #endif @@ -23478,7 +22766,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly16_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ + __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (poly16x4_t)__s1, __p2); \ __ret; \ }) #else @@ -23487,7 +22775,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly16x4_t __s1 = __p1; \ poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__rev1, __p2); \ + __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (poly16x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) @@ -23495,7 +22783,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly16_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ + __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (poly16x4_t)__s1, __p2); \ __ret; \ }) #endif @@ -23505,7 +22793,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly8_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (poly8x16_t)__s1, __p2); \ __ret; \ }) #else @@ -23514,7 +22802,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly8x16_t __s1 = __p1; \ poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (poly8x16_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) @@ -23522,7 +22810,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly8_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (poly8x16_t)__s1, __p2); \ __ret; \ }) #endif @@ -23532,7 +22820,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly16_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, 
(int8x16_t)__s1, __p2); \ + __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (poly16x8_t)__s1, __p2); \ __ret; \ }) #else @@ -23541,7 +22829,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly16x8_t __s1 = __p1; \ poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (poly16x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) @@ -23549,7 +22837,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { poly16_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ + __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (poly16x8_t)__s1, __p2); \ __ret; \ }) #endif @@ -23586,7 +22874,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint32_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, __p2); \ + __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ __ret; \ }) #else @@ -23595,7 +22883,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint32x4_t __s1 = __p1; \ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) @@ -23603,7 +22891,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint32_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, __p2); \ + __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ __ret; \ }) #endif @@ -23613,7 +22901,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint64_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ + __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ __ret; \ }) #else @@ -23622,7 +22910,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint64x2_t __s1 = __p1; \ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) @@ -23630,7 +22918,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint64_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ + __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ __ret; \ }) #endif @@ -23640,7 +22928,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint16_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ + __ret = 
(uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ __ret; \ }) #else @@ -23649,7 +22937,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __s1 = __p1; \ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) @@ -23657,7 +22945,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint16_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ + __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ __ret; \ }) #endif @@ -23694,7 +22982,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { float32_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (int8x16_t)__s1, __p2); \ + __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (float32x4_t)__s1, __p2); \ __ret; \ }) #else @@ -23703,7 +22991,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { float32x4_t __s1 = __p1; \ float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (float32x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) @@ -23711,7 +22999,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { float32_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (int8x16_t)__s1, __p2); \ + __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (float32x4_t)__s1, __p2); \ __ret; \ }) #endif @@ -23721,7 +23009,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int32_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, __p2); \ + __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ __ret; \ }) #else @@ -23730,7 +23018,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __s1 = __p1; \ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) @@ -23738,7 +23026,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int32_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int8x16_t)__s1, __p2); \ + __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ __ret; \ }) #endif @@ -23748,7 +23036,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int64_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ + __ret = (int64x2_t) 
__builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ __ret; \ }) #else @@ -23757,7 +23045,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int64x2_t __s1 = __p1; \ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) @@ -23765,7 +23053,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int64_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ + __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ __ret; \ }) #endif @@ -23775,7 +23063,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int16_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ + __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ __ret; \ }) #else @@ -23784,7 +23072,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __s1 = __p1; \ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret; \ }) @@ -23792,7 +23080,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int16_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int8x16_t)__s1, __p2); \ + __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ __ret; \ }) #endif @@ -23829,7 +23117,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint32_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ + __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ __ret; \ }) #else @@ -23838,7 +23126,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint32x2_t __s1 = __p1; \ uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__rev1, __p2); \ + __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) @@ -23846,42 +23134,24 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint32_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ + __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ __ret; \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vset_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ + __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int64x1_t)__s1, __p2); \ __ret; \ }) -#else -#define 
vset_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) -#define __noswap_vset_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vset_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ + __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ __ret; \ }) #else @@ -23890,7 +23160,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint16x4_t __s1 = __p1; \ uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__rev1, __p2); \ + __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) @@ -23898,7 +23168,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { uint16_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ + __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ __ret; \ }) #endif @@ -23935,7 +23205,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { float32_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (int8x8_t)__s1, __p2); \ + __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (float32x2_t)__s1, __p2); \ __ret; \ }) #else @@ -23944,7 +23214,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { float32x2_t __s1 = __p1; \ float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (int8x8_t)__rev1, __p2); \ + __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (float32x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) @@ -23952,7 +23222,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { float32_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (int8x8_t)__s1, __p2); \ + __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (float32x2_t)__s1, __p2); \ __ret; \ }) #endif @@ -23962,7 +23232,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int32_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ + __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ __ret; \ }) #else @@ -23971,7 +23241,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int32x2_t __s1 = __p1; \ int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__rev1, __p2); \ + __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__rev1, __p2); \ __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) @@ -23979,42 +23249,24 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int32_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int8x8_t)__s1, __p2); \ + __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ __ret; \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vset_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ + __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int64x1_t)__s1, __p2); \ __ret; \ }) -#else -#define vset_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) -#define __noswap_vset_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vset_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ + __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ __ret; \ }) #else @@ -24023,7 +23275,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int16x4_t __s1 = __p1; \ int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__rev1, __p2); \ + __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ __ret; \ }) @@ -24031,7 +23283,7 @@ __ai int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int16_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int8x8_t)__s1, __p2); \ + __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ __ret; \ }) #endif @@ -24206,20 +23458,11 @@ __ai uint32x2_t vshl_u32(uint32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vshl_u64(uint64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; @@ -24271,20 +23514,11 @@ __ai int32x2_t vshl_s32(int32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } -#else -__ai int64x1_t vshl_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; @@ -24482,22 
+23716,12 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vshl_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s0 = __p0; \ uint64x1_t __ret; \ __ret = (uint64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) -#else -#define vshl_n_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s0 = __p0; \ @@ -24552,22 +23776,12 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vshl_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s0 = __p0; \ int64x1_t __ret; \ __ret = (int64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) -#else -#define vshl_n_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vshl_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s0 = __p0; \ @@ -24910,22 +24124,12 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vshr_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s0 = __p0; \ uint64x1_t __ret; \ __ret = (uint64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) -#else -#define vshr_n_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vshr_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s0 = __p0; \ @@ -24980,22 +24184,12 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vshr_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s0 = __p0; \ int64x1_t __ret; \ __ret = (int64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) -#else -#define vshr_n_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vshr_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s0 = __p0; \ @@ -25452,7 +24646,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vsli_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ @@ -25460,16 +24653,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __ret = (uint64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ __ret; \ }) -#else -#define vsli_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vsli_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __s0 = __p0; \ @@ -25533,7 +24716,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vsli_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ @@ -25541,16 +24723,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __ret = (int64x1_t) 
__builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) -#else -#define vsli_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vsli_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s0 = __p0; \ @@ -25782,7 +24954,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ @@ -25790,16 +24961,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __ret = (uint64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ __ret; \ }) -#else -#define vsra_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vsra_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __s0 = __p0; \ @@ -25863,7 +25024,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ @@ -25871,16 +25031,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __ret = (int64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) -#else -#define vsra_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vsra_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s0 = __p0; \ @@ -26196,7 +25346,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vsri_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ @@ -26204,16 +25353,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __ret = (uint64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ __ret; \ }) -#else -#define vsri_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64x1_t __s1 = __p1; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vsri_n_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __s0 = __p0; \ @@ -26277,7 +25416,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vsri_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ @@ -26285,16 +25423,6 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __ret = (int64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ __ret; \ }) -#else -#define vsri_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64x1_t __s1 = __p1; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vsri_n_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s0 = __p0; \ @@ -26511,18 +25639,10 @@ 
__ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 19); \ }) -#else -#define vst1_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 19); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s1 = __p1; \ @@ -26575,18 +25695,10 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 3); \ }) -#else -#define vst1_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 3); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s1 = __p1; \ @@ -26795,18 +25907,10 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ }) -#else -#define vst1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __s1 = __p1; \ @@ -26859,18 +25963,10 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ }) -#else -#define vst1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s1 = __p1; \ @@ -27022,7 +26118,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst1q_f32_x2(__p0, __p1) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 41); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 41); \ }) #else #define vst1q_f32_x2(__p0, __p1) __extension__ ({ \ @@ -27030,14 +26126,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { float32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 41); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s32_x2(__p0, __p1) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 34); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 34); \ }) #else #define vst1q_s32_x2(__p0, __p1) __extension__ ({ \ @@ -27045,14 +26141,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], 
__rev1.val[1], 34); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s64_x2(__p0, __p1) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 35); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 35); \ }) #else #define vst1q_s64_x2(__p0, __p1) __extension__ ({ \ @@ -27060,14 +26156,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 35); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s16_x2(__p0, __p1) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 33); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 33); \ }) #else #define vst1q_s16_x2(__p0, __p1) __extension__ ({ \ @@ -27075,7 +26171,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 33); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 33); \ }) #endif @@ -27109,18 +26205,10 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_u64_x2(__p0, __p1) __extension__ ({ \ uint64x1x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ }) -#else -#define vst1_u64_x2(__p0, __p1) __extension__ ({ \ - uint64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1_u16_x2(__p0, __p1) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ @@ -27154,7 +26242,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst1_f32_x2(__p0, __p1) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 9); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 9); \ }) #else #define vst1_f32_x2(__p0, __p1) __extension__ ({ \ @@ -27162,14 +26250,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { float32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 9); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s32_x2(__p0, __p1) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 2); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 2); \ }) #else #define vst1_s32_x2(__p0, __p1) __extension__ ({ \ @@ -27177,26 +26265,18 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); 
\ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 2); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 2); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_s64_x2(__p0, __p1) __extension__ ({ \ int64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 3); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 3); \ }) -#else -#define vst1_s64_x2(__p0, __p1) __extension__ ({ \ - int64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 3); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1_s16_x2(__p0, __p1) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 1); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 1); \ }) #else #define vst1_s16_x2(__p0, __p1) __extension__ ({ \ @@ -27204,7 +26284,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 1); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 1); \ }) #endif @@ -27355,7 +26435,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst1q_f32_x3(__p0, __p1) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 41); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 41); \ }) #else #define vst1q_f32_x3(__p0, __p1) __extension__ ({ \ @@ -27364,14 +26444,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 41); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s32_x3(__p0, __p1) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 34); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 34); \ }) #else #define vst1q_s32_x3(__p0, __p1) __extension__ ({ \ @@ -27380,14 +26460,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 34); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s64_x3(__p0, __p1) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 35); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], 
(int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 35); \ }) #else #define vst1q_s64_x3(__p0, __p1) __extension__ ({ \ @@ -27396,14 +26476,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 35); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s16_x3(__p0, __p1) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 33); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 33); \ }) #else #define vst1q_s16_x3(__p0, __p1) __extension__ ({ \ @@ -27412,7 +26492,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 33); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 33); \ }) #endif @@ -27448,18 +26528,10 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_u64_x3(__p0, __p1) __extension__ ({ \ uint64x1x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \ }) -#else -#define vst1_u64_x3(__p0, __p1) __extension__ ({ \ - uint64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1_u16_x3(__p0, __p1) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ @@ -27495,7 +26567,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst1_f32_x3(__p0, __p1) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 9); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 9); \ }) #else #define vst1_f32_x3(__p0, __p1) __extension__ ({ \ @@ -27504,14 +26576,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 9); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s32_x3(__p0, __p1) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 2); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 2); \ }) #else #define vst1_s32_x3(__p0, __p1) __extension__ ({ \ @@ -27520,26 +26592,18 @@ __ai int16x4_t 
vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 2); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 2); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_s64_x3(__p0, __p1) __extension__ ({ \ int64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 3); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 3); \ }) -#else -#define vst1_s64_x3(__p0, __p1) __extension__ ({ \ - int64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 3); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1_s16_x3(__p0, __p1) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 1); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 1); \ }) #else #define vst1_s16_x3(__p0, __p1) __extension__ ({ \ @@ -27548,7 +26612,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 1); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 1); \ }) #endif @@ -27708,7 +26772,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst1q_f32_x4(__p0, __p1) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 41); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 41); \ }) #else #define vst1q_f32_x4(__p0, __p1) __extension__ ({ \ @@ -27718,14 +26782,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 41); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s32_x4(__p0, __p1) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 34); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 34); \ }) #else #define vst1q_s32_x4(__p0, __p1) __extension__ ({ \ @@ -27735,14 +26799,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = 
__builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 34); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s64_x4(__p0, __p1) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 35); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 35); \ }) #else #define vst1q_s64_x4(__p0, __p1) __extension__ ({ \ @@ -27752,14 +26816,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 35); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s16_x4(__p0, __p1) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 33); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 33); \ }) #else #define vst1q_s16_x4(__p0, __p1) __extension__ ({ \ @@ -27769,7 +26833,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 33); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 33); \ }) #endif @@ -27807,18 +26871,10 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_u64_x4(__p0, __p1) __extension__ ({ \ uint64x1x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \ }) -#else -#define vst1_u64_x4(__p0, __p1) __extension__ ({ \ - uint64x1x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1_u16_x4(__p0, __p1) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ @@ -27856,7 +26912,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst1_f32_x4(__p0, __p1) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 9); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 9); \ }) #else #define vst1_f32_x4(__p0, __p1) __extension__ ({ \ @@ -27866,14 +26922,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 9); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s32_x4(__p0, __p1) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 2); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 2); \ }) #else #define vst1_s32_x4(__p0, __p1) __extension__ ({ \ @@ -27883,26 +26939,18 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 2); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 2); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_s64_x4(__p0, __p1) __extension__ ({ \ int64x1x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 3); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 3); \ }) -#else -#define vst1_s64_x4(__p0, __p1) __extension__ ({ \ - int64x1x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 3); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1_s16_x4(__p0, __p1) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 1); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 1); \ }) #else #define vst1_s16_x4(__p0, __p1) __extension__ ({ \ @@ -27912,7 +26960,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 1); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 1); \ }) #endif @@ -28039,7 +27087,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst2q_f32(__p0, __p1) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, __s1.val[0], __s1.val[1], 41); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 41); \ }) #else #define vst2q_f32(__p0, __p1) __extension__ ({ \ @@ -28047,14 +27095,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { float32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - 
__builtin_neon_vst2q_v(__p0, __rev1.val[0], __rev1.val[1], 41); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_s32(__p0, __p1) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, __s1.val[0], __s1.val[1], 34); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 34); \ }) #else #define vst2q_s32(__p0, __p1) __extension__ ({ \ @@ -28062,14 +27110,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, __rev1.val[0], __rev1.val[1], 34); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_s16(__p0, __p1) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, __s1.val[0], __s1.val[1], 33); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 33); \ }) #else #define vst2q_s16(__p0, __p1) __extension__ ({ \ @@ -28077,7 +27125,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, __rev1.val[0], __rev1.val[1], 33); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 33); \ }) #endif @@ -28111,18 +27159,10 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst2_u64(__p0, __p1) __extension__ ({ \ uint64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ }) -#else -#define vst2_u64(__p0, __p1) __extension__ ({ \ - uint64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst2_u16(__p0, __p1) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ @@ -28156,7 +27196,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst2_f32(__p0, __p1) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, __s1.val[0], __s1.val[1], 9); \ + __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 9); \ }) #else #define vst2_f32(__p0, __p1) __extension__ ({ \ @@ -28164,14 +27204,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { float32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2_v(__p0, __rev1.val[0], __rev1.val[1], 9); \ + __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_s32(__p0, __p1) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, __s1.val[0], __s1.val[1], 2); \ + __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 2); \ }) #else #define vst2_s32(__p0, __p1) __extension__ ({ \ @@ -28179,26 +27219,18 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2_v(__p0, __rev1.val[0], __rev1.val[1], 2); \ + __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 2); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst2_s64(__p0, __p1) __extension__ ({ \ int64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, __s1.val[0], __s1.val[1], 3); \ + __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 3); \ }) -#else -#define vst2_s64(__p0, __p1) __extension__ ({ \ - int64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, __s1.val[0], __s1.val[1], 3); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst2_s16(__p0, __p1) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, __s1.val[0], __s1.val[1], 1); \ + __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 1); \ }) #else #define vst2_s16(__p0, __p1) __extension__ ({ \ @@ -28206,7 +27238,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_v(__p0, __rev1.val[0], __rev1.val[1], 1); \ + __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 1); \ }) #endif @@ -28288,7 +27320,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 41); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 41); \ }) #else #define vst2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ @@ -28296,14 +27328,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { float32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 41); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 34); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 34); \ }) #else #define vst2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ @@ -28311,14 +27343,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 34); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 33); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 33); \ }) #else #define vst2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ @@ -28326,7 +27358,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x8x2_t __rev1; \ __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 33); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 33); \ }) #endif @@ -28393,7 +27425,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 9); \ + __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 9); \ }) #else #define vst2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ @@ -28401,14 +27433,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { float32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 9); \ + __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 2); \ + __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 2); \ }) #else #define vst2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ @@ -28416,14 +27448,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int32x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 2); \ + __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 1); \ + __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 1); \ }) #else #define vst2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ @@ -28431,7 +27463,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 1); \ + __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 1); \ }) #endif @@ -28566,7 +27598,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst3q_f32(__p0, __p1) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 41); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 41); \ }) #else #define vst3q_f32(__p0, __p1) __extension__ ({ \ @@ -28575,14 +27607,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 
3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 41); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_s32(__p0, __p1) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 34); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 34); \ }) #else #define vst3q_s32(__p0, __p1) __extension__ ({ \ @@ -28591,14 +27623,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 34); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_s16(__p0, __p1) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 33); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 33); \ }) #else #define vst3q_s16(__p0, __p1) __extension__ ({ \ @@ -28607,7 +27639,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 33); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 33); \ }) #endif @@ -28643,18 +27675,10 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst3_u64(__p0, __p1) __extension__ ({ \ uint64x1x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \ }) -#else -#define vst3_u64(__p0, __p1) __extension__ ({ \ - uint64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst3_u16(__p0, __p1) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ @@ -28690,7 +27714,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst3_f32(__p0, __p1) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 9); \ + __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 9); \ }) #else #define vst3_f32(__p0, __p1) __extension__ ({ \ @@ -28699,14 +27723,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 9); \ + __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 9); \ 
}) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_s32(__p0, __p1) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 2); \ + __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 2); \ }) #else #define vst3_s32(__p0, __p1) __extension__ ({ \ @@ -28715,26 +27739,18 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 2); \ + __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 2); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst3_s64(__p0, __p1) __extension__ ({ \ int64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 3); \ + __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 3); \ }) -#else -#define vst3_s64(__p0, __p1) __extension__ ({ \ - int64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 3); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst3_s16(__p0, __p1) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 1); \ + __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 1); \ }) #else #define vst3_s16(__p0, __p1) __extension__ ({ \ @@ -28743,7 +27759,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 1); \ + __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 1); \ }) #endif @@ -28830,7 +27846,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 41); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 41); \ }) #else #define vst3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ @@ -28839,14 +27855,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 41); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 34); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 34); \ }) #else #define 
vst3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ @@ -28855,14 +27871,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 34); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 33); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 33); \ }) #else #define vst3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ @@ -28871,7 +27887,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 33); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 33); \ }) #endif @@ -28942,7 +27958,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 9); \ + __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 9); \ }) #else #define vst3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ @@ -28951,14 +27967,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 9); \ + __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 2); \ + __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 2); \ }) #else #define vst3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ @@ -28967,14 +27983,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 2); \ + __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vst3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 1); \ + __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 1); \ }) #else #define vst3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ @@ -28983,7 +27999,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 1); \ + __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 1); \ }) #endif @@ -29126,7 +28142,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst4q_f32(__p0, __p1) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 41); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 41); \ }) #else #define vst4q_f32(__p0, __p1) __extension__ ({ \ @@ -29136,14 +28152,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 41); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_s32(__p0, __p1) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 34); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 34); \ }) #else #define vst4q_s32(__p0, __p1) __extension__ ({ \ @@ -29153,14 +28169,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 34); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_s16(__p0, __p1) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 33); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 33); \ }) #else #define vst4q_s16(__p0, __p1) __extension__ ({ \ @@ -29170,7 +28186,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ 
__rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 33); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 33); \ }) #endif @@ -29208,18 +28224,10 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst4_u64(__p0, __p1) __extension__ ({ \ uint64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \ }) -#else -#define vst4_u64(__p0, __p1) __extension__ ({ \ - uint64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst4_u16(__p0, __p1) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ @@ -29257,7 +28265,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst4_f32(__p0, __p1) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 9); \ + __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 9); \ }) #else #define vst4_f32(__p0, __p1) __extension__ ({ \ @@ -29267,14 +28275,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 9); \ + __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_s32(__p0, __p1) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 2); \ + __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 2); \ }) #else #define vst4_s32(__p0, __p1) __extension__ ({ \ @@ -29284,26 +28292,18 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 2); \ + __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 2); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst4_s64(__p0, __p1) __extension__ ({ \ int64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 3); \ + __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 3); \ }) -#else -#define vst4_s64(__p0, __p1) __extension__ ({ \ - int64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 3); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst4_s16(__p0, __p1) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ - 
__builtin_neon_vst4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 1); \ + __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 1); \ }) #else #define vst4_s16(__p0, __p1) __extension__ ({ \ @@ -29313,7 +28313,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 1); \ + __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 1); \ }) #endif @@ -29405,7 +28405,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 41); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 41); \ }) #else #define vst4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ @@ -29415,14 +28415,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 41); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 34); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 34); \ }) #else #define vst4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ @@ -29432,14 +28432,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 34); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 33); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 33); \ }) #else #define vst4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ @@ -29449,7 +28449,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 
5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 33); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 33); \ }) #endif @@ -29524,7 +28524,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { #ifdef __LITTLE_ENDIAN__ #define vst4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 9); \ + __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 9); \ }) #else #define vst4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ @@ -29534,14 +28534,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 9); \ + __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 2); \ + __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 2); \ }) #else #define vst4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ @@ -29551,14 +28551,14 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 2); \ + __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 1); \ + __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 1); \ }) #else #define vst4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ @@ -29568,7 +28568,7 @@ __ai int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 1); \ + __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 1); \ }) #endif @@ 
-29759,20 +28759,11 @@ __ai uint32x2_t vsub_u32(uint32x2_t __p0, uint32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vsub_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 - __p1; return __ret; } -#else -__ai uint64x1_t vsub_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = __p0 - __p1; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; @@ -29841,20 +28832,11 @@ __ai int32x2_t vsub_s32(int32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vsub_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 - __p1; return __ret; } -#else -__ai int64x1_t vsub_s64(int64x1_t __p0, int64x1_t __p1) { - int64x1_t __ret; - __ret = __p0 - __p1; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; @@ -32085,3721 +31067,1345 @@ __ai int16x4x2_t vzip_s16(int16x4_t __p0, int16x4_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - 
-#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } 
-#else -__ai poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - 
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) {
   poly16x8_t __ret;
   __ret = (poly16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) {
-  poly16x8_t __ret;
-  __ret = (poly16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) {
   poly16x8_t __ret;
   __ret = (poly16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) {
-  poly16x8_t __ret;
-  __ret = (poly16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) {
   poly16x8_t __ret;
   __ret = (poly16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) {
-  poly16x8_t __ret;
-  __ret = (poly16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) {
   poly16x8_t __ret;
   __ret = (poly16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) {
-  poly16x8_t __ret;
-  __ret = (poly16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) {
   poly16x8_t __ret;
   __ret = (poly16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) {
-  poly16x8_t __ret;
-  __ret = (poly16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) {
   poly16x8_t __ret;
   __ret = (poly16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) {
-  poly16x8_t __ret;
-  __ret = (poly16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) {
   poly16x8_t __ret;
   __ret = (poly16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) {
-  poly16x8_t __ret;
-  __ret = (poly16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) {
   poly16x8_t __ret;
   __ret = (poly16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) {
-  poly16x8_t __ret;
-  __ret = (poly16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) {
   poly16x8_t __ret;
   __ret = (poly16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) {
-  poly16x8_t __ret;
-  __ret = (poly16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) {
   poly16x8_t __ret;
   __ret = (poly16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) {
-  poly16x8_t __ret;
-  __ret = (poly16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) {
   uint8x16_t __ret;
   __ret = (uint8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) {
   uint8x16_t __ret;
   __ret = (uint8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) {
   uint8x16_t __ret;
   __ret = (uint8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) {
   uint8x16_t __ret;
   __ret = (uint8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) {
   uint8x16_t __ret;
   __ret = (uint8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) {
   uint8x16_t __ret;
   __ret = (uint8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) {
   uint8x16_t __ret;
   __ret = (uint8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) {
   uint8x16_t __ret;
   __ret = (uint8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) {
   uint8x16_t __ret;
   __ret = (uint8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) {
   uint8x16_t __ret;
   __ret = (uint8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) {
   uint8x16_t __ret;
   __ret = (uint8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) {
-  uint8x16_t __ret;
-  __ret = (uint8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) {
   uint32x4_t __ret;
   __ret = (uint32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) {
   uint32x4_t __ret;
   __ret = (uint32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) {
   uint32x4_t __ret;
   __ret = (uint32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) {
   uint32x4_t __ret;
   __ret = (uint32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) {
   uint32x4_t __ret;
   __ret = (uint32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) {
   uint32x4_t __ret;
   __ret = (uint32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) {
   uint32x4_t __ret;
   __ret = (uint32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) {
   uint32x4_t __ret;
   __ret = (uint32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) {
   uint32x4_t __ret;
   __ret = (uint32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) {
   uint32x4_t __ret;
   __ret = (uint32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) {
   uint32x4_t __ret;
   __ret = (uint32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) {
-  uint32x4_t __ret;
-  __ret = (uint32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) {
   uint64x2_t __ret;
   __ret = (uint64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) {
   uint64x2_t __ret;
   __ret = (uint64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) {
   uint64x2_t __ret;
   __ret = (uint64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) {
   uint64x2_t __ret;
   __ret = (uint64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) {
   uint64x2_t __ret;
   __ret = (uint64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) {
   uint64x2_t __ret;
   __ret = (uint64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) {
   uint64x2_t __ret;
   __ret = (uint64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) {
   uint64x2_t __ret;
   __ret = (uint64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) {
   uint64x2_t __ret;
   __ret = (uint64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) {
   uint64x2_t __ret;
   __ret = (uint64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) {
   uint64x2_t __ret;
   __ret = (uint64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) {
-  uint64x2_t __ret;
-  __ret = (uint64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) {
   uint16x8_t __ret;
   __ret = (uint16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) {
   uint16x8_t __ret;
   __ret = (uint16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) {
   uint16x8_t __ret;
   __ret = (uint16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) {
   uint16x8_t __ret;
   __ret = (uint16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) {
   uint16x8_t __ret;
   __ret = (uint16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) {
   uint16x8_t __ret;
   __ret = (uint16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) {
   uint16x8_t __ret;
   __ret = (uint16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) {
   uint16x8_t __ret;
   __ret = (uint16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) {
   uint16x8_t __ret;
   __ret = (uint16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) {
   uint16x8_t __ret;
   __ret = (uint16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) {
   uint16x8_t __ret;
   __ret = (uint16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) {
-  uint16x8_t __ret;
-  __ret = (uint16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) {
   int8x16_t __ret;
   __ret = (int8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) {
   int8x16_t __ret;
   __ret = (int8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) {
   int8x16_t __ret;
   __ret = (int8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) {
   int8x16_t __ret;
   __ret = (int8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) {
   int8x16_t __ret;
   __ret = (int8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) {
   int8x16_t __ret;
   __ret = (int8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) {
   int8x16_t __ret;
   __ret = (int8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) {
   int8x16_t __ret;
   __ret = (int8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x16_t
-__ai int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) {
   int8x16_t __ret;
   __ret = (int8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) {
   int8x16_t __ret;
   __ret = (int8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) {
   int8x16_t __ret;
   __ret = (int8x16_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) {
-  int8x16_t __ret;
-  __ret = (int8x16_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) {
   float32x4_t __ret;
   __ret = (float32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) {
-  float32x4_t __ret;
-  __ret = (float32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) {
   float32x4_t __ret;
   __ret = (float32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) {
-  float32x4_t __ret;
-  __ret = (float32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) {
   float32x4_t __ret;
   __ret = (float32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) {
-  float32x4_t __ret;
-  __ret = (float32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) {
   float32x4_t __ret;
   __ret = (float32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) {
-  float32x4_t __ret;
-  __ret = (float32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) {
   float32x4_t __ret;
   __ret = (float32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) {
-  float32x4_t __ret;
-  __ret = (float32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) {
   float32x4_t __ret;
   __ret = (float32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) {
-  float32x4_t __ret;
-  __ret = (float32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) {
   float32x4_t __ret;
   __ret = (float32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) {
-  float32x4_t __ret;
-  __ret = (float32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) {
   float32x4_t __ret;
   __ret = (float32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) {
-  float32x4_t __ret;
-  __ret = (float32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) {
   float32x4_t __ret;
   __ret = (float32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) {
-  float32x4_t __ret;
-  __ret = (float32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) {
   float32x4_t __ret;
   __ret = (float32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) {
-  float32x4_t __ret;
-  __ret = (float32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) {
   float32x4_t __ret;
   __ret = (float32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) {
-  float32x4_t __ret;
-  __ret = (float32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) {
   float16x8_t __ret;
   __ret = (float16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) {
   float16x8_t __ret;
   __ret = (float16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) {
   float16x8_t __ret;
   __ret = (float16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) {
   float16x8_t __ret;
   __ret = (float16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) {
   float16x8_t __ret;
   __ret = (float16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) {
   float16x8_t __ret;
   __ret = (float16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) {
   float16x8_t __ret;
   __ret = (float16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) {
   float16x8_t __ret;
   __ret = (float16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) {
   float16x8_t __ret;
   __ret = (float16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) {
   float16x8_t __ret;
   __ret = (float16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) {
   float16x8_t __ret;
   __ret = (float16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) {
-  float16x8_t __ret;
-  __ret = (float16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) {
   int32x4_t __ret;
   __ret = (int32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) {
-  int32x4_t __ret;
-  __ret = (int32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) {
   int32x4_t __ret;
   __ret = (int32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) {
-  int32x4_t __ret;
-  __ret = (int32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) {
   int32x4_t __ret;
   __ret = (int32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) {
-  int32x4_t __ret;
-  __ret = (int32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) {
   int32x4_t __ret;
   __ret = (int32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) {
-  int32x4_t __ret;
-  __ret = (int32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) {
   int32x4_t __ret;
   __ret = (int32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) {
-  int32x4_t __ret;
-  __ret = (int32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) {
   int32x4_t __ret;
   __ret = (int32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) {
-  int32x4_t __ret;
-  __ret = (int32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) {
   int32x4_t __ret;
   __ret = (int32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) {
-  int32x4_t __ret;
-  __ret = (int32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) {
   int32x4_t __ret;
   __ret = (int32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) {
-  int32x4_t __ret;
-  __ret = (int32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) {
   int32x4_t __ret;
   __ret = (int32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) {
-  int32x4_t __ret;
-  __ret = (int32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) {
   int32x4_t __ret;
   __ret = (int32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) {
-  int32x4_t __ret;
-  __ret = (int32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) {
   int32x4_t __ret;
   __ret = (int32x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) {
-  int32x4_t __ret;
-  __ret = (int32x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) {
   int64x2_t __ret;
   __ret = (int64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) {
   int64x2_t __ret;
   __ret = (int64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) {
   int64x2_t __ret;
   __ret = (int64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) {
   int64x2_t __ret;
   __ret = (int64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) {
   int64x2_t __ret;
   __ret = (int64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) {
   int64x2_t __ret;
   __ret = (int64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) {
   int64x2_t __ret;
   __ret = (int64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) {
   int64x2_t __ret;
   __ret = (int64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) {
   int64x2_t __ret;
   __ret = (int64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) {
   int64x2_t __ret;
   __ret = (int64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) {
   int64x2_t __ret;
   __ret = (int64x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) {
-  int64x2_t __ret;
-  __ret = (int64x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) {
   int16x8_t __ret;
   __ret = (int16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) {
   int16x8_t __ret;
   __ret = (int16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) {
   int16x8_t __ret;
   __ret = (int16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) {
   int16x8_t __ret;
   __ret = (int16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) {
   int16x8_t __ret;
   __ret = (int16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) {
   int16x8_t __ret;
   __ret = (int16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) {
   int16x8_t __ret;
   __ret = (int16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) {
   int16x8_t __ret;
   __ret = (int16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) {
   int16x8_t __ret;
   __ret = (int16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) {
   int16x8_t __ret;
   __ret = (int16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) {
   int16x8_t __ret;
   __ret = (int16x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) {
-  int16x8_t __ret;
-  __ret = (int16x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) {
   uint8x8_t __ret;
   __ret = (uint8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __p0) {
   uint8x8_t __ret;
   __ret = (uint8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) {
   uint8x8_t __ret;
   __ret = (uint8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) {
   uint8x8_t __ret;
   __ret = (uint8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x8_t
-__ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) {
   uint8x8_t __ret;
   __ret = (uint8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) {
   uint8x8_t __ret;
   __ret = (uint8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) {
   uint8x8_t __ret;
   __ret = (uint8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) {
   uint8x8_t __ret;
   __ret = (uint8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) {
   uint8x8_t __ret;
   __ret = (uint8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) {
   uint8x8_t __ret;
   __ret = (uint8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) {
   uint8x8_t __ret;
   __ret = (uint8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) {
-  uint8x8_t __ret;
-  __ret = (uint8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) {
   uint32x2_t __ret;
   __ret = (uint32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) {
   uint32x2_t __ret;
   __ret = (uint32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) {
   uint32x2_t __ret;
   __ret = (uint32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) {
   uint32x2_t __ret;
   __ret = (uint32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) {
   uint32x2_t __ret;
   __ret = (uint32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) {
   uint32x2_t __ret;
   __ret = (uint32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) {
   uint32x2_t __ret;
   __ret = (uint32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) {
   uint32x2_t __ret;
   __ret = (uint32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) {
   uint32x2_t __ret;
   __ret = (uint32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) {
   uint32x2_t __ret;
   __ret = (uint32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) {
   uint32x2_t __ret;
   __ret = (uint32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) {
-  uint32x2_t __ret;
-  __ret = (uint32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x1_t
-__ai uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) {
   uint64x1_t __ret;
   __ret = (uint64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) {
-  uint64x1_t __ret;
-  __ret = (uint64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) {
   uint16x4_t __ret;
   __ret = (uint16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) {
   uint16x4_t __ret;
   __ret = (uint16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) {
   uint16x4_t __ret;
   __ret = (uint16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) {
   uint16x4_t __ret;
   __ret = (uint16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) {
   uint16x4_t __ret;
   __ret = (uint16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) {
   uint16x4_t __ret;
   __ret = (uint16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) {
   uint16x4_t __ret;
   __ret = (uint16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) {
   uint16x4_t __ret;
   __ret = (uint16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) {
   uint16x4_t __ret;
   __ret = (uint16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) {
   uint16x4_t __ret;
   __ret = (uint16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) {
   uint16x4_t __ret;
   __ret = (uint16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) {
-  uint16x4_t __ret;
-  __ret = (uint16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) {
   int8x8_t __ret;
   __ret = (int8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) {
   int8x8_t __ret;
   __ret = (int8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) {
   int8x8_t __ret;
   __ret = (int8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) {
   int8x8_t __ret;
   __ret = (int8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) {
   int8x8_t __ret;
   __ret = (int8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) {
   int8x8_t __ret;
   __ret = (int8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x8_t vreinterpret_s8_f32(float32x2_t __p0) {
   int8x8_t __ret;
   __ret = (int8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x8_t vreinterpret_s8_f32(float32x2_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x8_t vreinterpret_s8_f16(float16x4_t __p0) {
   int8x8_t __ret;
   __ret = (int8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x8_t vreinterpret_s8_f16(float16x4_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x8_t vreinterpret_s8_s32(int32x2_t __p0) {
   int8x8_t __ret;
   __ret = (int8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x8_t vreinterpret_s8_s32(int32x2_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x8_t vreinterpret_s8_s64(int64x1_t __p0) {
   int8x8_t __ret;
   __ret = (int8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x8_t vreinterpret_s8_s64(int64x1_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int8x8_t vreinterpret_s8_s16(int16x4_t __p0) {
   int8x8_t __ret;
   __ret = (int8x8_t)(__p0);
   return __ret;
 }
-#else
-__ai int8x8_t vreinterpret_s8_s16(int16x4_t __p0) {
-  int8x8_t __ret;
-  __ret = (int8x8_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) {
   float32x2_t __ret;
   __ret = (float32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) {
   float32x2_t __ret;
   __ret = (float32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) {
   float32x2_t __ret;
   __ret = (float32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) {
   float32x2_t __ret;
   __ret = (float32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) {
   float32x2_t __ret;
   __ret = (float32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) {
   float32x2_t __ret;
   __ret = (float32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x2_t vreinterpret_f32_s8(int8x8_t __p0) {
   float32x2_t __ret;
   __ret = (float32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x2_t vreinterpret_f32_s8(int8x8_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x2_t vreinterpret_f32_f16(float16x4_t __p0) {
   float32x2_t __ret;
   __ret = (float32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x2_t vreinterpret_f32_f16(float16x4_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x2_t vreinterpret_f32_s32(int32x2_t __p0) {
   float32x2_t __ret;
   __ret = (float32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x2_t vreinterpret_f32_s32(int32x2_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x2_t vreinterpret_f32_s64(int64x1_t __p0) {
   float32x2_t __ret;
   __ret = (float32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x2_t vreinterpret_f32_s64(int64x1_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32x2_t vreinterpret_f32_s16(int16x4_t __p0) {
   float32x2_t __ret;
   __ret = (float32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai float32x2_t vreinterpret_f32_s16(int16x4_t __p0) {
-  float32x2_t __ret;
-  __ret = (float32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) {
   float16x4_t __ret;
   __ret = (float16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) {
   float16x4_t __ret;
   __ret = (float16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) {
   float16x4_t __ret;
   __ret = (float16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) {
   float16x4_t __ret;
   __ret = (float16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) {
   float16x4_t __ret;
   __ret = (float16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) {
   float16x4_t __ret;
   __ret = (float16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x4_t vreinterpret_f16_s8(int8x8_t __p0) {
   float16x4_t __ret;
   __ret = (float16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x4_t vreinterpret_f16_s8(int8x8_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x4_t vreinterpret_f16_f32(float32x2_t __p0) {
   float16x4_t __ret;
   __ret = (float16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x4_t vreinterpret_f16_f32(float32x2_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x4_t vreinterpret_f16_s32(int32x2_t __p0) {
   float16x4_t __ret;
   __ret = (float16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x4_t vreinterpret_f16_s32(int32x2_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x4_t vreinterpret_f16_s64(int64x1_t __p0) {
   float16x4_t __ret;
   __ret = (float16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x4_t vreinterpret_f16_s64(int64x1_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float16x4_t vreinterpret_f16_s16(int16x4_t __p0) {
   float16x4_t __ret;
   __ret = (float16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai float16x4_t vreinterpret_f16_s16(int16x4_t __p0) {
-  float16x4_t __ret;
-  __ret = (float16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) {
   int32x2_t __ret;
   __ret = (int32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) {
   int32x2_t __ret;
   __ret = (int32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) {
   int32x2_t __ret;
   __ret = (int32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) {
   int32x2_t __ret;
   __ret = (int32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) {
   int32x2_t __ret;
   __ret = (int32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) {
   int32x2_t __ret;
   __ret = (int32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x2_t vreinterpret_s32_s8(int8x8_t __p0) {
   int32x2_t __ret;
   __ret = (int32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x2_t vreinterpret_s32_s8(int8x8_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x2_t vreinterpret_s32_f32(float32x2_t __p0) {
   int32x2_t __ret;
   __ret = (int32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x2_t vreinterpret_s32_f32(float32x2_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x2_t vreinterpret_s32_f16(float16x4_t __p0) {
   int32x2_t __ret;
   __ret = (int32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x2_t vreinterpret_s32_f16(float16x4_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x2_t vreinterpret_s32_s64(int64x1_t __p0) {
   int32x2_t __ret;
   __ret = (int32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x2_t vreinterpret_s32_s64(int64x1_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int32x2_t vreinterpret_s32_s16(int16x4_t __p0) {
   int32x2_t __ret;
   __ret = (int32x2_t)(__p0);
   return __ret;
 }
-#else
-__ai int32x2_t vreinterpret_s32_s16(int16x4_t __p0) {
-  int32x2_t __ret;
-  __ret = (int32x2_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) {
   int64x1_t __ret;
   __ret = (int64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) {
-  int64x1_t __ret;
-  __ret = (int64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) {
   int64x1_t __ret;
   __ret = (int64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) {
-  int64x1_t __ret;
-  __ret = (int64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) {
   int64x1_t __ret;
   __ret = (int64x1_t)(__p0);
   return __ret;
 }
-#else
-__ai int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) {
-  int64x1_t __ret;
-  __ret = (int64x1_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
__ai int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_s8(int8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_s8(int8x8_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_f32(float32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_f32(float32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_f16(float16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_f16(float16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_s32(int32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_s32(int32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_s16(int16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_s16(int16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) { - int16x4_t __ret; 
- __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_s8(int8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_s8(int8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_f32(float32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_f32(float32x2_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_f16(float16x4_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_s32(int32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_s32(int32x2_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_s64(int64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_s64(int64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - #endif #if (__ARM_FP & 2) #ifdef __LITTLE_ENDIAN__ __ai float16x4_t vcvt_f16_f32(float32x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__p0, 8); + __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__p0, 41); return __ret; } #else __ai float16x4_t vcvt_f16_f32(float32x4_t __p0) { float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__rev0, 8); + __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__rev0, 41); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai float16x4_t __noswap_vcvt_f16_f32(float32x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__p0, 8); + __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__p0, 41); return __ret; } #endif @@ -35807,20 +32413,20 @@ __ai float16x4_t __noswap_vcvt_f16_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vcvt_f32_f16(float16x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__p0, 41); + __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__p0, 8); return __ret; } #else __ai float32x4_t vcvt_f32_f16(float16x4_t __p0) { float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__rev0, 41); + __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__rev0, 8); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__p0, 41); + __ret = (float32x4_t) 
__builtin_neon_vcvt_f32_f16((int8x8_t)__p0, 8); return __ret; } #endif @@ -36101,7 +32707,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { #define vld2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ float16x8x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 40); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 40); \ __ret; \ }) #else @@ -36111,7 +32717,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 40); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -36123,7 +32729,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { #define vld2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ float16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 8); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 8); \ __ret; \ }) #else @@ -36133,7 +32739,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ float16x4x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 8); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -36217,7 +32823,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { #define vld3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ float16x8x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 40); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 40); \ __ret; \ }) #else @@ -36228,7 +32834,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 40); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -36241,7 +32847,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { #define vld3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ float16x4x3_t __ret; \ - 
__builtin_neon_vld3_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 8); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 8); \ __ret; \ }) #else @@ -36252,7 +32858,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ float16x4x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 8); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -36341,7 +32947,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { #define vld4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ float16x8x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 40); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 40); \ __ret; \ }) #else @@ -36353,7 +32959,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 40); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 40); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ @@ -36367,7 +32973,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { #define vld4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ float16x4x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 8); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 8); \ __ret; \ }) #else @@ -36379,7 +32985,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ float16x4x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 8); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 8); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ @@ -36444,7 +33050,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ #define vst1q_f16_x2(__p0, __p1) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 
40); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 40); \ }) #else #define vst1q_f16_x2(__p0, __p1) __extension__ ({ \ @@ -36452,14 +33058,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { float16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 40); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f16_x2(__p0, __p1) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 8); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 8); \ }) #else #define vst1_f16_x2(__p0, __p1) __extension__ ({ \ @@ -36467,14 +33073,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { float16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, __rev1.val[0], __rev1.val[1], 8); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f16_x3(__p0, __p1) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 40); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 40); \ }) #else #define vst1q_f16_x3(__p0, __p1) __extension__ ({ \ @@ -36483,14 +33089,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 40); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f16_x3(__p0, __p1) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 8); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 8); \ }) #else #define vst1_f16_x3(__p0, __p1) __extension__ ({ \ @@ -36499,14 +33105,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 8); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f16_x4(__p0, __p1) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 40); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 40); \ }) #else 
#define vst1q_f16_x4(__p0, __p1) __extension__ ({ \ @@ -36516,14 +33122,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 40); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f16_x4(__p0, __p1) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 8); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 8); \ }) #else #define vst1_f16_x4(__p0, __p1) __extension__ ({ \ @@ -36533,14 +33139,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 8); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_f16(__p0, __p1) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, __s1.val[0], __s1.val[1], 40); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 40); \ }) #else #define vst2q_f16(__p0, __p1) __extension__ ({ \ @@ -36548,14 +33154,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { float16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, __rev1.val[0], __rev1.val[1], 40); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_f16(__p0, __p1) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, __s1.val[0], __s1.val[1], 8); \ + __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 8); \ }) #else #define vst2_f16(__p0, __p1) __extension__ ({ \ @@ -36563,14 +33169,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { float16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_v(__p0, __rev1.val[0], __rev1.val[1], 8); \ + __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 40); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 40); \ }) #else #define vst2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ @@ -36578,14 +33184,14 @@ __ai float32x4_t 
__noswap_vcvt_f32_f16(float16x4_t __p0) { float16x8x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 40); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 8); \ + __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 8); \ }) #else #define vst2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ @@ -36593,14 +33199,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { float16x4x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 8); \ + __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_f16(__p0, __p1) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 40); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 40); \ }) #else #define vst3q_f16(__p0, __p1) __extension__ ({ \ @@ -36609,14 +33215,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 40); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_f16(__p0, __p1) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 8); \ + __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 8); \ }) #else #define vst3_f16(__p0, __p1) __extension__ ({ \ @@ -36625,14 +33231,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 8); \ + __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 40); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 40); \ }) #else #define vst3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ @@ -36641,14 +33247,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 40); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 8); \ + __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 8); \ }) #else #define vst3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ @@ -36657,14 +33263,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 8); \ + __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_f16(__p0, __p1) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 40); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 40); \ }) #else #define vst4q_f16(__p0, __p1) __extension__ ({ \ @@ -36674,14 +33280,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 40); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_f16(__p0, __p1) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 8); \ + __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 8); \ }) #else #define vst4_f16(__p0, __p1) __extension__ ({ \ @@ -36691,14 +33297,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 8); \ + __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, __s1.val[0], __s1.val[1], 
__s1.val[2], __s1.val[3], __p2, 40); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 40); \ }) #else #define vst4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ @@ -36708,14 +33314,14 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 40); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 8); \ + __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 8); \ }) #else #define vst4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ @@ -36725,7 +33331,7 @@ __ai float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 8); \ + __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 8); \ }) #endif @@ -37058,7 +33664,7 @@ __ai uint8x16_t vaesmcq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32((int8x16_t)__p0, __p1, (int8x16_t)__p2); + __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32(__p0, __p1, __p2); return __ret; } #else @@ -37066,30 +33672,21 @@ __ai uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32((int8x16_t)__rev0, __p1, (int8x16_t)__rev2); + __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32(__rev0, __p1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vsha1h_u32(uint32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vsha1h_u32(__p0); return __ret; } -#else -__ai uint32_t vsha1h_u32(uint32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vsha1h_u32(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32((int8x16_t)__p0, __p1, (int8x16_t)__p2); + __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32(__p0, __p1, __p2); return __ret; } #else @@ -37097,7 +33694,7 @@ __ai uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32((int8x16_t)__rev0, __p1, (int8x16_t)__rev2); + __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32(__rev0, __p1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } @@ -37106,7 +33703,7 @@ __ai uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { #ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32((int8x16_t)__p0, __p1, (int8x16_t)__p2); + __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32(__p0, __p1, __p2); return __ret; } #else @@ -37114,7 +33711,7 @@ __ai uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32((int8x16_t)__rev0, __p1, (int8x16_t)__rev2); + __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32(__rev0, __p1, __rev2); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } @@ -37388,20 +33985,11 @@ __ai float32x2_t vrndn_f32(float32x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float32_t vrndns_f32(float32_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrndns_f32(__p0); return __ret; } -#else -__ai float32_t vrndns_f32(float32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrndns_f32(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float32x4_t vrndpq_f32(float32x4_t __p0) { float32x4_t __ret; @@ -37818,20 +34406,11 @@ __ai int64x2_t vcvtaq_s64_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vcvta_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vcvta_s64_v((int8x8_t)__p0, 3); return __ret; } -#else -__ai int64x1_t vcvta_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvta_s64_v((int8x8_t)__p0, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; @@ -37848,20 +34427,11 @@ __ai uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcvta_u64_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcvta_u64_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vcvta_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvta_u64_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vcvtmq_s64_f64(float64x2_t __p0) { int64x2_t __ret; @@ -37878,20 +34448,11 @@ __ai int64x2_t vcvtmq_s64_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vcvtm_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vcvtm_s64_v((int8x8_t)__p0, 3); return __ret; } -#else -__ai int64x1_t vcvtm_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvtm_s64_v((int8x8_t)__p0, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; @@ -37908,20 +34469,11 @@ __ai uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcvtm_u64_f64(float64x1_t __p0) { 
uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcvtm_u64_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vcvtm_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvtm_u64_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vcvtnq_s64_f64(float64x2_t __p0) { int64x2_t __ret; @@ -37938,20 +34490,11 @@ __ai int64x2_t vcvtnq_s64_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vcvtn_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vcvtn_s64_v((int8x8_t)__p0, 3); return __ret; } -#else -__ai int64x1_t vcvtn_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvtn_s64_v((int8x8_t)__p0, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; @@ -37968,20 +34511,11 @@ __ai uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcvtn_u64_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcvtn_u64_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vcvtn_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvtn_u64_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vcvtpq_s64_f64(float64x2_t __p0) { int64x2_t __ret; @@ -37998,20 +34532,11 @@ __ai int64x2_t vcvtpq_s64_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vcvtp_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vcvtp_s64_v((int8x8_t)__p0, 3); return __ret; } -#else -__ai int64x1_t vcvtp_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvtp_s64_v((int8x8_t)__p0, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; @@ -38028,5508 +34553,1971 @@ __ai uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcvtp_u64_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcvtp_u64_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vcvtp_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvtp_u64_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_p64(poly64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_p64(poly64x1_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai 
poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_f64(float64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_f64(float64x1_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } -#else -__ai poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { - poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_p8(poly8x8_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_p8(poly8x8_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_p16(poly16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_p16(poly16x4_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_u8(uint8x8_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_u8(uint8x8_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_u32(uint32x2_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_u32(uint32x2_t __p0) { - poly64x1_t __ret; 
- __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_u64(uint64x1_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_u64(uint64x1_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_u16(uint16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_u16(uint16x4_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_s8(int8x8_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_s8(int8x8_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_f64(float64x1_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_f64(float64x1_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_f32(float32x2_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_f32(float32x2_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_f16(float16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_f16(float16x4_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_s32(int32x2_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_s32(int32x2_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_s64(int64x1_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_s64(int64x1_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vreinterpret_p64_s16(int16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } -#else -__ai poly64x1_t vreinterpret_p64_s16(int16x4_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_p64(poly64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_p64(poly64x1_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t 
vreinterpret_p16_u32(uint32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_f64(float64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_f64(float64x1_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } -#else -__ai poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { - poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_p128(poly128_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_p128(poly128_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_p64(poly64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_p64(poly64x2_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai 
poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_f64(float64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_f64(float64x2_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } -#else -__ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { - poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_p8(poly8x16_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_p8(poly8x16_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} 
-#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_p64(poly64x2_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_p64(poly64x2_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_p16(poly16x8_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_p16(poly16x8_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_u8(uint8x16_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_u8(uint8x16_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_u32(uint32x4_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_u32(uint32x4_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_u64(uint64x2_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_u64(uint64x2_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_u16(uint16x8_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_u16(uint16x8_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_s8(int8x16_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_s8(int8x16_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_f64(float64x2_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_f64(float64x2_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_f32(float32x4_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_f32(float32x4_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_f16(float16x8_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_f16(float16x8_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_s32(int32x4_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_s32(int32x4_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_s64(int64x2_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); return __ret; } -#else -__ai poly128_t vreinterpretq_p128_s64(int64x2_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vreinterpretq_p128_s16(int16x8_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); 
return __ret; } -#else -__ai poly128_t vreinterpretq_p128_s16(int16x8_t __p0) { - poly128_t __ret; - __ret = (poly128_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_p8(poly8x16_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_p8(poly8x16_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_p128(poly128_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_p128(poly128_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_p16(poly16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_p16(poly16x8_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_u8(uint8x16_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_u8(uint8x16_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_u32(uint32x4_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_u32(uint32x4_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_u64(uint64x2_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_u64(uint64x2_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_u16(uint16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_u16(uint16x8_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_s8(int8x16_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_s8(int8x16_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_f64(float64x2_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_f64(float64x2_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_f32(float32x4_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_f32(float32x4_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_f16(float16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_f16(float16x8_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_s32(int32x4_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_s32(int32x4_t __p0) { - poly64x2_t 
__ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_s64(int64x2_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_s64(int64x2_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vreinterpretq_p64_s16(int16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } -#else -__ai poly64x2_t vreinterpretq_p64_s16(int16x8_t __p0) { - poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_p128(poly128_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_p128(poly128_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_p64(poly64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_p64(poly64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_f64(float64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_f64(float64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ 
__ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } -#else -__ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { - poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_p128(poly128_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_p128(poly128_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_p64(poly64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_p64(poly64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_f64(float64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return 
__ret; } -#else -__ai uint8x16_t vreinterpretq_u8_f64(float64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } -#else -__ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { - uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_p128(poly128_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_p128(poly128_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_p64(poly64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_p64(poly64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) { - uint32x4_t __ret; - __ret 
= (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_f64(float64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_f64(float64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } -#else -__ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) { - uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_p128(poly128_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_p128(poly128_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_p64(poly64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_p64(poly64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t 
vreinterpretq_u64_u32(uint32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_f64(float64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } -#else -__ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { - uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_p128(poly128_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_p128(poly128_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_p64(poly64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_p64(poly64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); 
return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_f64(float64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_f64(float64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } -#else -__ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_p128(poly128_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_p128(poly128_t __p0) { - int8x16_t __ret; - __ret 
= (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_p64(poly64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_p64(poly64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_f64(float64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_f64(float64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } -#else -__ai int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { - int8x16_t __ret; - __ret = (int8x16_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_p8(poly8x16_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); 
return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_p8(poly8x16_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_p128(poly128_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_p128(poly128_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_p64(poly64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_p64(poly64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_p16(poly16x8_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_p16(poly16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_u8(uint8x16_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_u8(uint8x16_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_u32(uint32x4_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_u32(uint32x4_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_u64(uint64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_u64(uint64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_u16(uint16x8_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_u16(uint16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_s8(int8x16_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_s8(int8x16_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_f32(float32x4_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_f32(float32x4_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_f16(float16x8_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_f16(float16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_s32(int32x4_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_s32(int32x4_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_s64(int64x2_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } 
-#else -__ai float64x2_t vreinterpretq_f64_s64(int64x2_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x2_t vreinterpretq_f64_s16(int16x8_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); return __ret; } -#else -__ai float64x2_t vreinterpretq_f64_s16(int16x8_t __p0) { - float64x2_t __ret; - __ret = (float64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_p128(poly128_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_p128(poly128_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_p64(poly64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_p64(poly64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_f64(float64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_f64(float64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai 
float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } -#else -__ai float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { - float32x4_t __ret; - __ret = (float32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_p128(poly128_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_p128(poly128_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_p64(poly64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_p64(poly64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t 
vreinterpretq_f16_s8(int8x16_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_f64(float64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_f64(float64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } -#else -__ai float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { - float16x8_t __ret; - __ret = (float16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_p128(poly128_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_p128(poly128_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_p64(poly64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_p64(poly64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return 
__ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_f64(float64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_f64(float64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } -#else -__ai int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) { - int32x4_t __ret; - __ret = (int32x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_p128(poly128_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_p128(poly128_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_p64(poly64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_p64(poly64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } 
-#else -__ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_f64(float64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_f64(float64x2_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } -#else -__ai int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { - int64x2_t __ret; - __ret = (int64x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_p128(poly128_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_p128(poly128_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_p64(poly64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_p64(poly64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef 
__LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_f64(float64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_f64(float64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } -#else -__ai int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) { - int16x8_t __ret; - __ret = (int16x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_p64(poly64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_p64(poly64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t 
vreinterpret_u8_p16(poly16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_f64(float64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_f64(float64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } -#else -__ai uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { - uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_p64(poly64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_p64(poly64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) { 
uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_f64(float64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_f64(float64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } -#else -__ai uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { - uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_p64(poly64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_p64(poly64x1_t __p0) 
{ - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } -#else -__ai uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t 
vreinterpret_u16_p64(poly64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_p64(poly64x1_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_f64(float64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_f64(float64x1_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } -#else -__ai uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t 
vreinterpret_s8_p8(poly8x8_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_p64(poly64x1_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_p64(poly64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_f64(float64x1_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_f64(float64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_f32(float32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_f32(float32x2_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_f16(float16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_f16(float16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_s32(int32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_s32(int32x2_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_s64(int64x1_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_s64(int64x1_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } -#else -__ai int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { - int8x8_t __ret; - __ret = (int8x8_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_p8(poly8x8_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t 
vreinterpret_f64_p8(poly8x8_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_p64(poly64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_p64(poly64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_p16(poly16x4_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_p16(poly16x4_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_u8(uint8x8_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_u8(uint8x8_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_u32(uint32x2_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_u32(uint32x2_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_u64(uint64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_u64(uint64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_u16(uint16x4_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_u16(uint16x4_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_s8(int8x8_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_s8(int8x8_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_f32(float32x2_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_f32(float32x2_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_f16(float16x4_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_f16(float16x4_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_s32(int32x2_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_s32(int32x2_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_s64(int64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_s64(int64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vreinterpret_f64_s16(int16x4_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); return __ret; } -#else -__ai float64x1_t vreinterpret_f64_s16(int16x4_t __p0) { - 
float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_p64(poly64x1_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_p64(poly64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_s8(int8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_s8(int8x8_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_f64(float64x1_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_f64(float64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_f16(float16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_f16(float16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_s32(int32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_s32(int32x2_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_s64(int64x1_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_s64(int64x1_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - 
return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32x2_t vreinterpret_f32_s16(int16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } -#else -__ai float32x2_t vreinterpret_f32_s16(int16x4_t __p0) { - float32x2_t __ret; - __ret = (float32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_p64(poly64x1_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_p64(poly64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_s8(int8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_s8(int8x8_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_f64(float64x1_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_f64(float64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_f32(float32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_f32(float32x2_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_s32(int32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_s32(int32x2_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ 
__ai float16x4_t vreinterpret_f16_s64(int64x1_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_s64(int64x1_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float16x4_t vreinterpret_f16_s16(int16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } -#else -__ai float16x4_t vreinterpret_f16_s16(int16x4_t __p0) { - float16x4_t __ret; - __ret = (float16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_p64(poly64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_p64(poly64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_s8(int8x8_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_s8(int8x8_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_f64(float64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_f64(float64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_f32(float32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_f32(float32x2_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_f16(float16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_f16(float16x4_t 
__p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_s64(int64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_s64(int64x1_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32x2_t vreinterpret_s32_s16(int16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } -#else -__ai int32x2_t vreinterpret_s32_s16(int16x4_t __p0) { - int32x2_t __ret; - __ret = (int32x2_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_p64(poly64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_p64(poly64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_s8(int8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_s8(int8x8_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_f32(float32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_f32(float32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_f16(float16x4_t __p0) { int64x1_t __ret; __ret = 
(int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_f16(float16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_s32(int32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_s32(int32x2_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vreinterpret_s64_s16(int16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } -#else -__ai int64x1_t vreinterpret_s64_s16(int16x4_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_p64(poly64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_p64(poly64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_s8(int8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_s8(int8x8_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_f64(float64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_f64(float64x1_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16x4_t vreinterpret_s16_f32(float32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } -#else -__ai int16x4_t vreinterpret_s16_f32(float32x2_t __p0) { - int16x4_t __ret; - __ret = (int16x4_t)(__p0); - return __ret; -} -#endif - -#ifdef 
__LITTLE_ENDIAN__
 __ai int16x4_t vreinterpret_s16_f16(float16x4_t __p0) {
   int16x4_t __ret;
   __ret = (int16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x4_t vreinterpret_s16_f16(float16x4_t __p0) {
-  int16x4_t __ret;
-  __ret = (int16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x4_t vreinterpret_s16_s32(int32x2_t __p0) {
   int16x4_t __ret;
   __ret = (int16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x4_t vreinterpret_s16_s32(int32x2_t __p0) {
-  int16x4_t __ret;
-  __ret = (int16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int16x4_t vreinterpret_s16_s64(int64x1_t __p0) {
   int16x4_t __ret;
   __ret = (int16x4_t)(__p0);
   return __ret;
 }
-#else
-__ai int16x4_t vreinterpret_s16_s64(int64x1_t __p0) {
-  int16x4_t __ret;
-  __ret = (int16x4_t)(__p0);
-  return __ret;
-}
-#endif
-
 #endif
 #if __ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)
 #ifdef __LITTLE_ENDIAN__
@@ -43548,20 +36536,11 @@ __ai float64x2_t vrndq_f64(float64x2_t __p0) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vrnd_f64(float64x1_t __p0) {
   float64x1_t __ret;
   __ret = (float64x1_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 10);
   return __ret;
 }
-#else
-__ai float64x1_t vrnd_f64(float64x1_t __p0) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 10);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai float64x2_t vrndaq_f64(float64x2_t __p0) {
   float64x2_t __ret;
@@ -43578,20 +36557,11 @@ __ai float64x2_t vrndaq_f64(float64x2_t __p0) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vrnda_f64(float64x1_t __p0) {
   float64x1_t __ret;
   __ret = (float64x1_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 10);
   return __ret;
 }
-#else
-__ai float64x1_t vrnda_f64(float64x1_t __p0) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 10);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai float64x2_t vrndiq_f64(float64x2_t __p0) {
   float64x2_t __ret;
@@ -43608,20 +36578,11 @@ __ai float64x2_t vrndiq_f64(float64x2_t __p0) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vrndi_f64(float64x1_t __p0) {
   float64x1_t __ret;
   __ret = (float64x1_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 10);
   return __ret;
 }
-#else
-__ai float64x1_t vrndi_f64(float64x1_t __p0) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 10);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai float64x2_t vrndmq_f64(float64x2_t __p0) {
   float64x2_t __ret;
@@ -43638,20 +36599,11 @@ __ai float64x2_t vrndmq_f64(float64x2_t __p0) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vrndm_f64(float64x1_t __p0) {
   float64x1_t __ret;
   __ret = (float64x1_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 10);
   return __ret;
 }
-#else
-__ai float64x1_t vrndm_f64(float64x1_t __p0) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 10);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai float64x2_t vrndnq_f64(float64x2_t __p0) {
   float64x2_t __ret;
@@ -43668,20 +36620,11 @@ __ai float64x2_t vrndnq_f64(float64x2_t __p0) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vrndn_f64(float64x1_t __p0) {
   float64x1_t __ret;
   __ret = (float64x1_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 10);
   return __ret;
 }
-#else
-__ai float64x1_t vrndn_f64(float64x1_t __p0) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 10);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai float64x2_t vrndpq_f64(float64x2_t __p0) {
   float64x2_t __ret;
@@ -43698,20 +36641,11 @@ __ai float64x2_t vrndpq_f64(float64x2_t __p0) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vrndp_f64(float64x1_t __p0) {
   float64x1_t __ret;
   __ret = (float64x1_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 10);
   return __ret;
 }
-#else
-__ai float64x1_t vrndp_f64(float64x1_t __p0) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 10);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai float64x2_t vrndxq_f64(float64x2_t __p0) {
   float64x2_t __ret;
@@ -43728,20 +36662,11 @@ __ai float64x2_t vrndxq_f64(float64x2_t __p0) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vrndx_f64(float64x1_t __p0) {
   float64x1_t __ret;
   __ret = (float64x1_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 10);
   return __ret;
 }
-#else
-__ai float64x1_t vrndx_f64(float64x1_t __p0) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 10);
-  return __ret;
-}
-#endif
-
 #endif
 #if __ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)
 #ifdef __LITTLE_ENDIAN__
@@ -43761,20 +36686,11 @@ __ai float64x2_t vmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vmaxnm_f64(float64x1_t __p0, float64x1_t __p1) {
   float64x1_t __ret;
   __ret = (float64x1_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
   return __ret;
 }
-#else
-__ai float64x1_t vmaxnm_f64(float64x1_t __p0, float64x1_t __p1) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai float64x2_t vminnmq_f64(float64x2_t __p0, float64x2_t __p1) {
   float64x2_t __ret;
@@ -43792,16 +36708,183 @@ __ai float64x2_t vminnmq_f64(float64x2_t __p0, float64x2_t __p1) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vminnm_f64(float64x1_t __p0, float64x1_t __p1) {
   float64x1_t __ret;
   __ret = (float64x1_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
   return __ret;
 }
+#endif
+#if defined(__ARM_FEATURE_COMPLEX)
+#ifdef __LITTLE_ENDIAN__
+__ai float32x2_t vcadd_rot270_f32(float32x2_t __p0, float32x2_t __p1) {
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vcadd_rot270_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
+  return __ret;
+}
 #else
-__ai float64x1_t vminnm_f64(float64x1_t __p0, float64x1_t __p1) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
+__ai float32x2_t vcadd_rot270_f32(float32x2_t __p0, float32x2_t __p1) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vcadd_rot270_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float32x2_t vcadd_rot90_f32(float32x2_t __p0, float32x2_t __p1) {
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vcadd_rot90_v((int8x8_t)__p0, (int8x8_t)__p1, 9);
+  return __ret;
+}
+#else
+__ai float32x2_t vcadd_rot90_f32(float32x2_t __p0, float32x2_t __p1) {
+  float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float32x2_t __ret;
+  __ret = (float32x2_t) __builtin_neon_vcadd_rot90_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float32x4_t vcaddq_rot270_f32(float32x4_t __p0, float32x4_t __p1) {
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vcaddq_rot270_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
+  return __ret;
+}
+#else
+__ai float32x4_t vcaddq_rot270_f32(float32x4_t __p0, float32x4_t __p1) {
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vcaddq_rot270_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float32x4_t vcaddq_rot90_f32(float32x4_t __p0, float32x4_t __p1) {
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vcaddq_rot90_v((int8x16_t)__p0, (int8x16_t)__p1, 41);
+  return __ret;
+}
+#else
+__ai float32x4_t vcaddq_rot90_f32(float32x4_t __p0, float32x4_t __p1) {
+  float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float32x4_t __ret;
+  __ret = (float32x4_t) __builtin_neon_vcaddq_rot90_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#endif
+#if defined(__ARM_FEATURE_COMPLEX) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+#ifdef __LITTLE_ENDIAN__
+__ai float16x4_t vcadd_rot270_f16(float16x4_t __p0, float16x4_t __p1) {
+  float16x4_t __ret;
+  __ret = (float16x4_t) __builtin_neon_vcadd_rot270_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+  return __ret;
+}
+#else
+__ai float16x4_t vcadd_rot270_f16(float16x4_t __p0, float16x4_t __p1) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float16x4_t __ret;
+  __ret = (float16x4_t) __builtin_neon_vcadd_rot270_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float16x4_t vcadd_rot90_f16(float16x4_t __p0, float16x4_t __p1) {
+  float16x4_t __ret;
+  __ret = (float16x4_t) __builtin_neon_vcadd_rot90_v((int8x8_t)__p0, (int8x8_t)__p1, 8);
+  return __ret;
+}
+#else
+__ai float16x4_t vcadd_rot90_f16(float16x4_t __p0, float16x4_t __p1) {
+  float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+  float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+  float16x4_t __ret;
+  __ret = (float16x4_t) __builtin_neon_vcadd_rot90_v((int8x8_t)__rev0, (int8x8_t)__rev1, 8);
+  __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float16x8_t vcaddq_rot270_f16(float16x8_t __p0, float16x8_t __p1) {
+  float16x8_t __ret;
+  __ret = (float16x8_t) __builtin_neon_vcaddq_rot270_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+  return __ret;
+}
+#else
+__ai float16x8_t vcaddq_rot270_f16(float16x8_t __p0, float16x8_t __p1) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  float16x8_t __ret;
+  __ret = (float16x8_t) __builtin_neon_vcaddq_rot270_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float16x8_t vcaddq_rot90_f16(float16x8_t __p0, float16x8_t __p1) {
+  float16x8_t __ret;
+  __ret = (float16x8_t) __builtin_neon_vcaddq_rot90_v((int8x16_t)__p0, (int8x16_t)__p1, 40);
+  return __ret;
+}
+#else
+__ai float16x8_t vcaddq_rot90_f16(float16x8_t __p0, float16x8_t __p1) {
+  float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+  float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+  float16x8_t __ret;
+  __ret = (float16x8_t) __builtin_neon_vcaddq_rot90_v((int8x16_t)__rev0, (int8x16_t)__rev1, 40);
+  __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+  return __ret;
+}
+#endif
+
+#endif
+#if defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__)
+#ifdef __LITTLE_ENDIAN__
+__ai float64x2_t vcaddq_rot270_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vcaddq_rot270_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
+  return __ret;
+}
+#else
+__ai float64x2_t vcaddq_rot270_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vcaddq_rot270_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float64x2_t vcaddq_rot90_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vcaddq_rot90_v((int8x16_t)__p0, (int8x16_t)__p1, 42);
+  return __ret;
+}
+#else
+__ai float64x2_t vcaddq_rot90_f64(float64x2_t __p0, float64x2_t __p1) {
+  float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+  float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+  float64x2_t __ret;
+  __ret = (float64x2_t) __builtin_neon_vcaddq_rot90_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42);
+  __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
   return __ret;
 }
 #endif
@@ -46305,7 +39388,7 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) {
 #define vduph_lane_f16(__p0, __p1) __extension__ ({ \
   float16x4_t __s0 = __p0; \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vduph_lane_f16((int8x8_t)__s0, __p1); \
+  __ret = (float16_t) __builtin_neon_vduph_lane_f16((float16x4_t)__s0, __p1); \
   __ret; \
 })
 #else
@@ -46313,7 +39396,7 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __s0 = __p0; \
   float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vduph_lane_f16((int8x8_t)__rev0, __p1); \
+  __ret = (float16_t) __builtin_neon_vduph_lane_f16((float16x4_t)__rev0, __p1); \
   __ret; \
 })
 #endif
@@ -46322,7 +39405,7 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) {
 #define vduph_laneq_f16(__p0, __p1) __extension__ ({ \
   float16x8_t __s0 = __p0; \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vduph_laneq_f16((int8x16_t)__s0, __p1); \
+  __ret = (float16_t) __builtin_neon_vduph_laneq_f16((float16x8_t)__s0, __p1); \
   __ret; \
 })
 #else
@@ -46330,7 +39413,7 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x8_t __s0 = __p0; \
   float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vduph_laneq_f16((int8x16_t)__rev0, __p1); \
+  __ret = (float16_t) __builtin_neon_vduph_laneq_f16((float16x8_t)__rev0, __p1); \
   __ret; \
 })
 #endif
@@ -46341,7 +39424,7 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) {
   float16_t __s1 = __p1; \
   float16x4_t __s2 = __p2; \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (int8x8_t)__s2, __p3); \
+  __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (float16x4_t)__s2, __p3); \
   __ret; \
 })
 #else
@@ -46351,7 +39434,7 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __s2 = __p2; \
   float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (int8x8_t)__rev2, __p3); \
+  __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (float16x4_t)__rev2, __p3); \
   __ret; \
 })
 #define __noswap_vfmah_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \
@@ -46359,7 +39442,7 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) {
   float16_t __s1 = __p1; \
   float16x4_t __s2 = __p2; \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (int8x8_t)__s2, __p3); \
+  __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (float16x4_t)__s2, __p3); \
   __ret; \
 })
 #endif
@@ -46434,7 +39517,7 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) {
   float16_t __s1 = __p1; \
   float16x8_t __s2 = __p2; \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (int8x16_t)__s2, __p3); \
+  __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (float16x8_t)__s2, __p3); \
   __ret; \
 })
 #else
@@ -46444,7 +39527,7 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x8_t __s2 = __p2; \
   float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (int8x16_t)__rev2, __p3); \
+  __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (float16x8_t)__rev2, __p3); \
   __ret; \
 })
 #define __noswap_vfmah_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \
@@ -46452,7 +39535,7 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) {
   float16_t __s1 = __p1; \
   float16x8_t __s2 = __p2; \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (int8x16_t)__s2, __p3); \
+  __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (float16x8_t)__s2, __p3); \
   __ret; \
 })
 #endif
@@ -46978,7 +40061,7 @@ __ai float16x4_t __noswap_vmulx_f16(float16x4_t __p0, float16x4_t __p1) {
   float16_t __s0 = __p0; \
   float16x4_t __s1 = __p1; \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (int8x8_t)__s1, __p2); \
+  __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (float16x4_t)__s1, __p2); \
   __ret; \
 })
 #else
@@ -46987,7 +40070,7 @@ __ai float16x4_t __noswap_vmulx_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x4_t __s1 = __p1; \
   float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (int8x8_t)__rev1, __p2); \
+  __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (float16x4_t)__rev1, __p2); \
   __ret; \
 })
 #endif
@@ -47039,7 +40122,7 @@ __ai float16x4_t __noswap_vmulx_f16(float16x4_t __p0, float16x4_t __p1) {
   float16_t __s0 = __p0; \
   float16x8_t __s1 = __p1; \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (int8x16_t)__s1, __p2); \
+  __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (float16x8_t)__s1, __p2); \
   __ret; \
 })
 #else
@@ -47048,7 +40131,7 @@ __ai float16x4_t __noswap_vmulx_f16(float16x4_t __p0, float16x4_t __p1) {
   float16x8_t __s1 = __p1; \
   float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \
   float16_t __ret; \
-  __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (int8x16_t)__rev1, __p2); \
+  __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (float16x8_t)__rev1, __p2); \
   __ret; \
 })
 #endif
@@ -48073,48 +41156,21 @@ __ai float64x2_t vabdq_f64(float64x2_t __p0, float64x2_t __p1) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vabd_f64(float64x1_t __p0, float64x1_t __p1) {
   float64x1_t __ret;
   __ret = (float64x1_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
   return __ret;
 }
-#else
-__ai float64x1_t vabd_f64(float64x1_t __p0, float64x1_t __p1) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 10);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float64_t vabdd_f64(float64_t __p0, float64_t __p1) {
   float64_t __ret;
   __ret = (float64_t) __builtin_neon_vabdd_f64(__p0, __p1);
   return __ret;
 }
-#else
-__ai float64_t vabdd_f64(float64_t __p0, float64_t __p1) {
-  float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vabdd_f64(__p0, __p1);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai float32_t vabds_f32(float32_t __p0, float32_t __p1) {
   float32_t __ret;
   __ret = (float32_t) __builtin_neon_vabds_f32(__p0, __p1);
   return __ret;
 }
-#else
-__ai float32_t vabds_f32(float32_t __p0, float32_t __p1) {
-  float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vabds_f32(__p0, __p1);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai float64x2_t vabsq_f64(float64x2_t __p0) {
   float64x2_t __ret;
@@ -48147,48 +41203,21 @@ __ai int64x2_t vabsq_s64(int64x2_t __p0) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vabs_f64(float64x1_t __p0) {
   float64x1_t __ret;
   __ret = (float64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 10);
   return __ret;
 }
-#else
-__ai float64x1_t vabs_f64(float64x1_t __p0) {
-  float64x1_t __ret;
-  __ret = (float64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 10);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64x1_t vabs_s64(int64x1_t __p0) {
   int64x1_t __ret;
   __ret = (int64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 3);
   return __ret;
 }
-#else
-__ai int64x1_t vabs_s64(int64x1_t __p0) {
-  int64x1_t __ret;
-  __ret = (int64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 3);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64_t vabsd_s64(int64_t __p0) {
   int64_t __ret;
   __ret = (int64_t) __builtin_neon_vabsd_s64(__p0);
   return __ret;
 }
-#else
-__ai int64_t vabsd_s64(int64_t __p0) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vabsd_s64(__p0);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai float64x2_t vaddq_f64(float64x2_t __p0, float64x2_t __p1) {
   float64x2_t __ret;
@@ -48206,48 +41235,21 @@ __ai float64x2_t vaddq_f64(float64x2_t __p0, float64x2_t __p1) {
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai float64x1_t vadd_f64(float64x1_t __p0, float64x1_t __p1) {
   float64x1_t __ret;
   __ret = __p0 + __p1;
   return __ret;
 }
-#else
-__ai float64x1_t vadd_f64(float64x1_t __p0, float64x1_t __p1) {
-  float64x1_t __ret;
-  __ret = __p0 + __p1;
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai uint64_t vaddd_u64(uint64_t __p0, uint64_t __p1) {
   uint64_t __ret;
   __ret = (uint64_t) __builtin_neon_vaddd_u64(__p0, __p1);
   return __ret;
 }
-#else
-__ai uint64_t vaddd_u64(uint64_t __p0, uint64_t __p1) {
-  uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vaddd_u64(__p0, __p1);
-  return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
 __ai int64_t vaddd_s64(int64_t __p0, int64_t __p1) {
   int64_t __ret;
   __ret = (int64_t) __builtin_neon_vaddd_s64(__p0, __p1);
   return __ret;
 }
-#else
-__ai int64_t vaddd_s64(int64_t __p0, int64_t __p1) {
-  int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vaddd_s64(__p0, __p1);
-  return __ret;
-}
-#endif
-
 #ifdef __LITTLE_ENDIAN__
 __ai uint16x8_t vaddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
   uint16x8_t __ret;
@@ -48359,14 +41361,14 @@ __ai int8x16_t vaddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint16_t vaddlvq_u8(uint8x16_t __p0) {
   uint16_t __ret;
-  __ret = (uint16_t) __builtin_neon_vaddlvq_u8((int8x16_t)__p0);
+  __ret = (uint16_t) __builtin_neon_vaddlvq_u8(__p0);
   return __ret;
 }
 #else
 __ai uint16_t vaddlvq_u8(uint8x16_t __p0) {
   uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   uint16_t __ret;
-  __ret = (uint16_t) __builtin_neon_vaddlvq_u8((int8x16_t)__rev0);
+  __ret = (uint16_t) __builtin_neon_vaddlvq_u8(__rev0);
   return __ret;
 }
 #endif
@@ -48374,14 +41376,14 @@ __ai uint16_t vaddlvq_u8(uint8x16_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint64_t vaddlvq_u32(uint32x4_t __p0) {
   uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vaddlvq_u32((int8x16_t)__p0);
+  __ret = (uint64_t) __builtin_neon_vaddlvq_u32(__p0);
   return __ret;
 }
 #else
 __ai uint64_t vaddlvq_u32(uint32x4_t __p0) {
   uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vaddlvq_u32((int8x16_t)__rev0);
+  __ret = (uint64_t) __builtin_neon_vaddlvq_u32(__rev0);
   return __ret;
 }
 #endif
@@ -48389,14 +41391,14 @@ __ai uint64_t vaddlvq_u32(uint32x4_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint32_t vaddlvq_u16(uint16x8_t __p0) {
   uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vaddlvq_u16((int8x16_t)__p0);
+  __ret = (uint32_t) __builtin_neon_vaddlvq_u16(__p0);
   return __ret;
 }
 #else
 __ai uint32_t vaddlvq_u16(uint16x8_t __p0) {
   uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vaddlvq_u16((int8x16_t)__rev0);
+  __ret = (uint32_t) __builtin_neon_vaddlvq_u16(__rev0);
   return __ret;
 }
 #endif
@@ -48404,14 +41406,14 @@ __ai uint32_t vaddlvq_u16(uint16x8_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int16_t vaddlvq_s8(int8x16_t __p0) {
   int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vaddlvq_s8((int8x16_t)__p0);
+  __ret = (int16_t) __builtin_neon_vaddlvq_s8(__p0);
   return __ret;
 }
 #else
 __ai int16_t vaddlvq_s8(int8x16_t __p0) {
   int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vaddlvq_s8((int8x16_t)__rev0);
+  __ret = (int16_t) __builtin_neon_vaddlvq_s8(__rev0);
   return __ret;
 }
 #endif
@@ -48419,14 +41421,14 @@ __ai int16_t vaddlvq_s8(int8x16_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int64_t vaddlvq_s32(int32x4_t __p0) {
   int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vaddlvq_s32((int8x16_t)__p0);
+  __ret = (int64_t) __builtin_neon_vaddlvq_s32(__p0);
   return __ret;
 }
 #else
 __ai int64_t vaddlvq_s32(int32x4_t __p0) {
   int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vaddlvq_s32((int8x16_t)__rev0);
+  __ret = (int64_t) __builtin_neon_vaddlvq_s32(__rev0);
   return __ret;
 }
 #endif
@@ -48434,14 +41436,14 @@ __ai int64_t vaddlvq_s32(int32x4_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int32_t vaddlvq_s16(int16x8_t __p0) {
   int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vaddlvq_s16((int8x16_t)__p0);
+  __ret = (int32_t) __builtin_neon_vaddlvq_s16(__p0);
   return __ret;
 }
 #else
 __ai int32_t vaddlvq_s16(int16x8_t __p0) {
   int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vaddlvq_s16((int8x16_t)__rev0);
+  __ret = (int32_t) __builtin_neon_vaddlvq_s16(__rev0);
   return __ret;
 }
 #endif
@@ -48449,14 +41451,14 @@ __ai int32_t vaddlvq_s16(int16x8_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint16_t vaddlv_u8(uint8x8_t __p0) {
   uint16_t __ret;
-  __ret = (uint16_t) __builtin_neon_vaddlv_u8((int8x8_t)__p0);
+  __ret = (uint16_t) __builtin_neon_vaddlv_u8(__p0);
   return __ret;
 }
 #else
 __ai uint16_t vaddlv_u8(uint8x8_t __p0) {
   uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   uint16_t __ret;
-  __ret = (uint16_t) __builtin_neon_vaddlv_u8((int8x8_t)__rev0);
+  __ret = (uint16_t) __builtin_neon_vaddlv_u8(__rev0);
   return __ret;
 }
 #endif
@@ -48464,14 +41466,14 @@ __ai uint16_t vaddlv_u8(uint8x8_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint64_t vaddlv_u32(uint32x2_t __p0) {
   uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vaddlv_u32((int8x8_t)__p0);
+  __ret = (uint64_t) __builtin_neon_vaddlv_u32(__p0);
   return __ret;
 }
 #else
 __ai uint64_t vaddlv_u32(uint32x2_t __p0) {
   uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vaddlv_u32((int8x8_t)__rev0);
+  __ret = (uint64_t) __builtin_neon_vaddlv_u32(__rev0);
   return __ret;
 }
 #endif
@@ -48479,14 +41481,14 @@ __ai uint64_t vaddlv_u32(uint32x2_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint32_t vaddlv_u16(uint16x4_t __p0) {
   uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vaddlv_u16((int8x8_t)__p0);
+  __ret = (uint32_t) __builtin_neon_vaddlv_u16(__p0);
   return __ret;
 }
 #else
 __ai uint32_t vaddlv_u16(uint16x4_t __p0) {
   uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vaddlv_u16((int8x8_t)__rev0);
+  __ret = (uint32_t) __builtin_neon_vaddlv_u16(__rev0);
   return __ret;
 }
 #endif
@@ -48494,14 +41496,14 @@ __ai uint32_t vaddlv_u16(uint16x4_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int16_t vaddlv_s8(int8x8_t __p0) {
   int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vaddlv_s8((int8x8_t)__p0);
+  __ret = (int16_t) __builtin_neon_vaddlv_s8(__p0);
   return __ret;
 }
 #else
 __ai int16_t vaddlv_s8(int8x8_t __p0) {
   int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vaddlv_s8((int8x8_t)__rev0);
+  __ret = (int16_t) __builtin_neon_vaddlv_s8(__rev0);
   return __ret;
 }
 #endif
@@ -48509,14 +41511,14 @@ __ai int16_t vaddlv_s8(int8x8_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int64_t vaddlv_s32(int32x2_t __p0) {
   int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vaddlv_s32((int8x8_t)__p0);
+  __ret = (int64_t) __builtin_neon_vaddlv_s32(__p0);
   return __ret;
 }
 #else
 __ai int64_t vaddlv_s32(int32x2_t __p0) {
   int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vaddlv_s32((int8x8_t)__rev0);
+  __ret = (int64_t) __builtin_neon_vaddlv_s32(__rev0);
   return __ret;
 }
 #endif
@@ -48524,14 +41526,14 @@ __ai int64_t vaddlv_s32(int32x2_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int32_t vaddlv_s16(int16x4_t __p0) {
   int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vaddlv_s16((int8x8_t)__p0);
+  __ret = (int32_t) __builtin_neon_vaddlv_s16(__p0);
   return __ret;
 }
 #else
 __ai int32_t vaddlv_s16(int16x4_t __p0) {
   int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vaddlv_s16((int8x8_t)__rev0);
+  __ret = (int32_t) __builtin_neon_vaddlv_s16(__rev0);
   return __ret;
 }
 #endif
@@ -48539,14 +41541,14 @@ __ai int32_t vaddlv_s16(int16x4_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint8_t vaddvq_u8(uint8x16_t __p0) {
   uint8_t __ret;
-  __ret = (uint8_t) __builtin_neon_vaddvq_u8((int8x16_t)__p0);
+  __ret = (uint8_t) __builtin_neon_vaddvq_u8(__p0);
   return __ret;
 }
 #else
 __ai uint8_t vaddvq_u8(uint8x16_t __p0) {
   uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   uint8_t __ret;
-  __ret = (uint8_t) __builtin_neon_vaddvq_u8((int8x16_t)__rev0);
+  __ret = (uint8_t) __builtin_neon_vaddvq_u8(__rev0);
   return __ret;
 }
 #endif
@@ -48554,14 +41556,14 @@ __ai uint8_t vaddvq_u8(uint8x16_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint32_t vaddvq_u32(uint32x4_t __p0) {
   uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vaddvq_u32((int8x16_t)__p0);
+  __ret = (uint32_t) __builtin_neon_vaddvq_u32(__p0);
   return __ret;
 }
 #else
 __ai uint32_t vaddvq_u32(uint32x4_t __p0) {
   uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vaddvq_u32((int8x16_t)__rev0);
+  __ret = (uint32_t) __builtin_neon_vaddvq_u32(__rev0);
   return __ret;
 }
 #endif
@@ -48569,14 +41571,14 @@ __ai uint32_t vaddvq_u32(uint32x4_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint64_t vaddvq_u64(uint64x2_t __p0) {
   uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vaddvq_u64((int8x16_t)__p0);
+  __ret = (uint64_t) __builtin_neon_vaddvq_u64(__p0);
   return __ret;
 }
 #else
 __ai uint64_t vaddvq_u64(uint64x2_t __p0) {
   uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   uint64_t __ret;
-  __ret = (uint64_t) __builtin_neon_vaddvq_u64((int8x16_t)__rev0);
+  __ret = (uint64_t) __builtin_neon_vaddvq_u64(__rev0);
   return __ret;
 }
 #endif
@@ -48584,14 +41586,14 @@ __ai uint64_t vaddvq_u64(uint64x2_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint16_t vaddvq_u16(uint16x8_t __p0) {
   uint16_t __ret;
-  __ret = (uint16_t) __builtin_neon_vaddvq_u16((int8x16_t)__p0);
+  __ret = (uint16_t) __builtin_neon_vaddvq_u16(__p0);
   return __ret;
 }
 #else
 __ai uint16_t vaddvq_u16(uint16x8_t __p0) {
   uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   uint16_t __ret;
-  __ret = (uint16_t) __builtin_neon_vaddvq_u16((int8x16_t)__rev0);
+  __ret = (uint16_t) __builtin_neon_vaddvq_u16(__rev0);
   return __ret;
 }
 #endif
@@ -48599,14 +41601,14 @@ __ai uint16_t vaddvq_u16(uint16x8_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int8_t vaddvq_s8(int8x16_t __p0) {
   int8_t __ret;
-  __ret = (int8_t) __builtin_neon_vaddvq_s8((int8x16_t)__p0);
+  __ret = (int8_t) __builtin_neon_vaddvq_s8(__p0);
   return __ret;
 }
 #else
 __ai int8_t vaddvq_s8(int8x16_t __p0) {
   int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
   int8_t __ret;
-  __ret = (int8_t) __builtin_neon_vaddvq_s8((int8x16_t)__rev0);
+  __ret = (int8_t) __builtin_neon_vaddvq_s8(__rev0);
   return __ret;
 }
 #endif
@@ -48614,14 +41616,14 @@ __ai int8_t vaddvq_s8(int8x16_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai float64_t vaddvq_f64(float64x2_t __p0) {
   float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vaddvq_f64((int8x16_t)__p0);
+  __ret = (float64_t) __builtin_neon_vaddvq_f64(__p0);
   return __ret;
 }
 #else
 __ai float64_t vaddvq_f64(float64x2_t __p0) {
   float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   float64_t __ret;
-  __ret = (float64_t) __builtin_neon_vaddvq_f64((int8x16_t)__rev0);
+  __ret = (float64_t) __builtin_neon_vaddvq_f64(__rev0);
   return __ret;
 }
 #endif
@@ -48629,14 +41631,14 @@ __ai float64_t vaddvq_f64(float64x2_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai float32_t vaddvq_f32(float32x4_t __p0) {
   float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vaddvq_f32((int8x16_t)__p0);
+  __ret = (float32_t) __builtin_neon_vaddvq_f32(__p0);
   return __ret;
 }
 #else
 __ai float32_t vaddvq_f32(float32x4_t __p0) {
   float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vaddvq_f32((int8x16_t)__rev0);
+  __ret = (float32_t) __builtin_neon_vaddvq_f32(__rev0);
   return __ret;
 }
 #endif
@@ -48644,14 +41646,14 @@ __ai float32_t vaddvq_f32(float32x4_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int32_t vaddvq_s32(int32x4_t __p0) {
   int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vaddvq_s32((int8x16_t)__p0);
+  __ret = (int32_t) __builtin_neon_vaddvq_s32(__p0);
   return __ret;
 }
 #else
 __ai int32_t vaddvq_s32(int32x4_t __p0) {
   int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vaddvq_s32((int8x16_t)__rev0);
+  __ret = (int32_t) __builtin_neon_vaddvq_s32(__rev0);
   return __ret;
 }
 #endif
@@ -48659,14 +41661,14 @@ __ai int32_t vaddvq_s32(int32x4_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int64_t vaddvq_s64(int64x2_t __p0) {
   int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vaddvq_s64((int8x16_t)__p0);
+  __ret = (int64_t) __builtin_neon_vaddvq_s64(__p0);
   return __ret;
 }
 #else
 __ai int64_t vaddvq_s64(int64x2_t __p0) {
   int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   int64_t __ret;
-  __ret = (int64_t) __builtin_neon_vaddvq_s64((int8x16_t)__rev0);
+  __ret = (int64_t) __builtin_neon_vaddvq_s64(__rev0);
   return __ret;
 }
 #endif
@@ -48674,14 +41676,14 @@ __ai int64_t vaddvq_s64(int64x2_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int16_t vaddvq_s16(int16x8_t __p0) {
   int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vaddvq_s16((int8x16_t)__p0);
+  __ret = (int16_t) __builtin_neon_vaddvq_s16(__p0);
   return __ret;
 }
 #else
 __ai int16_t vaddvq_s16(int16x8_t __p0) {
   int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vaddvq_s16((int8x16_t)__rev0);
+  __ret = (int16_t) __builtin_neon_vaddvq_s16(__rev0);
   return __ret;
 }
 #endif
@@ -48689,14 +41691,14 @@ __ai int16_t vaddvq_s16(int16x8_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint8_t vaddv_u8(uint8x8_t __p0) {
   uint8_t __ret;
-  __ret = (uint8_t) __builtin_neon_vaddv_u8((int8x8_t)__p0);
+  __ret = (uint8_t) __builtin_neon_vaddv_u8(__p0);
   return __ret;
 }
 #else
 __ai uint8_t vaddv_u8(uint8x8_t __p0) {
   uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   uint8_t __ret;
-  __ret = (uint8_t) __builtin_neon_vaddv_u8((int8x8_t)__rev0);
+  __ret = (uint8_t) __builtin_neon_vaddv_u8(__rev0);
   return __ret;
 }
 #endif
@@ -48704,14 +41706,14 @@ __ai uint8_t vaddv_u8(uint8x8_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint32_t vaddv_u32(uint32x2_t __p0) {
   uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vaddv_u32((int8x8_t)__p0);
+  __ret = (uint32_t) __builtin_neon_vaddv_u32(__p0);
   return __ret;
 }
 #else
 __ai uint32_t vaddv_u32(uint32x2_t __p0) {
   uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   uint32_t __ret;
-  __ret = (uint32_t) __builtin_neon_vaddv_u32((int8x8_t)__rev0);
+  __ret = (uint32_t) __builtin_neon_vaddv_u32(__rev0);
   return __ret;
 }
 #endif
@@ -48719,14 +41721,14 @@ __ai uint32_t vaddv_u32(uint32x2_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai uint16_t vaddv_u16(uint16x4_t __p0) {
   uint16_t __ret;
-  __ret = (uint16_t) __builtin_neon_vaddv_u16((int8x8_t)__p0);
+  __ret = (uint16_t) __builtin_neon_vaddv_u16(__p0);
   return __ret;
 }
 #else
 __ai uint16_t vaddv_u16(uint16x4_t __p0) {
   uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   uint16_t __ret;
-  __ret = (uint16_t) __builtin_neon_vaddv_u16((int8x8_t)__rev0);
+  __ret = (uint16_t) __builtin_neon_vaddv_u16(__rev0);
   return __ret;
 }
 #endif
@@ -48734,14 +41736,14 @@ __ai uint16_t vaddv_u16(uint16x4_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int8_t vaddv_s8(int8x8_t __p0) {
   int8_t __ret;
-  __ret = (int8_t) __builtin_neon_vaddv_s8((int8x8_t)__p0);
+  __ret = (int8_t) __builtin_neon_vaddv_s8(__p0);
   return __ret;
 }
 #else
 __ai int8_t vaddv_s8(int8x8_t __p0) {
   int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
   int8_t __ret;
-  __ret = (int8_t) __builtin_neon_vaddv_s8((int8x8_t)__rev0);
+  __ret = (int8_t) __builtin_neon_vaddv_s8(__rev0);
   return __ret;
 }
 #endif
@@ -48749,14 +41751,14 @@ __ai int8_t vaddv_s8(int8x8_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai float32_t vaddv_f32(float32x2_t __p0) {
   float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vaddv_f32((int8x8_t)__p0);
+  __ret = (float32_t) __builtin_neon_vaddv_f32(__p0);
   return __ret;
 }
 #else
 __ai float32_t vaddv_f32(float32x2_t __p0) {
   float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   float32_t __ret;
-  __ret = (float32_t) __builtin_neon_vaddv_f32((int8x8_t)__rev0);
+  __ret = (float32_t) __builtin_neon_vaddv_f32(__rev0);
   return __ret;
 }
 #endif
@@ -48764,14 +41766,14 @@ __ai float32_t vaddv_f32(float32x2_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int32_t vaddv_s32(int32x2_t __p0) {
   int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vaddv_s32((int8x8_t)__p0);
+  __ret = (int32_t) __builtin_neon_vaddv_s32(__p0);
   return __ret;
 }
 #else
 __ai int32_t vaddv_s32(int32x2_t __p0) {
   int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
   int32_t __ret;
-  __ret = (int32_t) __builtin_neon_vaddv_s32((int8x8_t)__rev0);
+  __ret = (int32_t) __builtin_neon_vaddv_s32(__rev0);
   return __ret;
 }
 #endif
@@ -48779,32 +41781,23 @@ __ai int32_t vaddv_s32(int32x2_t __p0) {
 #ifdef __LITTLE_ENDIAN__
 __ai int16_t vaddv_s16(int16x4_t __p0) {
   int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vaddv_s16((int8x8_t)__p0);
+  __ret = (int16_t) __builtin_neon_vaddv_s16(__p0);
   return __ret;
 }
 #else
 __ai int16_t vaddv_s16(int16x4_t __p0) {
   int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
   int16_t __ret;
-  __ret = (int16_t) __builtin_neon_vaddv_s16((int8x8_t)__rev0);
+  __ret = (int16_t) __builtin_neon_vaddv_s16(__rev0);
   return __ret;
 }
 #endif

-#ifdef __LITTLE_ENDIAN__
 __ai poly64x1_t vbsl_p64(uint64x1_t __p0, poly64x1_t __p1, poly64x1_t __p2) {
   poly64x1_t __ret;
   __ret = (poly64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 6);
   return __ret;
 }
-#else
-__ai poly64x1_t vbsl_p64(uint64x1_t __p0, poly64x1_t __p1, poly64x1_t __p2) {
-
poly64x1_t __ret; - __ret = (poly64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 6); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vbslq_p64(uint64x2_t __p0, poly64x2_t __p1, poly64x2_t __p2) { poly64x2_t __ret; @@ -48841,20 +41834,11 @@ __ai float64x2_t vbslq_f64(uint64x2_t __p0, float64x2_t __p1, float64x2_t __p2) } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vbsl_f64(uint64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); return __ret; } -#else -__ai float64x1_t vbsl_f64(uint64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcageq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; @@ -48872,48 +41856,21 @@ __ai uint64x2_t vcageq_f64(float64x2_t __p0, float64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcage_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vcage_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcaged_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcaged_f64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcaged_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcaged_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcages_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcages_f32(__p0, __p1); return __ret; } -#else -__ai uint32_t vcages_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcages_f32(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcagtq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; @@ -48931,48 +41888,21 @@ __ai uint64x2_t vcagtq_f64(float64x2_t __p0, float64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcagt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vcagt_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcagtd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcagtd_f64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcagtd_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcagtd_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcagts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcagts_f32(__p0, __p1); return __ret; } -#else -__ai uint32_t vcagts_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcagts_f32(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcaleq_f64(float64x2_t 
__p0, float64x2_t __p1) { uint64x2_t __ret; @@ -48990,48 +41920,21 @@ __ai uint64x2_t vcaleq_f64(float64x2_t __p0, float64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcale_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vcale_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcaled_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcaled_f64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcaled_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcaled_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcales_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcales_f32(__p0, __p1); return __ret; } -#else -__ai uint32_t vcales_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcales_f32(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcaltq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; @@ -49049,62 +41952,26 @@ __ai uint64x2_t vcaltq_f64(float64x2_t __p0, float64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcalt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vcalt_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcaltd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcaltd_f64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcaltd_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcaltd_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcalts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcalts_f32(__p0, __p1); return __ret; } -#else -__ai uint32_t vcalts_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcalts_f32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vceq_p64(poly64x1_t __p0, poly64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 == __p1); return __ret; } -#else -__ai uint64x1_t vceq_p64(poly64x1_t __p0, poly64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 == __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vceqq_p64(poly64x2_t __p0, poly64x2_t __p1) { uint64x2_t __ret; @@ -49173,104 +42040,41 @@ __ai uint64x2_t vceqq_s64(int64x2_t __p0, int64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vceq_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 == __p1); return __ret; } -#else -__ai uint64x1_t vceq_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 == __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vceq_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 == __p1); return __ret; } 
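/* Illustrative usage sketch (editorial example, not from the patch): a
 * one-lane vector such as float64x1_t has no lane order to reverse, which
 * is why this update collapses the separate __LITTLE_ENDIAN__ and
 * big-endian copies of vceq_f64 and its relatives into a single
 * definition. Assumes an AArch64 target. */
#if 0 /* example only */
#include <arm_neon.h>
#include <inttypes.h>
#include <stdio.h>

int main(void) {
    float64x1_t a = vdup_n_f64(1.5);
    float64x1_t b = vdup_n_f64(1.5);
    /* Comparison intrinsics return a mask: all ones when the lanes are
     * equal, all zeros otherwise. */
    uint64x1_t m = vceq_f64(a, b);
    printf("0x%016" PRIx64 "\n", vget_lane_u64(m, 0));
    return 0;
}
#endif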
-#else -__ai uint64x1_t vceq_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 == __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vceq_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 == __p1); return __ret; } -#else -__ai uint64x1_t vceq_s64(int64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 == __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vceqd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vceqd_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vceqd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vceqd_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vceqd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vceqd_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vceqd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vceqd_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vceqd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vceqd_f64(__p0, __p1); return __ret; } -#else -__ai uint64_t vceqd_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vceqd_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vceqs_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vceqs_f32(__p0, __p1); return __ret; } -#else -__ai uint32_t vceqs_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vceqs_f32(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint8x8_t vceqz_p8(poly8x8_t __p0) { uint8x8_t __ret; @@ -49287,20 +42091,11 @@ __ai uint8x8_t vceqz_p8(poly8x8_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vceqz_p64(poly64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vceqz_p64(poly64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vceqz_p16(poly16x4_t __p0) { uint16x4_t __ret; @@ -49557,20 +42352,11 @@ __ai uint32x2_t vceqz_u32(uint32x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vceqz_u64(uint64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vceqz_u64(uint64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vceqz_u16(uint16x4_t __p0) { uint16x4_t __ret; @@ -49603,20 +42389,11 @@ __ai uint8x8_t vceqz_s8(int8x8_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vceqz_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vceqz_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vceqz_f32(float32x2_t __p0) { uint32x2_t __ret; @@ -49649,20 +42426,11 @@ __ai uint32x2_t vceqz_s32(int32x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t 
vceqz_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vceqz_s64(int64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vceqz_s16(int16x4_t __p0) { uint16x4_t __ret; @@ -49679,62 +42447,26 @@ __ai uint16x4_t vceqz_s16(int16x4_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vceqzd_u64(uint64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vceqzd_u64(__p0); return __ret; } -#else -__ai uint64_t vceqzd_u64(uint64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vceqzd_u64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vceqzd_s64(int64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vceqzd_s64(__p0); return __ret; } -#else -__ai int64_t vceqzd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vceqzd_s64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vceqzd_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vceqzd_f64(__p0); return __ret; } -#else -__ai uint64_t vceqzd_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vceqzd_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vceqzs_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vceqzs_f32(__p0); return __ret; } -#else -__ai uint32_t vceqzs_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vceqzs_f32(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgeq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; @@ -49786,104 +42518,41 @@ __ai uint64x2_t vcgeq_s64(int64x2_t __p0, int64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcge_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } -#else -__ai uint64x1_t vcge_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 >= __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcge_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } -#else -__ai uint64x1_t vcge_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 >= __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcge_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } -#else -__ai uint64x1_t vcge_s64(int64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 >= __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcged_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcged_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vcged_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcged_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcged_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcged_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcged_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcged_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcged_f64(float64_t __p0, 
float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcged_f64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcged_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcged_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcges_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcges_f32(__p0, __p1); return __ret; } -#else -__ai uint32_t vcges_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcges_f32(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcgezq_s8(int8x16_t __p0) { uint8x16_t __ret; @@ -49996,20 +42665,11 @@ __ai uint8x8_t vcgez_s8(int8x8_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcgez_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vcgez_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcgez_f32(float32x2_t __p0) { uint32x2_t __ret; @@ -50042,20 +42702,11 @@ __ai uint32x2_t vcgez_s32(int32x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcgez_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vcgez_s64(int64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcgez_s16(int16x4_t __p0) { uint16x4_t __ret; @@ -50072,48 +42723,21 @@ __ai uint16x4_t vcgez_s16(int16x4_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcgezd_s64(int64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcgezd_s64(__p0); return __ret; } -#else -__ai int64_t vcgezd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgezd_s64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcgezd_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcgezd_f64(__p0); return __ret; } -#else -__ai uint64_t vcgezd_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgezd_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcgezs_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcgezs_f32(__p0); return __ret; } -#else -__ai uint32_t vcgezs_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgezs_f32(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcgtq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; @@ -50165,104 +42789,41 @@ __ai uint64x2_t vcgtq_s64(int64x2_t __p0, int64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcgt_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 > __p1); return __ret; } -#else -__ai uint64x1_t vcgt_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcgt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 > __p1); return __ret; } -#else -__ai uint64x1_t vcgt_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); - return __ret; -} -#endif - -#ifdef 
__LITTLE_ENDIAN__ __ai uint64x1_t vcgt_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 > __p1); return __ret; } -#else -__ai uint64x1_t vcgt_s64(int64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcgtd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcgtd_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vcgtd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgtd_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcgtd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcgtd_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcgtd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtd_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcgtd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcgtd_f64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcgtd_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtd_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcgts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcgts_f32(__p0, __p1); return __ret; } -#else -__ai uint32_t vcgts_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgts_f32(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcgtzq_s8(int8x16_t __p0) { uint8x16_t __ret; @@ -50375,20 +42936,11 @@ __ai uint8x8_t vcgtz_s8(int8x8_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcgtz_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vcgtz_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcgtz_f32(float32x2_t __p0) { uint32x2_t __ret; @@ -50421,20 +42973,11 @@ __ai uint32x2_t vcgtz_s32(int32x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcgtz_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vcgtz_s64(int64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcgtz_s16(int16x4_t __p0) { uint16x4_t __ret; @@ -50451,48 +42994,21 @@ __ai uint16x4_t vcgtz_s16(int16x4_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcgtzd_s64(int64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcgtzd_s64(__p0); return __ret; } -#else -__ai int64_t vcgtzd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgtzd_s64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcgtzd_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcgtzd_f64(__p0); return __ret; } -#else -__ai uint64_t vcgtzd_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtzd_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcgtzs_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) 
__builtin_neon_vcgtzs_f32(__p0); return __ret; } -#else -__ai uint32_t vcgtzs_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgtzs_f32(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcleq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; @@ -50544,104 +43060,41 @@ __ai uint64x2_t vcleq_s64(int64x2_t __p0, int64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcle_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } -#else -__ai uint64x1_t vcle_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 <= __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcle_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } -#else -__ai uint64x1_t vcle_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 <= __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcle_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 <= __p1); return __ret; } -#else -__ai uint64x1_t vcle_s64(int64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 <= __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcled_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcled_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcled_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcled_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcled_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcled_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vcled_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcled_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcled_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcled_f64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcled_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcled_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcles_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcles_f32(__p0, __p1); return __ret; } -#else -__ai uint32_t vcles_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcles_f32(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vclezq_s8(int8x16_t __p0) { uint8x16_t __ret; @@ -50754,20 +43207,11 @@ __ai uint8x8_t vclez_s8(int8x8_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vclez_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vclez_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vclez_f32(float32x2_t __p0) { uint32x2_t __ret; @@ -50800,20 +43244,11 @@ __ai uint32x2_t vclez_s32(int32x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vclez_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); return __ret; } -#else 
-__ai uint64x1_t vclez_s64(int64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vclez_s16(int16x4_t __p0) { uint16x4_t __ret; @@ -50830,48 +43265,21 @@ __ai uint16x4_t vclez_s16(int16x4_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64_t vclezd_s64(int64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vclezd_s64(__p0); return __ret; } -#else -__ai int64_t vclezd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vclezd_s64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vclezd_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vclezd_f64(__p0); return __ret; } -#else -__ai uint64_t vclezd_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vclezd_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vclezs_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vclezs_f32(__p0); return __ret; } -#else -__ai uint32_t vclezs_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vclezs_f32(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcltq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; @@ -50923,104 +43331,41 @@ __ai uint64x2_t vcltq_s64(int64x2_t __p0, int64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vclt_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 < __p1); return __ret; } -#else -__ai uint64x1_t vclt_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vclt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 < __p1); return __ret; } -#else -__ai uint64x1_t vclt_f64(float64x1_t __p0, float64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vclt_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0 < __p1); return __ret; } -#else -__ai uint64x1_t vclt_s64(int64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcltd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcltd_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcltd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltd_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcltd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcltd_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vcltd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcltd_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcltd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcltd_f64(__p0, __p1); return __ret; } -#else -__ai uint64_t vcltd_f64(float64_t __p0, float64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltd_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vclts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vclts_f32(__p0, __p1); return __ret; 
} -#else -__ai uint32_t vclts_f32(float32_t __p0, float32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vclts_f32(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vcltzq_s8(int8x16_t __p0) { uint8x16_t __ret; @@ -51133,20 +43478,11 @@ __ai uint8x8_t vcltz_s8(int8x8_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcltz_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vcltz_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint32x2_t vcltz_f32(float32x2_t __p0) { uint32x2_t __ret; @@ -51179,20 +43515,11 @@ __ai uint32x2_t vcltz_s32(int32x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcltz_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vcltz_s64(int64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vcltz_s16(int16x4_t __p0) { uint16x4_t __ret; @@ -51209,48 +43536,21 @@ __ai uint16x4_t vcltz_s16(int16x4_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcltzd_s64(int64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcltzd_s64(__p0); return __ret; } -#else -__ai int64_t vcltzd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcltzd_s64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcltzd_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcltzd_f64(__p0); return __ret; } -#else -__ai uint64_t vcltzd_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltzd_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcltzs_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcltzs_f32(__p0); return __ret; } -#else -__ai uint32_t vcltzs_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcltzs_f32(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vcombine_p64(poly64x1_t __p0, poly64x1_t __p1) { poly64x2_t __ret; @@ -51379,7 +43679,7 @@ __ai float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __s2_21 = __p2_21; \ uint64x2_t __rev0_21; __rev0_21 = __builtin_shufflevector(__s0_21, __s0_21, 1, 0); \ uint64x2_t __ret_21; \ - __ret_21 = __noswap_vsetq_lane_u64(__noswap_vget_lane_u64(__s2_21, __p3_21), __rev0_21, __p1_21); \ + __ret_21 = __noswap_vsetq_lane_u64(vget_lane_u64(__s2_21, __p3_21), __rev0_21, __p1_21); \ __ret_21 = __builtin_shufflevector(__ret_21, __ret_21, 1, 0); \ __ret_21; \ }) @@ -51483,7 +43783,7 @@ __ai float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { int64x1_t __s2_31 = __p2_31; \ int64x2_t __rev0_31; __rev0_31 = __builtin_shufflevector(__s0_31, __s0_31, 1, 0); \ int64x2_t __ret_31; \ - __ret_31 = __noswap_vsetq_lane_s64(__noswap_vget_lane_s64(__s2_31, __p3_31), __rev0_31, __p1_31); \ + __ret_31 = __noswap_vsetq_lane_s64(vget_lane_s64(__s2_31, __p3_31), __rev0_31, __p1_31); \ __ret_31 = __builtin_shufflevector(__ret_31, __ret_31, 1, 0); \ __ret_31; \ }) @@ -51594,7 +43894,6 @@ __ai float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vcopy_lane_u64(__p0_42, __p1_42, __p2_42, 
__p3_42) __extension__ ({ \ uint64x1_t __s0_42 = __p0_42; \ uint64x1_t __s2_42 = __p2_42; \ @@ -51602,653 +43901,598 @@ __ai float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { __ret_42 = vset_lane_u64(vget_lane_u64(__s2_42, __p3_42), __s0_42, __p1_42); \ __ret_42; \ }) -#else -#define vcopy_lane_u64(__p0_43, __p1_43, __p2_43, __p3_43) __extension__ ({ \ - uint64x1_t __s0_43 = __p0_43; \ - uint64x1_t __s2_43 = __p2_43; \ - uint64x1_t __ret_43; \ - __ret_43 = __noswap_vset_lane_u64(__noswap_vget_lane_u64(__s2_43, __p3_43), __s0_43, __p1_43); \ +#ifdef __LITTLE_ENDIAN__ +#define vcopy_lane_u16(__p0_43, __p1_43, __p2_43, __p3_43) __extension__ ({ \ + uint16x4_t __s0_43 = __p0_43; \ + uint16x4_t __s2_43 = __p2_43; \ + uint16x4_t __ret_43; \ + __ret_43 = vset_lane_u16(vget_lane_u16(__s2_43, __p3_43), __s0_43, __p1_43); \ __ret_43; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ +#else #define vcopy_lane_u16(__p0_44, __p1_44, __p2_44, __p3_44) __extension__ ({ \ uint16x4_t __s0_44 = __p0_44; \ uint16x4_t __s2_44 = __p2_44; \ + uint16x4_t __rev0_44; __rev0_44 = __builtin_shufflevector(__s0_44, __s0_44, 3, 2, 1, 0); \ + uint16x4_t __rev2_44; __rev2_44 = __builtin_shufflevector(__s2_44, __s2_44, 3, 2, 1, 0); \ uint16x4_t __ret_44; \ - __ret_44 = vset_lane_u16(vget_lane_u16(__s2_44, __p3_44), __s0_44, __p1_44); \ + __ret_44 = __noswap_vset_lane_u16(__noswap_vget_lane_u16(__rev2_44, __p3_44), __rev0_44, __p1_44); \ + __ret_44 = __builtin_shufflevector(__ret_44, __ret_44, 3, 2, 1, 0); \ __ret_44; \ }) -#else -#define vcopy_lane_u16(__p0_45, __p1_45, __p2_45, __p3_45) __extension__ ({ \ - uint16x4_t __s0_45 = __p0_45; \ - uint16x4_t __s2_45 = __p2_45; \ - uint16x4_t __rev0_45; __rev0_45 = __builtin_shufflevector(__s0_45, __s0_45, 3, 2, 1, 0); \ - uint16x4_t __rev2_45; __rev2_45 = __builtin_shufflevector(__s2_45, __s2_45, 3, 2, 1, 0); \ - uint16x4_t __ret_45; \ - __ret_45 = __noswap_vset_lane_u16(__noswap_vget_lane_u16(__rev2_45, __p3_45), __rev0_45, __p1_45); \ - __ret_45 = __builtin_shufflevector(__ret_45, __ret_45, 3, 2, 1, 0); \ - __ret_45; \ -}) #endif #ifdef __LITTLE_ENDIAN__ +#define vcopy_lane_s8(__p0_45, __p1_45, __p2_45, __p3_45) __extension__ ({ \ + int8x8_t __s0_45 = __p0_45; \ + int8x8_t __s2_45 = __p2_45; \ + int8x8_t __ret_45; \ + __ret_45 = vset_lane_s8(vget_lane_s8(__s2_45, __p3_45), __s0_45, __p1_45); \ + __ret_45; \ +}) +#else #define vcopy_lane_s8(__p0_46, __p1_46, __p2_46, __p3_46) __extension__ ({ \ int8x8_t __s0_46 = __p0_46; \ int8x8_t __s2_46 = __p2_46; \ + int8x8_t __rev0_46; __rev0_46 = __builtin_shufflevector(__s0_46, __s0_46, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_46; __rev2_46 = __builtin_shufflevector(__s2_46, __s2_46, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x8_t __ret_46; \ - __ret_46 = vset_lane_s8(vget_lane_s8(__s2_46, __p3_46), __s0_46, __p1_46); \ + __ret_46 = __noswap_vset_lane_s8(__noswap_vget_lane_s8(__rev2_46, __p3_46), __rev0_46, __p1_46); \ + __ret_46 = __builtin_shufflevector(__ret_46, __ret_46, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_46; \ }) -#else -#define vcopy_lane_s8(__p0_47, __p1_47, __p2_47, __p3_47) __extension__ ({ \ - int8x8_t __s0_47 = __p0_47; \ - int8x8_t __s2_47 = __p2_47; \ - int8x8_t __rev0_47; __rev0_47 = __builtin_shufflevector(__s0_47, __s0_47, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_47; __rev2_47 = __builtin_shufflevector(__s2_47, __s2_47, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret_47; \ - __ret_47 = __noswap_vset_lane_s8(__noswap_vget_lane_s8(__rev2_47, __p3_47), __rev0_47, __p1_47); \ - __ret_47 = __builtin_shufflevector(__ret_47, 
__ret_47, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_47; \ -}) #endif #ifdef __LITTLE_ENDIAN__ +#define vcopy_lane_f32(__p0_47, __p1_47, __p2_47, __p3_47) __extension__ ({ \ + float32x2_t __s0_47 = __p0_47; \ + float32x2_t __s2_47 = __p2_47; \ + float32x2_t __ret_47; \ + __ret_47 = vset_lane_f32(vget_lane_f32(__s2_47, __p3_47), __s0_47, __p1_47); \ + __ret_47; \ +}) +#else #define vcopy_lane_f32(__p0_48, __p1_48, __p2_48, __p3_48) __extension__ ({ \ float32x2_t __s0_48 = __p0_48; \ float32x2_t __s2_48 = __p2_48; \ + float32x2_t __rev0_48; __rev0_48 = __builtin_shufflevector(__s0_48, __s0_48, 1, 0); \ + float32x2_t __rev2_48; __rev2_48 = __builtin_shufflevector(__s2_48, __s2_48, 1, 0); \ float32x2_t __ret_48; \ - __ret_48 = vset_lane_f32(vget_lane_f32(__s2_48, __p3_48), __s0_48, __p1_48); \ + __ret_48 = __noswap_vset_lane_f32(__noswap_vget_lane_f32(__rev2_48, __p3_48), __rev0_48, __p1_48); \ + __ret_48 = __builtin_shufflevector(__ret_48, __ret_48, 1, 0); \ __ret_48; \ }) -#else -#define vcopy_lane_f32(__p0_49, __p1_49, __p2_49, __p3_49) __extension__ ({ \ - float32x2_t __s0_49 = __p0_49; \ - float32x2_t __s2_49 = __p2_49; \ - float32x2_t __rev0_49; __rev0_49 = __builtin_shufflevector(__s0_49, __s0_49, 1, 0); \ - float32x2_t __rev2_49; __rev2_49 = __builtin_shufflevector(__s2_49, __s2_49, 1, 0); \ - float32x2_t __ret_49; \ - __ret_49 = __noswap_vset_lane_f32(__noswap_vget_lane_f32(__rev2_49, __p3_49), __rev0_49, __p1_49); \ - __ret_49 = __builtin_shufflevector(__ret_49, __ret_49, 1, 0); \ - __ret_49; \ -}) #endif #ifdef __LITTLE_ENDIAN__ +#define vcopy_lane_s32(__p0_49, __p1_49, __p2_49, __p3_49) __extension__ ({ \ + int32x2_t __s0_49 = __p0_49; \ + int32x2_t __s2_49 = __p2_49; \ + int32x2_t __ret_49; \ + __ret_49 = vset_lane_s32(vget_lane_s32(__s2_49, __p3_49), __s0_49, __p1_49); \ + __ret_49; \ +}) +#else #define vcopy_lane_s32(__p0_50, __p1_50, __p2_50, __p3_50) __extension__ ({ \ int32x2_t __s0_50 = __p0_50; \ int32x2_t __s2_50 = __p2_50; \ + int32x2_t __rev0_50; __rev0_50 = __builtin_shufflevector(__s0_50, __s0_50, 1, 0); \ + int32x2_t __rev2_50; __rev2_50 = __builtin_shufflevector(__s2_50, __s2_50, 1, 0); \ int32x2_t __ret_50; \ - __ret_50 = vset_lane_s32(vget_lane_s32(__s2_50, __p3_50), __s0_50, __p1_50); \ + __ret_50 = __noswap_vset_lane_s32(__noswap_vget_lane_s32(__rev2_50, __p3_50), __rev0_50, __p1_50); \ + __ret_50 = __builtin_shufflevector(__ret_50, __ret_50, 1, 0); \ __ret_50; \ }) -#else -#define vcopy_lane_s32(__p0_51, __p1_51, __p2_51, __p3_51) __extension__ ({ \ - int32x2_t __s0_51 = __p0_51; \ - int32x2_t __s2_51 = __p2_51; \ - int32x2_t __rev0_51; __rev0_51 = __builtin_shufflevector(__s0_51, __s0_51, 1, 0); \ - int32x2_t __rev2_51; __rev2_51 = __builtin_shufflevector(__s2_51, __s2_51, 1, 0); \ - int32x2_t __ret_51; \ - __ret_51 = __noswap_vset_lane_s32(__noswap_vget_lane_s32(__rev2_51, __p3_51), __rev0_51, __p1_51); \ - __ret_51 = __builtin_shufflevector(__ret_51, __ret_51, 1, 0); \ - __ret_51; \ -}) #endif +#define vcopy_lane_s64(__p0_51, __p1_51, __p2_51, __p3_51) __extension__ ({ \ + int64x1_t __s0_51 = __p0_51; \ + int64x1_t __s2_51 = __p2_51; \ + int64x1_t __ret_51; \ + __ret_51 = vset_lane_s64(vget_lane_s64(__s2_51, __p3_51), __s0_51, __p1_51); \ + __ret_51; \ +}) #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s64(__p0_52, __p1_52, __p2_52, __p3_52) __extension__ ({ \ - int64x1_t __s0_52 = __p0_52; \ - int64x1_t __s2_52 = __p2_52; \ - int64x1_t __ret_52; \ - __ret_52 = vset_lane_s64(vget_lane_s64(__s2_52, __p3_52), __s0_52, __p1_52); \ +#define vcopy_lane_s16(__p0_52, __p1_52, 
__p2_52, __p3_52) __extension__ ({ \ + int16x4_t __s0_52 = __p0_52; \ + int16x4_t __s2_52 = __p2_52; \ + int16x4_t __ret_52; \ + __ret_52 = vset_lane_s16(vget_lane_s16(__s2_52, __p3_52), __s0_52, __p1_52); \ __ret_52; \ }) #else -#define vcopy_lane_s64(__p0_53, __p1_53, __p2_53, __p3_53) __extension__ ({ \ - int64x1_t __s0_53 = __p0_53; \ - int64x1_t __s2_53 = __p2_53; \ - int64x1_t __ret_53; \ - __ret_53 = __noswap_vset_lane_s64(__noswap_vget_lane_s64(__s2_53, __p3_53), __s0_53, __p1_53); \ +#define vcopy_lane_s16(__p0_53, __p1_53, __p2_53, __p3_53) __extension__ ({ \ + int16x4_t __s0_53 = __p0_53; \ + int16x4_t __s2_53 = __p2_53; \ + int16x4_t __rev0_53; __rev0_53 = __builtin_shufflevector(__s0_53, __s0_53, 3, 2, 1, 0); \ + int16x4_t __rev2_53; __rev2_53 = __builtin_shufflevector(__s2_53, __s2_53, 3, 2, 1, 0); \ + int16x4_t __ret_53; \ + __ret_53 = __noswap_vset_lane_s16(__noswap_vget_lane_s16(__rev2_53, __p3_53), __rev0_53, __p1_53); \ + __ret_53 = __builtin_shufflevector(__ret_53, __ret_53, 3, 2, 1, 0); \ __ret_53; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s16(__p0_54, __p1_54, __p2_54, __p3_54) __extension__ ({ \ - int16x4_t __s0_54 = __p0_54; \ - int16x4_t __s2_54 = __p2_54; \ - int16x4_t __ret_54; \ - __ret_54 = vset_lane_s16(vget_lane_s16(__s2_54, __p3_54), __s0_54, __p1_54); \ +#define vcopyq_laneq_p8(__p0_54, __p1_54, __p2_54, __p3_54) __extension__ ({ \ + poly8x16_t __s0_54 = __p0_54; \ + poly8x16_t __s2_54 = __p2_54; \ + poly8x16_t __ret_54; \ + __ret_54 = vsetq_lane_p8(vgetq_lane_p8(__s2_54, __p3_54), __s0_54, __p1_54); \ __ret_54; \ }) #else -#define vcopy_lane_s16(__p0_55, __p1_55, __p2_55, __p3_55) __extension__ ({ \ - int16x4_t __s0_55 = __p0_55; \ - int16x4_t __s2_55 = __p2_55; \ - int16x4_t __rev0_55; __rev0_55 = __builtin_shufflevector(__s0_55, __s0_55, 3, 2, 1, 0); \ - int16x4_t __rev2_55; __rev2_55 = __builtin_shufflevector(__s2_55, __s2_55, 3, 2, 1, 0); \ - int16x4_t __ret_55; \ - __ret_55 = __noswap_vset_lane_s16(__noswap_vget_lane_s16(__rev2_55, __p3_55), __rev0_55, __p1_55); \ - __ret_55 = __builtin_shufflevector(__ret_55, __ret_55, 3, 2, 1, 0); \ +#define vcopyq_laneq_p8(__p0_55, __p1_55, __p2_55, __p3_55) __extension__ ({ \ + poly8x16_t __s0_55 = __p0_55; \ + poly8x16_t __s2_55 = __p2_55; \ + poly8x16_t __rev0_55; __rev0_55 = __builtin_shufflevector(__s0_55, __s0_55, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev2_55; __rev2_55 = __builtin_shufflevector(__s2_55, __s2_55, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __ret_55; \ + __ret_55 = __noswap_vsetq_lane_p8(__noswap_vgetq_lane_p8(__rev2_55, __p3_55), __rev0_55, __p1_55); \ + __ret_55 = __builtin_shufflevector(__ret_55, __ret_55, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_55; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_p8(__p0_56, __p1_56, __p2_56, __p3_56) __extension__ ({ \ - poly8x16_t __s0_56 = __p0_56; \ - poly8x16_t __s2_56 = __p2_56; \ - poly8x16_t __ret_56; \ - __ret_56 = vsetq_lane_p8(vgetq_lane_p8(__s2_56, __p3_56), __s0_56, __p1_56); \ +#define vcopyq_laneq_p16(__p0_56, __p1_56, __p2_56, __p3_56) __extension__ ({ \ + poly16x8_t __s0_56 = __p0_56; \ + poly16x8_t __s2_56 = __p2_56; \ + poly16x8_t __ret_56; \ + __ret_56 = vsetq_lane_p16(vgetq_lane_p16(__s2_56, __p3_56), __s0_56, __p1_56); \ __ret_56; \ }) #else -#define vcopyq_laneq_p8(__p0_57, __p1_57, __p2_57, __p3_57) __extension__ ({ \ - poly8x16_t __s0_57 = __p0_57; \ - poly8x16_t __s2_57 = __p2_57; \ - poly8x16_t __rev0_57; __rev0_57 = 
__builtin_shufflevector(__s0_57, __s0_57, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev2_57; __rev2_57 = __builtin_shufflevector(__s2_57, __s2_57, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __ret_57; \ - __ret_57 = __noswap_vsetq_lane_p8(__noswap_vgetq_lane_p8(__rev2_57, __p3_57), __rev0_57, __p1_57); \ - __ret_57 = __builtin_shufflevector(__ret_57, __ret_57, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_laneq_p16(__p0_57, __p1_57, __p2_57, __p3_57) __extension__ ({ \ + poly16x8_t __s0_57 = __p0_57; \ + poly16x8_t __s2_57 = __p2_57; \ + poly16x8_t __rev0_57; __rev0_57 = __builtin_shufflevector(__s0_57, __s0_57, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __rev2_57; __rev2_57 = __builtin_shufflevector(__s2_57, __s2_57, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __ret_57; \ + __ret_57 = __noswap_vsetq_lane_p16(__noswap_vgetq_lane_p16(__rev2_57, __p3_57), __rev0_57, __p1_57); \ + __ret_57 = __builtin_shufflevector(__ret_57, __ret_57, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_57; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_p16(__p0_58, __p1_58, __p2_58, __p3_58) __extension__ ({ \ - poly16x8_t __s0_58 = __p0_58; \ - poly16x8_t __s2_58 = __p2_58; \ - poly16x8_t __ret_58; \ - __ret_58 = vsetq_lane_p16(vgetq_lane_p16(__s2_58, __p3_58), __s0_58, __p1_58); \ +#define vcopyq_laneq_u8(__p0_58, __p1_58, __p2_58, __p3_58) __extension__ ({ \ + uint8x16_t __s0_58 = __p0_58; \ + uint8x16_t __s2_58 = __p2_58; \ + uint8x16_t __ret_58; \ + __ret_58 = vsetq_lane_u8(vgetq_lane_u8(__s2_58, __p3_58), __s0_58, __p1_58); \ __ret_58; \ }) #else -#define vcopyq_laneq_p16(__p0_59, __p1_59, __p2_59, __p3_59) __extension__ ({ \ - poly16x8_t __s0_59 = __p0_59; \ - poly16x8_t __s2_59 = __p2_59; \ - poly16x8_t __rev0_59; __rev0_59 = __builtin_shufflevector(__s0_59, __s0_59, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __rev2_59; __rev2_59 = __builtin_shufflevector(__s2_59, __s2_59, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __ret_59; \ - __ret_59 = __noswap_vsetq_lane_p16(__noswap_vgetq_lane_p16(__rev2_59, __p3_59), __rev0_59, __p1_59); \ - __ret_59 = __builtin_shufflevector(__ret_59, __ret_59, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_laneq_u8(__p0_59, __p1_59, __p2_59, __p3_59) __extension__ ({ \ + uint8x16_t __s0_59 = __p0_59; \ + uint8x16_t __s2_59 = __p2_59; \ + uint8x16_t __rev0_59; __rev0_59 = __builtin_shufflevector(__s0_59, __s0_59, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev2_59; __rev2_59 = __builtin_shufflevector(__s2_59, __s2_59, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_59; \ + __ret_59 = __noswap_vsetq_lane_u8(__noswap_vgetq_lane_u8(__rev2_59, __p3_59), __rev0_59, __p1_59); \ + __ret_59 = __builtin_shufflevector(__ret_59, __ret_59, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_59; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u8(__p0_60, __p1_60, __p2_60, __p3_60) __extension__ ({ \ - uint8x16_t __s0_60 = __p0_60; \ - uint8x16_t __s2_60 = __p2_60; \ - uint8x16_t __ret_60; \ - __ret_60 = vsetq_lane_u8(vgetq_lane_u8(__s2_60, __p3_60), __s0_60, __p1_60); \ +#define vcopyq_laneq_u32(__p0_60, __p1_60, __p2_60, __p3_60) __extension__ ({ \ + uint32x4_t __s0_60 = __p0_60; \ + uint32x4_t __s2_60 = __p2_60; \ + uint32x4_t __ret_60; \ + __ret_60 = vsetq_lane_u32(vgetq_lane_u32(__s2_60, __p3_60), __s0_60, __p1_60); \ __ret_60; \ }) #else -#define vcopyq_laneq_u8(__p0_61, __p1_61, __p2_61, __p3_61) __extension__ ({ \ - uint8x16_t __s0_61 = __p0_61; \ 
- uint8x16_t __s2_61 = __p2_61; \ - uint8x16_t __rev0_61; __rev0_61 = __builtin_shufflevector(__s0_61, __s0_61, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_61; __rev2_61 = __builtin_shufflevector(__s2_61, __s2_61, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_61; \ - __ret_61 = __noswap_vsetq_lane_u8(__noswap_vgetq_lane_u8(__rev2_61, __p3_61), __rev0_61, __p1_61); \ - __ret_61 = __builtin_shufflevector(__ret_61, __ret_61, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_laneq_u32(__p0_61, __p1_61, __p2_61, __p3_61) __extension__ ({ \ + uint32x4_t __s0_61 = __p0_61; \ + uint32x4_t __s2_61 = __p2_61; \ + uint32x4_t __rev0_61; __rev0_61 = __builtin_shufflevector(__s0_61, __s0_61, 3, 2, 1, 0); \ + uint32x4_t __rev2_61; __rev2_61 = __builtin_shufflevector(__s2_61, __s2_61, 3, 2, 1, 0); \ + uint32x4_t __ret_61; \ + __ret_61 = __noswap_vsetq_lane_u32(__noswap_vgetq_lane_u32(__rev2_61, __p3_61), __rev0_61, __p1_61); \ + __ret_61 = __builtin_shufflevector(__ret_61, __ret_61, 3, 2, 1, 0); \ __ret_61; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u32(__p0_62, __p1_62, __p2_62, __p3_62) __extension__ ({ \ - uint32x4_t __s0_62 = __p0_62; \ - uint32x4_t __s2_62 = __p2_62; \ - uint32x4_t __ret_62; \ - __ret_62 = vsetq_lane_u32(vgetq_lane_u32(__s2_62, __p3_62), __s0_62, __p1_62); \ +#define vcopyq_laneq_u64(__p0_62, __p1_62, __p2_62, __p3_62) __extension__ ({ \ + uint64x2_t __s0_62 = __p0_62; \ + uint64x2_t __s2_62 = __p2_62; \ + uint64x2_t __ret_62; \ + __ret_62 = vsetq_lane_u64(vgetq_lane_u64(__s2_62, __p3_62), __s0_62, __p1_62); \ __ret_62; \ }) #else -#define vcopyq_laneq_u32(__p0_63, __p1_63, __p2_63, __p3_63) __extension__ ({ \ - uint32x4_t __s0_63 = __p0_63; \ - uint32x4_t __s2_63 = __p2_63; \ - uint32x4_t __rev0_63; __rev0_63 = __builtin_shufflevector(__s0_63, __s0_63, 3, 2, 1, 0); \ - uint32x4_t __rev2_63; __rev2_63 = __builtin_shufflevector(__s2_63, __s2_63, 3, 2, 1, 0); \ - uint32x4_t __ret_63; \ - __ret_63 = __noswap_vsetq_lane_u32(__noswap_vgetq_lane_u32(__rev2_63, __p3_63), __rev0_63, __p1_63); \ - __ret_63 = __builtin_shufflevector(__ret_63, __ret_63, 3, 2, 1, 0); \ +#define vcopyq_laneq_u64(__p0_63, __p1_63, __p2_63, __p3_63) __extension__ ({ \ + uint64x2_t __s0_63 = __p0_63; \ + uint64x2_t __s2_63 = __p2_63; \ + uint64x2_t __rev0_63; __rev0_63 = __builtin_shufflevector(__s0_63, __s0_63, 1, 0); \ + uint64x2_t __rev2_63; __rev2_63 = __builtin_shufflevector(__s2_63, __s2_63, 1, 0); \ + uint64x2_t __ret_63; \ + __ret_63 = __noswap_vsetq_lane_u64(__noswap_vgetq_lane_u64(__rev2_63, __p3_63), __rev0_63, __p1_63); \ + __ret_63 = __builtin_shufflevector(__ret_63, __ret_63, 1, 0); \ __ret_63; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u64(__p0_64, __p1_64, __p2_64, __p3_64) __extension__ ({ \ - uint64x2_t __s0_64 = __p0_64; \ - uint64x2_t __s2_64 = __p2_64; \ - uint64x2_t __ret_64; \ - __ret_64 = vsetq_lane_u64(vgetq_lane_u64(__s2_64, __p3_64), __s0_64, __p1_64); \ +#define vcopyq_laneq_u16(__p0_64, __p1_64, __p2_64, __p3_64) __extension__ ({ \ + uint16x8_t __s0_64 = __p0_64; \ + uint16x8_t __s2_64 = __p2_64; \ + uint16x8_t __ret_64; \ + __ret_64 = vsetq_lane_u16(vgetq_lane_u16(__s2_64, __p3_64), __s0_64, __p1_64); \ __ret_64; \ }) #else -#define vcopyq_laneq_u64(__p0_65, __p1_65, __p2_65, __p3_65) __extension__ ({ \ - uint64x2_t __s0_65 = __p0_65; \ - uint64x2_t __s2_65 = __p2_65; \ - uint64x2_t __rev0_65; __rev0_65 = __builtin_shufflevector(__s0_65, __s0_65, 1, 0); \ - uint64x2_t 
__rev2_65; __rev2_65 = __builtin_shufflevector(__s2_65, __s2_65, 1, 0); \ - uint64x2_t __ret_65; \ - __ret_65 = __noswap_vsetq_lane_u64(__noswap_vgetq_lane_u64(__rev2_65, __p3_65), __rev0_65, __p1_65); \ - __ret_65 = __builtin_shufflevector(__ret_65, __ret_65, 1, 0); \ +#define vcopyq_laneq_u16(__p0_65, __p1_65, __p2_65, __p3_65) __extension__ ({ \ + uint16x8_t __s0_65 = __p0_65; \ + uint16x8_t __s2_65 = __p2_65; \ + uint16x8_t __rev0_65; __rev0_65 = __builtin_shufflevector(__s0_65, __s0_65, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev2_65; __rev2_65 = __builtin_shufflevector(__s2_65, __s2_65, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret_65; \ + __ret_65 = __noswap_vsetq_lane_u16(__noswap_vgetq_lane_u16(__rev2_65, __p3_65), __rev0_65, __p1_65); \ + __ret_65 = __builtin_shufflevector(__ret_65, __ret_65, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_65; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u16(__p0_66, __p1_66, __p2_66, __p3_66) __extension__ ({ \ - uint16x8_t __s0_66 = __p0_66; \ - uint16x8_t __s2_66 = __p2_66; \ - uint16x8_t __ret_66; \ - __ret_66 = vsetq_lane_u16(vgetq_lane_u16(__s2_66, __p3_66), __s0_66, __p1_66); \ +#define vcopyq_laneq_s8(__p0_66, __p1_66, __p2_66, __p3_66) __extension__ ({ \ + int8x16_t __s0_66 = __p0_66; \ + int8x16_t __s2_66 = __p2_66; \ + int8x16_t __ret_66; \ + __ret_66 = vsetq_lane_s8(vgetq_lane_s8(__s2_66, __p3_66), __s0_66, __p1_66); \ __ret_66; \ }) #else -#define vcopyq_laneq_u16(__p0_67, __p1_67, __p2_67, __p3_67) __extension__ ({ \ - uint16x8_t __s0_67 = __p0_67; \ - uint16x8_t __s2_67 = __p2_67; \ - uint16x8_t __rev0_67; __rev0_67 = __builtin_shufflevector(__s0_67, __s0_67, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_67; __rev2_67 = __builtin_shufflevector(__s2_67, __s2_67, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret_67; \ - __ret_67 = __noswap_vsetq_lane_u16(__noswap_vgetq_lane_u16(__rev2_67, __p3_67), __rev0_67, __p1_67); \ - __ret_67 = __builtin_shufflevector(__ret_67, __ret_67, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_laneq_s8(__p0_67, __p1_67, __p2_67, __p3_67) __extension__ ({ \ + int8x16_t __s0_67 = __p0_67; \ + int8x16_t __s2_67 = __p2_67; \ + int8x16_t __rev0_67; __rev0_67 = __builtin_shufflevector(__s0_67, __s0_67, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev2_67; __rev2_67 = __builtin_shufflevector(__s2_67, __s2_67, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_67; \ + __ret_67 = __noswap_vsetq_lane_s8(__noswap_vgetq_lane_s8(__rev2_67, __p3_67), __rev0_67, __p1_67); \ + __ret_67 = __builtin_shufflevector(__ret_67, __ret_67, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_67; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s8(__p0_68, __p1_68, __p2_68, __p3_68) __extension__ ({ \ - int8x16_t __s0_68 = __p0_68; \ - int8x16_t __s2_68 = __p2_68; \ - int8x16_t __ret_68; \ - __ret_68 = vsetq_lane_s8(vgetq_lane_s8(__s2_68, __p3_68), __s0_68, __p1_68); \ +#define vcopyq_laneq_f32(__p0_68, __p1_68, __p2_68, __p3_68) __extension__ ({ \ + float32x4_t __s0_68 = __p0_68; \ + float32x4_t __s2_68 = __p2_68; \ + float32x4_t __ret_68; \ + __ret_68 = vsetq_lane_f32(vgetq_lane_f32(__s2_68, __p3_68), __s0_68, __p1_68); \ __ret_68; \ }) #else -#define vcopyq_laneq_s8(__p0_69, __p1_69, __p2_69, __p3_69) __extension__ ({ \ - int8x16_t __s0_69 = __p0_69; \ - int8x16_t __s2_69 = __p2_69; \ - int8x16_t __rev0_69; __rev0_69 = __builtin_shufflevector(__s0_69, __s0_69, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_69; __rev2_69 = 
__builtin_shufflevector(__s2_69, __s2_69, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_69; \ - __ret_69 = __noswap_vsetq_lane_s8(__noswap_vgetq_lane_s8(__rev2_69, __p3_69), __rev0_69, __p1_69); \ - __ret_69 = __builtin_shufflevector(__ret_69, __ret_69, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_laneq_f32(__p0_69, __p1_69, __p2_69, __p3_69) __extension__ ({ \ + float32x4_t __s0_69 = __p0_69; \ + float32x4_t __s2_69 = __p2_69; \ + float32x4_t __rev0_69; __rev0_69 = __builtin_shufflevector(__s0_69, __s0_69, 3, 2, 1, 0); \ + float32x4_t __rev2_69; __rev2_69 = __builtin_shufflevector(__s2_69, __s2_69, 3, 2, 1, 0); \ + float32x4_t __ret_69; \ + __ret_69 = __noswap_vsetq_lane_f32(__noswap_vgetq_lane_f32(__rev2_69, __p3_69), __rev0_69, __p1_69); \ + __ret_69 = __builtin_shufflevector(__ret_69, __ret_69, 3, 2, 1, 0); \ __ret_69; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_f32(__p0_70, __p1_70, __p2_70, __p3_70) __extension__ ({ \ - float32x4_t __s0_70 = __p0_70; \ - float32x4_t __s2_70 = __p2_70; \ - float32x4_t __ret_70; \ - __ret_70 = vsetq_lane_f32(vgetq_lane_f32(__s2_70, __p3_70), __s0_70, __p1_70); \ +#define vcopyq_laneq_s32(__p0_70, __p1_70, __p2_70, __p3_70) __extension__ ({ \ + int32x4_t __s0_70 = __p0_70; \ + int32x4_t __s2_70 = __p2_70; \ + int32x4_t __ret_70; \ + __ret_70 = vsetq_lane_s32(vgetq_lane_s32(__s2_70, __p3_70), __s0_70, __p1_70); \ __ret_70; \ }) #else -#define vcopyq_laneq_f32(__p0_71, __p1_71, __p2_71, __p3_71) __extension__ ({ \ - float32x4_t __s0_71 = __p0_71; \ - float32x4_t __s2_71 = __p2_71; \ - float32x4_t __rev0_71; __rev0_71 = __builtin_shufflevector(__s0_71, __s0_71, 3, 2, 1, 0); \ - float32x4_t __rev2_71; __rev2_71 = __builtin_shufflevector(__s2_71, __s2_71, 3, 2, 1, 0); \ - float32x4_t __ret_71; \ - __ret_71 = __noswap_vsetq_lane_f32(__noswap_vgetq_lane_f32(__rev2_71, __p3_71), __rev0_71, __p1_71); \ +#define vcopyq_laneq_s32(__p0_71, __p1_71, __p2_71, __p3_71) __extension__ ({ \ + int32x4_t __s0_71 = __p0_71; \ + int32x4_t __s2_71 = __p2_71; \ + int32x4_t __rev0_71; __rev0_71 = __builtin_shufflevector(__s0_71, __s0_71, 3, 2, 1, 0); \ + int32x4_t __rev2_71; __rev2_71 = __builtin_shufflevector(__s2_71, __s2_71, 3, 2, 1, 0); \ + int32x4_t __ret_71; \ + __ret_71 = __noswap_vsetq_lane_s32(__noswap_vgetq_lane_s32(__rev2_71, __p3_71), __rev0_71, __p1_71); \ __ret_71 = __builtin_shufflevector(__ret_71, __ret_71, 3, 2, 1, 0); \ __ret_71; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s32(__p0_72, __p1_72, __p2_72, __p3_72) __extension__ ({ \ - int32x4_t __s0_72 = __p0_72; \ - int32x4_t __s2_72 = __p2_72; \ - int32x4_t __ret_72; \ - __ret_72 = vsetq_lane_s32(vgetq_lane_s32(__s2_72, __p3_72), __s0_72, __p1_72); \ +#define vcopyq_laneq_s64(__p0_72, __p1_72, __p2_72, __p3_72) __extension__ ({ \ + int64x2_t __s0_72 = __p0_72; \ + int64x2_t __s2_72 = __p2_72; \ + int64x2_t __ret_72; \ + __ret_72 = vsetq_lane_s64(vgetq_lane_s64(__s2_72, __p3_72), __s0_72, __p1_72); \ __ret_72; \ }) #else -#define vcopyq_laneq_s32(__p0_73, __p1_73, __p2_73, __p3_73) __extension__ ({ \ - int32x4_t __s0_73 = __p0_73; \ - int32x4_t __s2_73 = __p2_73; \ - int32x4_t __rev0_73; __rev0_73 = __builtin_shufflevector(__s0_73, __s0_73, 3, 2, 1, 0); \ - int32x4_t __rev2_73; __rev2_73 = __builtin_shufflevector(__s2_73, __s2_73, 3, 2, 1, 0); \ - int32x4_t __ret_73; \ - __ret_73 = __noswap_vsetq_lane_s32(__noswap_vgetq_lane_s32(__rev2_73, __p3_73), __rev0_73, __p1_73); \ - __ret_73 = __builtin_shufflevector(__ret_73, 
__ret_73, 3, 2, 1, 0); \ +#define vcopyq_laneq_s64(__p0_73, __p1_73, __p2_73, __p3_73) __extension__ ({ \ + int64x2_t __s0_73 = __p0_73; \ + int64x2_t __s2_73 = __p2_73; \ + int64x2_t __rev0_73; __rev0_73 = __builtin_shufflevector(__s0_73, __s0_73, 1, 0); \ + int64x2_t __rev2_73; __rev2_73 = __builtin_shufflevector(__s2_73, __s2_73, 1, 0); \ + int64x2_t __ret_73; \ + __ret_73 = __noswap_vsetq_lane_s64(__noswap_vgetq_lane_s64(__rev2_73, __p3_73), __rev0_73, __p1_73); \ + __ret_73 = __builtin_shufflevector(__ret_73, __ret_73, 1, 0); \ __ret_73; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s64(__p0_74, __p1_74, __p2_74, __p3_74) __extension__ ({ \ - int64x2_t __s0_74 = __p0_74; \ - int64x2_t __s2_74 = __p2_74; \ - int64x2_t __ret_74; \ - __ret_74 = vsetq_lane_s64(vgetq_lane_s64(__s2_74, __p3_74), __s0_74, __p1_74); \ +#define vcopyq_laneq_s16(__p0_74, __p1_74, __p2_74, __p3_74) __extension__ ({ \ + int16x8_t __s0_74 = __p0_74; \ + int16x8_t __s2_74 = __p2_74; \ + int16x8_t __ret_74; \ + __ret_74 = vsetq_lane_s16(vgetq_lane_s16(__s2_74, __p3_74), __s0_74, __p1_74); \ __ret_74; \ }) #else -#define vcopyq_laneq_s64(__p0_75, __p1_75, __p2_75, __p3_75) __extension__ ({ \ - int64x2_t __s0_75 = __p0_75; \ - int64x2_t __s2_75 = __p2_75; \ - int64x2_t __rev0_75; __rev0_75 = __builtin_shufflevector(__s0_75, __s0_75, 1, 0); \ - int64x2_t __rev2_75; __rev2_75 = __builtin_shufflevector(__s2_75, __s2_75, 1, 0); \ - int64x2_t __ret_75; \ - __ret_75 = __noswap_vsetq_lane_s64(__noswap_vgetq_lane_s64(__rev2_75, __p3_75), __rev0_75, __p1_75); \ - __ret_75 = __builtin_shufflevector(__ret_75, __ret_75, 1, 0); \ +#define vcopyq_laneq_s16(__p0_75, __p1_75, __p2_75, __p3_75) __extension__ ({ \ + int16x8_t __s0_75 = __p0_75; \ + int16x8_t __s2_75 = __p2_75; \ + int16x8_t __rev0_75; __rev0_75 = __builtin_shufflevector(__s0_75, __s0_75, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2_75; __rev2_75 = __builtin_shufflevector(__s2_75, __s2_75, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_75; \ + __ret_75 = __noswap_vsetq_lane_s16(__noswap_vgetq_lane_s16(__rev2_75, __p3_75), __rev0_75, __p1_75); \ + __ret_75 = __builtin_shufflevector(__ret_75, __ret_75, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_75; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s16(__p0_76, __p1_76, __p2_76, __p3_76) __extension__ ({ \ - int16x8_t __s0_76 = __p0_76; \ - int16x8_t __s2_76 = __p2_76; \ - int16x8_t __ret_76; \ - __ret_76 = vsetq_lane_s16(vgetq_lane_s16(__s2_76, __p3_76), __s0_76, __p1_76); \ +#define vcopy_laneq_p8(__p0_76, __p1_76, __p2_76, __p3_76) __extension__ ({ \ + poly8x8_t __s0_76 = __p0_76; \ + poly8x16_t __s2_76 = __p2_76; \ + poly8x8_t __ret_76; \ + __ret_76 = vset_lane_p8(vgetq_lane_p8(__s2_76, __p3_76), __s0_76, __p1_76); \ __ret_76; \ }) #else -#define vcopyq_laneq_s16(__p0_77, __p1_77, __p2_77, __p3_77) __extension__ ({ \ - int16x8_t __s0_77 = __p0_77; \ - int16x8_t __s2_77 = __p2_77; \ - int16x8_t __rev0_77; __rev0_77 = __builtin_shufflevector(__s0_77, __s0_77, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_77; __rev2_77 = __builtin_shufflevector(__s2_77, __s2_77, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_77; \ - __ret_77 = __noswap_vsetq_lane_s16(__noswap_vgetq_lane_s16(__rev2_77, __p3_77), __rev0_77, __p1_77); \ +#define vcopy_laneq_p8(__p0_77, __p1_77, __p2_77, __p3_77) __extension__ ({ \ + poly8x8_t __s0_77 = __p0_77; \ + poly8x16_t __s2_77 = __p2_77; \ + poly8x8_t __rev0_77; __rev0_77 = __builtin_shufflevector(__s0_77, __s0_77, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev2_77; __rev2_77 = 
__builtin_shufflevector(__s2_77, __s2_77, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __ret_77; \ + __ret_77 = __noswap_vset_lane_p8(__noswap_vgetq_lane_p8(__rev2_77, __p3_77), __rev0_77, __p1_77); \ __ret_77 = __builtin_shufflevector(__ret_77, __ret_77, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_77; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p8(__p0_78, __p1_78, __p2_78, __p3_78) __extension__ ({ \ - poly8x8_t __s0_78 = __p0_78; \ - poly8x16_t __s2_78 = __p2_78; \ - poly8x8_t __ret_78; \ - __ret_78 = vset_lane_p8(vgetq_lane_p8(__s2_78, __p3_78), __s0_78, __p1_78); \ +#define vcopy_laneq_p16(__p0_78, __p1_78, __p2_78, __p3_78) __extension__ ({ \ + poly16x4_t __s0_78 = __p0_78; \ + poly16x8_t __s2_78 = __p2_78; \ + poly16x4_t __ret_78; \ + __ret_78 = vset_lane_p16(vgetq_lane_p16(__s2_78, __p3_78), __s0_78, __p1_78); \ __ret_78; \ }) #else -#define vcopy_laneq_p8(__p0_79, __p1_79, __p2_79, __p3_79) __extension__ ({ \ - poly8x8_t __s0_79 = __p0_79; \ - poly8x16_t __s2_79 = __p2_79; \ - poly8x8_t __rev0_79; __rev0_79 = __builtin_shufflevector(__s0_79, __s0_79, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev2_79; __rev2_79 = __builtin_shufflevector(__s2_79, __s2_79, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __ret_79; \ - __ret_79 = __noswap_vset_lane_p8(__noswap_vgetq_lane_p8(__rev2_79, __p3_79), __rev0_79, __p1_79); \ - __ret_79 = __builtin_shufflevector(__ret_79, __ret_79, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopy_laneq_p16(__p0_79, __p1_79, __p2_79, __p3_79) __extension__ ({ \ + poly16x4_t __s0_79 = __p0_79; \ + poly16x8_t __s2_79 = __p2_79; \ + poly16x4_t __rev0_79; __rev0_79 = __builtin_shufflevector(__s0_79, __s0_79, 3, 2, 1, 0); \ + poly16x8_t __rev2_79; __rev2_79 = __builtin_shufflevector(__s2_79, __s2_79, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x4_t __ret_79; \ + __ret_79 = __noswap_vset_lane_p16(__noswap_vgetq_lane_p16(__rev2_79, __p3_79), __rev0_79, __p1_79); \ + __ret_79 = __builtin_shufflevector(__ret_79, __ret_79, 3, 2, 1, 0); \ __ret_79; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p16(__p0_80, __p1_80, __p2_80, __p3_80) __extension__ ({ \ - poly16x4_t __s0_80 = __p0_80; \ - poly16x8_t __s2_80 = __p2_80; \ - poly16x4_t __ret_80; \ - __ret_80 = vset_lane_p16(vgetq_lane_p16(__s2_80, __p3_80), __s0_80, __p1_80); \ +#define vcopy_laneq_u8(__p0_80, __p1_80, __p2_80, __p3_80) __extension__ ({ \ + uint8x8_t __s0_80 = __p0_80; \ + uint8x16_t __s2_80 = __p2_80; \ + uint8x8_t __ret_80; \ + __ret_80 = vset_lane_u8(vgetq_lane_u8(__s2_80, __p3_80), __s0_80, __p1_80); \ __ret_80; \ }) #else -#define vcopy_laneq_p16(__p0_81, __p1_81, __p2_81, __p3_81) __extension__ ({ \ - poly16x4_t __s0_81 = __p0_81; \ - poly16x8_t __s2_81 = __p2_81; \ - poly16x4_t __rev0_81; __rev0_81 = __builtin_shufflevector(__s0_81, __s0_81, 3, 2, 1, 0); \ - poly16x8_t __rev2_81; __rev2_81 = __builtin_shufflevector(__s2_81, __s2_81, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x4_t __ret_81; \ - __ret_81 = __noswap_vset_lane_p16(__noswap_vgetq_lane_p16(__rev2_81, __p3_81), __rev0_81, __p1_81); \ - __ret_81 = __builtin_shufflevector(__ret_81, __ret_81, 3, 2, 1, 0); \ +#define vcopy_laneq_u8(__p0_81, __p1_81, __p2_81, __p3_81) __extension__ ({ \ + uint8x8_t __s0_81 = __p0_81; \ + uint8x16_t __s2_81 = __p2_81; \ + uint8x8_t __rev0_81; __rev0_81 = __builtin_shufflevector(__s0_81, __s0_81, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev2_81; __rev2_81 = __builtin_shufflevector(__s2_81, __s2_81, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret_81; \ + 
__ret_81 = __noswap_vset_lane_u8(__noswap_vgetq_lane_u8(__rev2_81, __p3_81), __rev0_81, __p1_81); \ + __ret_81 = __builtin_shufflevector(__ret_81, __ret_81, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_81; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u8(__p0_82, __p1_82, __p2_82, __p3_82) __extension__ ({ \ - uint8x8_t __s0_82 = __p0_82; \ - uint8x16_t __s2_82 = __p2_82; \ - uint8x8_t __ret_82; \ - __ret_82 = vset_lane_u8(vgetq_lane_u8(__s2_82, __p3_82), __s0_82, __p1_82); \ +#define vcopy_laneq_u32(__p0_82, __p1_82, __p2_82, __p3_82) __extension__ ({ \ + uint32x2_t __s0_82 = __p0_82; \ + uint32x4_t __s2_82 = __p2_82; \ + uint32x2_t __ret_82; \ + __ret_82 = vset_lane_u32(vgetq_lane_u32(__s2_82, __p3_82), __s0_82, __p1_82); \ __ret_82; \ }) #else -#define vcopy_laneq_u8(__p0_83, __p1_83, __p2_83, __p3_83) __extension__ ({ \ - uint8x8_t __s0_83 = __p0_83; \ - uint8x16_t __s2_83 = __p2_83; \ - uint8x8_t __rev0_83; __rev0_83 = __builtin_shufflevector(__s0_83, __s0_83, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_83; __rev2_83 = __builtin_shufflevector(__s2_83, __s2_83, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret_83; \ - __ret_83 = __noswap_vset_lane_u8(__noswap_vgetq_lane_u8(__rev2_83, __p3_83), __rev0_83, __p1_83); \ - __ret_83 = __builtin_shufflevector(__ret_83, __ret_83, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopy_laneq_u32(__p0_83, __p1_83, __p2_83, __p3_83) __extension__ ({ \ + uint32x2_t __s0_83 = __p0_83; \ + uint32x4_t __s2_83 = __p2_83; \ + uint32x2_t __rev0_83; __rev0_83 = __builtin_shufflevector(__s0_83, __s0_83, 1, 0); \ + uint32x4_t __rev2_83; __rev2_83 = __builtin_shufflevector(__s2_83, __s2_83, 3, 2, 1, 0); \ + uint32x2_t __ret_83; \ + __ret_83 = __noswap_vset_lane_u32(__noswap_vgetq_lane_u32(__rev2_83, __p3_83), __rev0_83, __p1_83); \ + __ret_83 = __builtin_shufflevector(__ret_83, __ret_83, 1, 0); \ __ret_83; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u32(__p0_84, __p1_84, __p2_84, __p3_84) __extension__ ({ \ - uint32x2_t __s0_84 = __p0_84; \ - uint32x4_t __s2_84 = __p2_84; \ - uint32x2_t __ret_84; \ - __ret_84 = vset_lane_u32(vgetq_lane_u32(__s2_84, __p3_84), __s0_84, __p1_84); \ +#define vcopy_laneq_u64(__p0_84, __p1_84, __p2_84, __p3_84) __extension__ ({ \ + uint64x1_t __s0_84 = __p0_84; \ + uint64x2_t __s2_84 = __p2_84; \ + uint64x1_t __ret_84; \ + __ret_84 = vset_lane_u64(vgetq_lane_u64(__s2_84, __p3_84), __s0_84, __p1_84); \ __ret_84; \ }) #else -#define vcopy_laneq_u32(__p0_85, __p1_85, __p2_85, __p3_85) __extension__ ({ \ - uint32x2_t __s0_85 = __p0_85; \ - uint32x4_t __s2_85 = __p2_85; \ - uint32x2_t __rev0_85; __rev0_85 = __builtin_shufflevector(__s0_85, __s0_85, 1, 0); \ - uint32x4_t __rev2_85; __rev2_85 = __builtin_shufflevector(__s2_85, __s2_85, 3, 2, 1, 0); \ - uint32x2_t __ret_85; \ - __ret_85 = __noswap_vset_lane_u32(__noswap_vgetq_lane_u32(__rev2_85, __p3_85), __rev0_85, __p1_85); \ - __ret_85 = __builtin_shufflevector(__ret_85, __ret_85, 1, 0); \ +#define vcopy_laneq_u64(__p0_85, __p1_85, __p2_85, __p3_85) __extension__ ({ \ + uint64x1_t __s0_85 = __p0_85; \ + uint64x2_t __s2_85 = __p2_85; \ + uint64x2_t __rev2_85; __rev2_85 = __builtin_shufflevector(__s2_85, __s2_85, 1, 0); \ + uint64x1_t __ret_85; \ + __ret_85 = vset_lane_u64(__noswap_vgetq_lane_u64(__rev2_85, __p3_85), __s0_85, __p1_85); \ __ret_85; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u64(__p0_86, __p1_86, __p2_86, __p3_86) __extension__ ({ \ - uint64x1_t __s0_86 = __p0_86; \ - uint64x2_t __s2_86 = __p2_86; \ - uint64x1_t __ret_86; \ - 
__ret_86 = vset_lane_u64(vgetq_lane_u64(__s2_86, __p3_86), __s0_86, __p1_86); \ +#define vcopy_laneq_u16(__p0_86, __p1_86, __p2_86, __p3_86) __extension__ ({ \ + uint16x4_t __s0_86 = __p0_86; \ + uint16x8_t __s2_86 = __p2_86; \ + uint16x4_t __ret_86; \ + __ret_86 = vset_lane_u16(vgetq_lane_u16(__s2_86, __p3_86), __s0_86, __p1_86); \ __ret_86; \ }) #else -#define vcopy_laneq_u64(__p0_87, __p1_87, __p2_87, __p3_87) __extension__ ({ \ - uint64x1_t __s0_87 = __p0_87; \ - uint64x2_t __s2_87 = __p2_87; \ - uint64x2_t __rev2_87; __rev2_87 = __builtin_shufflevector(__s2_87, __s2_87, 1, 0); \ - uint64x1_t __ret_87; \ - __ret_87 = __noswap_vset_lane_u64(__noswap_vgetq_lane_u64(__rev2_87, __p3_87), __s0_87, __p1_87); \ +#define vcopy_laneq_u16(__p0_87, __p1_87, __p2_87, __p3_87) __extension__ ({ \ + uint16x4_t __s0_87 = __p0_87; \ + uint16x8_t __s2_87 = __p2_87; \ + uint16x4_t __rev0_87; __rev0_87 = __builtin_shufflevector(__s0_87, __s0_87, 3, 2, 1, 0); \ + uint16x8_t __rev2_87; __rev2_87 = __builtin_shufflevector(__s2_87, __s2_87, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __ret_87; \ + __ret_87 = __noswap_vset_lane_u16(__noswap_vgetq_lane_u16(__rev2_87, __p3_87), __rev0_87, __p1_87); \ + __ret_87 = __builtin_shufflevector(__ret_87, __ret_87, 3, 2, 1, 0); \ __ret_87; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u16(__p0_88, __p1_88, __p2_88, __p3_88) __extension__ ({ \ - uint16x4_t __s0_88 = __p0_88; \ - uint16x8_t __s2_88 = __p2_88; \ - uint16x4_t __ret_88; \ - __ret_88 = vset_lane_u16(vgetq_lane_u16(__s2_88, __p3_88), __s0_88, __p1_88); \ +#define vcopy_laneq_s8(__p0_88, __p1_88, __p2_88, __p3_88) __extension__ ({ \ + int8x8_t __s0_88 = __p0_88; \ + int8x16_t __s2_88 = __p2_88; \ + int8x8_t __ret_88; \ + __ret_88 = vset_lane_s8(vgetq_lane_s8(__s2_88, __p3_88), __s0_88, __p1_88); \ __ret_88; \ }) #else -#define vcopy_laneq_u16(__p0_89, __p1_89, __p2_89, __p3_89) __extension__ ({ \ - uint16x4_t __s0_89 = __p0_89; \ - uint16x8_t __s2_89 = __p2_89; \ - uint16x4_t __rev0_89; __rev0_89 = __builtin_shufflevector(__s0_89, __s0_89, 3, 2, 1, 0); \ - uint16x8_t __rev2_89; __rev2_89 = __builtin_shufflevector(__s2_89, __s2_89, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __ret_89; \ - __ret_89 = __noswap_vset_lane_u16(__noswap_vgetq_lane_u16(__rev2_89, __p3_89), __rev0_89, __p1_89); \ - __ret_89 = __builtin_shufflevector(__ret_89, __ret_89, 3, 2, 1, 0); \ +#define vcopy_laneq_s8(__p0_89, __p1_89, __p2_89, __p3_89) __extension__ ({ \ + int8x8_t __s0_89 = __p0_89; \ + int8x16_t __s2_89 = __p2_89; \ + int8x8_t __rev0_89; __rev0_89 = __builtin_shufflevector(__s0_89, __s0_89, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev2_89; __rev2_89 = __builtin_shufflevector(__s2_89, __s2_89, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret_89; \ + __ret_89 = __noswap_vset_lane_s8(__noswap_vgetq_lane_s8(__rev2_89, __p3_89), __rev0_89, __p1_89); \ + __ret_89 = __builtin_shufflevector(__ret_89, __ret_89, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_89; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s8(__p0_90, __p1_90, __p2_90, __p3_90) __extension__ ({ \ - int8x8_t __s0_90 = __p0_90; \ - int8x16_t __s2_90 = __p2_90; \ - int8x8_t __ret_90; \ - __ret_90 = vset_lane_s8(vgetq_lane_s8(__s2_90, __p3_90), __s0_90, __p1_90); \ +#define vcopy_laneq_f32(__p0_90, __p1_90, __p2_90, __p3_90) __extension__ ({ \ + float32x2_t __s0_90 = __p0_90; \ + float32x4_t __s2_90 = __p2_90; \ + float32x2_t __ret_90; \ + __ret_90 = vset_lane_f32(vgetq_lane_f32(__s2_90, __p3_90), __s0_90, __p1_90); \ __ret_90; \ }) #else -#define 
vcopy_laneq_s8(__p0_91, __p1_91, __p2_91, __p3_91) __extension__ ({ \ - int8x8_t __s0_91 = __p0_91; \ - int8x16_t __s2_91 = __p2_91; \ - int8x8_t __rev0_91; __rev0_91 = __builtin_shufflevector(__s0_91, __s0_91, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_91; __rev2_91 = __builtin_shufflevector(__s2_91, __s2_91, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret_91; \ - __ret_91 = __noswap_vset_lane_s8(__noswap_vgetq_lane_s8(__rev2_91, __p3_91), __rev0_91, __p1_91); \ - __ret_91 = __builtin_shufflevector(__ret_91, __ret_91, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopy_laneq_f32(__p0_91, __p1_91, __p2_91, __p3_91) __extension__ ({ \ + float32x2_t __s0_91 = __p0_91; \ + float32x4_t __s2_91 = __p2_91; \ + float32x2_t __rev0_91; __rev0_91 = __builtin_shufflevector(__s0_91, __s0_91, 1, 0); \ + float32x4_t __rev2_91; __rev2_91 = __builtin_shufflevector(__s2_91, __s2_91, 3, 2, 1, 0); \ + float32x2_t __ret_91; \ + __ret_91 = __noswap_vset_lane_f32(__noswap_vgetq_lane_f32(__rev2_91, __p3_91), __rev0_91, __p1_91); \ + __ret_91 = __builtin_shufflevector(__ret_91, __ret_91, 1, 0); \ __ret_91; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_f32(__p0_92, __p1_92, __p2_92, __p3_92) __extension__ ({ \ - float32x2_t __s0_92 = __p0_92; \ - float32x4_t __s2_92 = __p2_92; \ - float32x2_t __ret_92; \ - __ret_92 = vset_lane_f32(vgetq_lane_f32(__s2_92, __p3_92), __s0_92, __p1_92); \ +#define vcopy_laneq_s32(__p0_92, __p1_92, __p2_92, __p3_92) __extension__ ({ \ + int32x2_t __s0_92 = __p0_92; \ + int32x4_t __s2_92 = __p2_92; \ + int32x2_t __ret_92; \ + __ret_92 = vset_lane_s32(vgetq_lane_s32(__s2_92, __p3_92), __s0_92, __p1_92); \ __ret_92; \ }) #else -#define vcopy_laneq_f32(__p0_93, __p1_93, __p2_93, __p3_93) __extension__ ({ \ - float32x2_t __s0_93 = __p0_93; \ - float32x4_t __s2_93 = __p2_93; \ - float32x2_t __rev0_93; __rev0_93 = __builtin_shufflevector(__s0_93, __s0_93, 1, 0); \ - float32x4_t __rev2_93; __rev2_93 = __builtin_shufflevector(__s2_93, __s2_93, 3, 2, 1, 0); \ - float32x2_t __ret_93; \ - __ret_93 = __noswap_vset_lane_f32(__noswap_vgetq_lane_f32(__rev2_93, __p3_93), __rev0_93, __p1_93); \ +#define vcopy_laneq_s32(__p0_93, __p1_93, __p2_93, __p3_93) __extension__ ({ \ + int32x2_t __s0_93 = __p0_93; \ + int32x4_t __s2_93 = __p2_93; \ + int32x2_t __rev0_93; __rev0_93 = __builtin_shufflevector(__s0_93, __s0_93, 1, 0); \ + int32x4_t __rev2_93; __rev2_93 = __builtin_shufflevector(__s2_93, __s2_93, 3, 2, 1, 0); \ + int32x2_t __ret_93; \ + __ret_93 = __noswap_vset_lane_s32(__noswap_vgetq_lane_s32(__rev2_93, __p3_93), __rev0_93, __p1_93); \ __ret_93 = __builtin_shufflevector(__ret_93, __ret_93, 1, 0); \ __ret_93; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s32(__p0_94, __p1_94, __p2_94, __p3_94) __extension__ ({ \ - int32x2_t __s0_94 = __p0_94; \ - int32x4_t __s2_94 = __p2_94; \ - int32x2_t __ret_94; \ - __ret_94 = vset_lane_s32(vgetq_lane_s32(__s2_94, __p3_94), __s0_94, __p1_94); \ +#define vcopy_laneq_s64(__p0_94, __p1_94, __p2_94, __p3_94) __extension__ ({ \ + int64x1_t __s0_94 = __p0_94; \ + int64x2_t __s2_94 = __p2_94; \ + int64x1_t __ret_94; \ + __ret_94 = vset_lane_s64(vgetq_lane_s64(__s2_94, __p3_94), __s0_94, __p1_94); \ __ret_94; \ }) #else -#define vcopy_laneq_s32(__p0_95, __p1_95, __p2_95, __p3_95) __extension__ ({ \ - int32x2_t __s0_95 = __p0_95; \ - int32x4_t __s2_95 = __p2_95; \ - int32x2_t __rev0_95; __rev0_95 = __builtin_shufflevector(__s0_95, __s0_95, 1, 0); \ - int32x4_t __rev2_95; __rev2_95 = __builtin_shufflevector(__s2_95, __s2_95, 3, 2, 1, 
0); \ - int32x2_t __ret_95; \ - __ret_95 = __noswap_vset_lane_s32(__noswap_vgetq_lane_s32(__rev2_95, __p3_95), __rev0_95, __p1_95); \ - __ret_95 = __builtin_shufflevector(__ret_95, __ret_95, 1, 0); \ +#define vcopy_laneq_s64(__p0_95, __p1_95, __p2_95, __p3_95) __extension__ ({ \ + int64x1_t __s0_95 = __p0_95; \ + int64x2_t __s2_95 = __p2_95; \ + int64x2_t __rev2_95; __rev2_95 = __builtin_shufflevector(__s2_95, __s2_95, 1, 0); \ + int64x1_t __ret_95; \ + __ret_95 = vset_lane_s64(__noswap_vgetq_lane_s64(__rev2_95, __p3_95), __s0_95, __p1_95); \ __ret_95; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s64(__p0_96, __p1_96, __p2_96, __p3_96) __extension__ ({ \ - int64x1_t __s0_96 = __p0_96; \ - int64x2_t __s2_96 = __p2_96; \ - int64x1_t __ret_96; \ - __ret_96 = vset_lane_s64(vgetq_lane_s64(__s2_96, __p3_96), __s0_96, __p1_96); \ +#define vcopy_laneq_s16(__p0_96, __p1_96, __p2_96, __p3_96) __extension__ ({ \ + int16x4_t __s0_96 = __p0_96; \ + int16x8_t __s2_96 = __p2_96; \ + int16x4_t __ret_96; \ + __ret_96 = vset_lane_s16(vgetq_lane_s16(__s2_96, __p3_96), __s0_96, __p1_96); \ __ret_96; \ }) #else -#define vcopy_laneq_s64(__p0_97, __p1_97, __p2_97, __p3_97) __extension__ ({ \ - int64x1_t __s0_97 = __p0_97; \ - int64x2_t __s2_97 = __p2_97; \ - int64x2_t __rev2_97; __rev2_97 = __builtin_shufflevector(__s2_97, __s2_97, 1, 0); \ - int64x1_t __ret_97; \ - __ret_97 = __noswap_vset_lane_s64(__noswap_vgetq_lane_s64(__rev2_97, __p3_97), __s0_97, __p1_97); \ +#define vcopy_laneq_s16(__p0_97, __p1_97, __p2_97, __p3_97) __extension__ ({ \ + int16x4_t __s0_97 = __p0_97; \ + int16x8_t __s2_97 = __p2_97; \ + int16x4_t __rev0_97; __rev0_97 = __builtin_shufflevector(__s0_97, __s0_97, 3, 2, 1, 0); \ + int16x8_t __rev2_97; __rev2_97 = __builtin_shufflevector(__s2_97, __s2_97, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret_97; \ + __ret_97 = __noswap_vset_lane_s16(__noswap_vgetq_lane_s16(__rev2_97, __p3_97), __rev0_97, __p1_97); \ + __ret_97 = __builtin_shufflevector(__ret_97, __ret_97, 3, 2, 1, 0); \ __ret_97; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s16(__p0_98, __p1_98, __p2_98, __p3_98) __extension__ ({ \ - int16x4_t __s0_98 = __p0_98; \ - int16x8_t __s2_98 = __p2_98; \ - int16x4_t __ret_98; \ - __ret_98 = vset_lane_s16(vgetq_lane_s16(__s2_98, __p3_98), __s0_98, __p1_98); \ - __ret_98; \ +#define vcreate_p64(__p0) __extension__ ({ \ + poly64x1_t __ret; \ + uint64_t __promote = __p0; \ + __ret = (poly64x1_t)(__promote); \ + __ret; \ }) -#else -#define vcopy_laneq_s16(__p0_99, __p1_99, __p2_99, __p3_99) __extension__ ({ \ - int16x4_t __s0_99 = __p0_99; \ - int16x8_t __s2_99 = __p2_99; \ - int16x4_t __rev0_99; __rev0_99 = __builtin_shufflevector(__s0_99, __s0_99, 3, 2, 1, 0); \ - int16x8_t __rev2_99; __rev2_99 = __builtin_shufflevector(__s2_99, __s2_99, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret_99; \ - __ret_99 = __noswap_vset_lane_s16(__noswap_vgetq_lane_s16(__rev2_99, __p3_99), __rev0_99, __p1_99); \ - __ret_99 = __builtin_shufflevector(__ret_99, __ret_99, 3, 2, 1, 0); \ - __ret_99; \ +#define vcreate_f64(__p0) __extension__ ({ \ + float64x1_t __ret; \ + uint64_t __promote = __p0; \ + __ret = (float64x1_t)(__promote); \ + __ret; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai poly64x1_t vcreate_p64(uint64_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#else -__ai poly64x1_t vcreate_p64(uint64_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai float64x1_t vcreate_f64(uint64_t __p0) 
{ - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#else -__ai float64x1_t vcreate_f64(uint64_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t)(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32_t vcvts_f32_s32(int32_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vcvts_f32_s32(__p0); return __ret; } -#else -__ai float32_t vcvts_f32_s32(int32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvts_f32_s32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32_t vcvts_f32_u32(uint32_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vcvts_f32_u32(__p0); return __ret; } -#else -__ai float32_t vcvts_f32_u32(uint32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvts_f32_u32(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vcvt_f32_f64(float64x2_t __p0) { float32x2_t __ret; @@ -52270,34 +44514,16 @@ __ai float32x2_t __noswap_vcvt_f32_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64_t vcvtd_f64_s64(int64_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vcvtd_f64_s64(__p0); return __ret; } -#else -__ai float64_t vcvtd_f64_s64(int64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vcvtd_f64_s64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64_t vcvtd_f64_u64(uint64_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vcvtd_f64_u64(__p0); return __ret; } -#else -__ai float64_t vcvtd_f64_u64(uint64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vcvtd_f64_u64(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vcvtq_f64_u64(uint64x2_t __p0) { float64x2_t __ret; @@ -52330,34 +44556,16 @@ __ai float64x2_t vcvtq_f64_s64(int64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vcvt_f64_u64(uint64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai float64x1_t vcvt_f64_u64(uint64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vcvt_f64_s64(int64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 3); return __ret; } -#else -__ai float64x1_t vcvt_f64_s64(int64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vcvt_f64_f32(float32x2_t __p0) { float64x2_t __ret; @@ -52445,38 +44653,18 @@ __ai float64x2_t vcvt_high_f64_f32(float32x4_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ #define vcvts_n_f32_u32(__p0, __p1) __extension__ ({ \ uint32_t __s0 = __p0; \ float32_t __ret; \ __ret = (float32_t) __builtin_neon_vcvts_n_f32_u32(__s0, __p1); \ __ret; \ }) -#else -#define vcvts_n_f32_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vcvts_n_f32_u32(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvts_n_f32_s32(__p0, __p1) __extension__ ({ \ int32_t __s0 = __p0; \ float32_t __ret; \ __ret = (float32_t) __builtin_neon_vcvts_n_f32_s32(__s0, __p1); \ __ret; \ }) -#else -#define vcvts_n_f32_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vcvts_n_f32_s32(__s0, __p1); \ - __ret; \ -}) -#endif 
- #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_f64_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s0 = __p0; \ @@ -52513,86 +44701,36 @@ __ai float64x2_t vcvt_high_f64_f32(float32x4_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vcvt_n_f64_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s0 = __p0; \ float64x1_t __ret; \ __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) -#else -#define vcvt_n_f64_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 19); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvt_n_f64_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s0 = __p0; \ float64x1_t __ret; \ __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) -#else -#define vcvt_n_f64_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 3); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtd_n_f64_u64(__p0, __p1) __extension__ ({ \ uint64_t __s0 = __p0; \ float64_t __ret; \ __ret = (float64_t) __builtin_neon_vcvtd_n_f64_u64(__s0, __p1); \ __ret; \ }) -#else -#define vcvtd_n_f64_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vcvtd_n_f64_u64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtd_n_f64_s64(__p0, __p1) __extension__ ({ \ int64_t __s0 = __p0; \ float64_t __ret; \ __ret = (float64_t) __builtin_neon_vcvtd_n_f64_s64(__s0, __p1); \ __ret; \ }) -#else -#define vcvtd_n_f64_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vcvtd_n_f64_s64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvts_n_s32_f32(__p0, __p1) __extension__ ({ \ float32_t __s0 = __p0; \ int32_t __ret; \ __ret = (int32_t) __builtin_neon_vcvts_n_s32_f32(__s0, __p1); \ __ret; \ }) -#else -#define vcvts_n_s32_f32(__p0, __p1) __extension__ ({ \ - float32_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vcvts_n_s32_f32(__s0, __p1); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_s64_f64(__p0, __p1) __extension__ ({ \ float64x2_t __s0 = __p0; \ @@ -52611,54 +44749,24 @@ __ai float64x2_t vcvt_high_f64_f32(float32x4_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vcvt_n_s64_f64(__p0, __p1) __extension__ ({ \ float64x1_t __s0 = __p0; \ int64x1_t __ret; \ __ret = (int64x1_t) __builtin_neon_vcvt_n_s64_v((int8x8_t)__s0, __p1, 3); \ __ret; \ }) -#else -#define vcvt_n_s64_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vcvt_n_s64_v((int8x8_t)__s0, __p1, 3); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtd_n_s64_f64(__p0, __p1) __extension__ ({ \ float64_t __s0 = __p0; \ int64_t __ret; \ __ret = (int64_t) __builtin_neon_vcvtd_n_s64_f64(__s0, __p1); \ __ret; \ }) -#else -#define vcvtd_n_s64_f64(__p0, __p1) __extension__ ({ \ - float64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vcvtd_n_s64_f64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvts_n_u32_f32(__p0, __p1) __extension__ ({ \ float32_t __s0 = __p0; \ uint32_t __ret; \ __ret = (uint32_t) __builtin_neon_vcvts_n_u32_f32(__s0, __p1); \ __ret; \ }) -#else -#define vcvts_n_u32_f32(__p0, __p1) 
__extension__ ({ \ - float32_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vcvts_n_u32_f32(__s0, __p1); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_u64_f64(__p0, __p1) __extension__ ({ \ float64x2_t __s0 = __p0; \ @@ -52677,66 +44785,28 @@ __ai float64x2_t vcvt_high_f64_f32(float32x4_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vcvt_n_u64_f64(__p0, __p1) __extension__ ({ \ float64x1_t __s0 = __p0; \ uint64x1_t __ret; \ __ret = (uint64x1_t) __builtin_neon_vcvt_n_u64_v((int8x8_t)__s0, __p1, 19); \ __ret; \ }) -#else -#define vcvt_n_u64_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vcvt_n_u64_v((int8x8_t)__s0, __p1, 19); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vcvtd_n_u64_f64(__p0, __p1) __extension__ ({ \ float64_t __s0 = __p0; \ uint64_t __ret; \ __ret = (uint64_t) __builtin_neon_vcvtd_n_u64_f64(__s0, __p1); \ __ret; \ }) -#else -#define vcvtd_n_u64_f64(__p0, __p1) __extension__ ({ \ - float64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vcvtd_n_u64_f64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vcvts_s32_f32(float32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vcvts_s32_f32(__p0); return __ret; } -#else -__ai int32_t vcvts_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvts_s32_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcvtd_s64_f64(float64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcvtd_s64_f64(__p0); return __ret; } -#else -__ai int64_t vcvtd_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtd_s64_f64(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vcvtq_s64_f64(float64x2_t __p0) { int64x2_t __ret; @@ -52753,48 +44823,21 @@ __ai int64x2_t vcvtq_s64_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vcvt_s64_f64(float64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vcvt_s64_v((int8x8_t)__p0, 3); return __ret; } -#else -__ai int64x1_t vcvt_s64_f64(float64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvt_s64_v((int8x8_t)__p0, 3); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcvts_u32_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcvts_u32_f32(__p0); return __ret; } -#else -__ai uint32_t vcvts_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvts_u32_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcvtd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcvtd_u64_f64(__p0); return __ret; } -#else -__ai uint64_t vcvtd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtd_u64_f64(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vcvtq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; @@ -52811,258 +44854,96 @@ __ai uint64x2_t vcvtq_u64_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vcvt_u64_f64(float64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vcvt_u64_v((int8x8_t)__p0, 19); return __ret; } -#else -__ai uint64x1_t vcvt_u64_f64(float64x1_t __p0) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvt_u64_v((int8x8_t)__p0, 19); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t 
vcvtas_s32_f32(float32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vcvtas_s32_f32(__p0); return __ret; } -#else -__ai int32_t vcvtas_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtas_s32_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcvtad_s64_f64(float64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcvtad_s64_f64(__p0); return __ret; } -#else -__ai int64_t vcvtad_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtad_s64_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcvtas_u32_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcvtas_u32_f32(__p0); return __ret; } -#else -__ai uint32_t vcvtas_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtas_u32_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcvtad_u64_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcvtad_u64_f64(__p0); return __ret; } -#else -__ai uint64_t vcvtad_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtad_u64_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vcvtms_s32_f32(float32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vcvtms_s32_f32(__p0); return __ret; } -#else -__ai int32_t vcvtms_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtms_s32_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcvtmd_s64_f64(float64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcvtmd_s64_f64(__p0); return __ret; } -#else -__ai int64_t vcvtmd_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtmd_s64_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcvtms_u32_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcvtms_u32_f32(__p0); return __ret; } -#else -__ai uint32_t vcvtms_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtms_u32_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcvtmd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcvtmd_u64_f64(__p0); return __ret; } -#else -__ai uint64_t vcvtmd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtmd_u64_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vcvtns_s32_f32(float32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vcvtns_s32_f32(__p0); return __ret; } -#else -__ai int32_t vcvtns_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtns_s32_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcvtnd_s64_f64(float64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcvtnd_s64_f64(__p0); return __ret; } -#else -__ai int64_t vcvtnd_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtnd_s64_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcvtns_u32_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcvtns_u32_f32(__p0); return __ret; } -#else -__ai uint32_t vcvtns_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtns_u32_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcvtnd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = 
(uint64_t) __builtin_neon_vcvtnd_u64_f64(__p0); return __ret; } -#else -__ai uint64_t vcvtnd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtnd_u64_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vcvtps_s32_f32(float32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vcvtps_s32_f32(__p0); return __ret; } -#else -__ai int32_t vcvtps_s32_f32(float32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtps_s32_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vcvtpd_s64_f64(float64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vcvtpd_s64_f64(__p0); return __ret; } -#else -__ai int64_t vcvtpd_s64_f64(float64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtpd_s64_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vcvtps_u32_f32(float32_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vcvtps_u32_f32(__p0); return __ret; } -#else -__ai uint32_t vcvtps_u32_f32(float32_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtps_u32_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vcvtpd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vcvtpd_u64_f64(__p0); return __ret; } -#else -__ai uint64_t vcvtpd_u64_f64(float64_t __p0) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtpd_u64_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32_t vcvtxd_f32_f64(float64_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vcvtxd_f32_f64(__p0); return __ret; } -#else -__ai float32_t vcvtxd_f32_f64(float64_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvtxd_f32_f64(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vcvtx_f32_f64(float64x2_t __p0) { float32x2_t __ret; @@ -53135,20 +45016,11 @@ __ai float32x4_t vdivq_f32(float32x4_t __p0, float32x4_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vdiv_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = __p0 / __p1; return __ret; } -#else -__ai float64x1_t vdiv_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = __p0 / __p1; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; @@ -53170,7 +45042,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vdupb_lane_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __s0 = __p0; \ poly8_t __ret; \ - __ret = (poly8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __p1); \ + __ret = (poly8_t) __builtin_neon_vdupb_lane_i8((poly8x8_t)__s0, __p1); \ __ret; \ }) #else @@ -53178,7 +45050,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { poly8x8_t __s0 = __p0; \ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8_t __ret; \ - __ret = (poly8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__rev0, __p1); \ + __ret = (poly8_t) __builtin_neon_vdupb_lane_i8((poly8x8_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53187,7 +45059,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vduph_lane_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __s0 = __p0; \ poly16_t __ret; \ - __ret = (poly16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__s0, __p1); \ + __ret = (poly16_t) __builtin_neon_vduph_lane_i16((poly16x4_t)__s0, __p1); \ __ret; \ }) #else @@ -53195,7 +45067,7 @@ __ai 
float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { poly16x4_t __s0 = __p0; \ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ poly16_t __ret; \ - __ret = (poly16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__rev0, __p1); \ + __ret = (poly16_t) __builtin_neon_vduph_lane_i16((poly16x4_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53221,7 +45093,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vdups_lane_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __s0 = __p0; \ uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__s0, __p1); \ + __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #else @@ -53229,32 +45101,22 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __s0 = __p0; \ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__rev0, __p1); \ + __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__rev0, __p1); \ __ret; \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vdupd_lane_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s0 = __p0; \ uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ + __ret = (uint64_t) __builtin_neon_vdupd_lane_i64((int64x1_t)__s0, __p1); \ __ret; \ }) -#else -#define vdupd_lane_u64(__p0, __p1) __extension__ ({ \ - uint64x1_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vduph_lane_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s0 = __p0; \ uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__s0, __p1); \ + __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #else @@ -53262,7 +45124,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { uint16x4_t __s0 = __p0; \ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__rev0, __p1); \ + __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53284,27 +45146,17 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vdupd_lane_f64(__p0, __p1) __extension__ ({ \ float64x1_t __s0 = __p0; \ float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vdupd_lane_f64((int8x8_t)__s0, __p1); \ + __ret = (float64_t) __builtin_neon_vdupd_lane_f64((float64x1_t)__s0, __p1); \ __ret; \ }) -#else -#define vdupd_lane_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vdupd_lane_f64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vdups_lane_f32(__p0, __p1) __extension__ ({ \ float32x2_t __s0 = __p0; \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vdups_lane_f32((int8x8_t)__s0, __p1); \ + __ret = (float32_t) __builtin_neon_vdups_lane_f32((float32x2_t)__s0, __p1); \ __ret; \ }) #else @@ -53312,7 +45164,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __s0 = __p0; \ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vdups_lane_f32((int8x8_t)__rev0, __p1); \ + __ret = (float32_t) __builtin_neon_vdups_lane_f32((float32x2_t)__rev0, 
__p1); \ __ret; \ }) #endif @@ -53321,7 +45173,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vdups_lane_s32(__p0, __p1) __extension__ ({ \ int32x2_t __s0 = __p0; \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__s0, __p1); \ + __ret = (int32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__s0, __p1); \ __ret; \ }) #else @@ -53329,32 +45181,22 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { int32x2_t __s0 = __p0; \ int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vdups_lane_i32((int8x8_t)__rev0, __p1); \ + __ret = (int32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__rev0, __p1); \ __ret; \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vdupd_lane_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s0 = __p0; \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ + __ret = (int64_t) __builtin_neon_vdupd_lane_i64((int64x1_t)__s0, __p1); \ __ret; \ }) -#else -#define vdupd_lane_s64(__p0, __p1) __extension__ ({ \ - int64x1_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vdupd_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vduph_lane_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s0 = __p0; \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__s0, __p1); \ + __ret = (int16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__s0, __p1); \ __ret; \ }) #else @@ -53362,27 +45204,17 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { int16x4_t __s0 = __p0; \ int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vduph_lane_i16((int8x8_t)__rev0, __p1); \ + __ret = (int16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__rev0, __p1); \ __ret; \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vdup_lane_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __s0 = __p0; \ poly64x1_t __ret; \ __ret = __builtin_shufflevector(__s0, __s0, __p1); \ __ret; \ }) -#else -#define vdup_lane_p64(__p0, __p1) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vdupq_lane_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __s0 = __p0; \ @@ -53435,22 +45267,12 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vdup_lane_f64(__p0, __p1) __extension__ ({ \ float64x1_t __s0 = __p0; \ float64x1_t __ret; \ __ret = __builtin_shufflevector(__s0, __s0, __p1); \ __ret; \ }) -#else -#define vdup_lane_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __ret; \ - __ret = __builtin_shufflevector(__s0, __s0, __p1); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vdup_lane_f16(__p0, __p1) __extension__ ({ \ float16x4_t __s0 = __p0; \ @@ -53473,7 +45295,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __s0 = __p0; \ poly8_t __ret; \ - __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __p1); \ + __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((poly8x16_t)__s0, __p1); \ __ret; \ }) #else @@ -53481,7 +45303,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { poly8x16_t __s0 = __p0; \ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ poly8_t __ret; \ - __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__rev0, __p1); \ + __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((poly8x16_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53490,7 +45312,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __s0 = __p0; \ poly16_t __ret; \ - __ret = (poly16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__s0, __p1); \ + __ret = (poly16_t) __builtin_neon_vduph_laneq_i16((poly16x8_t)__s0, __p1); \ __ret; \ }) #else @@ -53498,7 +45320,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { poly16x8_t __s0 = __p0; \ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ poly16_t __ret; \ - __ret = (poly16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__rev0, __p1); \ + __ret = (poly16_t) __builtin_neon_vduph_laneq_i16((poly16x8_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53524,7 +45346,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vdups_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s0 = __p0; \ uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__s0, __p1); \ + __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__s0, __p1); \ __ret; \ }) #else @@ -53532,7 +45354,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { uint32x4_t __s0 = __p0; \ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__rev0, __p1); \ + __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53541,7 +45363,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vdupd_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s0 = __p0; \ uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__s0, __p1); \ + __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__s0, __p1); \ __ret; \ }) #else @@ -53549,7 +45371,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { uint64x2_t __s0 = __p0; \ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__rev0, __p1); \ + __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53558,7 +45380,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vduph_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s0 = __p0; \ uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__s0, __p1); \ + __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__s0, __p1); \ __ret; \ }) #else @@ -53566,7 +45388,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { uint16x8_t __s0 = __p0; \ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__rev0, __p1); \ + __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53592,7 +45414,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vdupd_laneq_f64(__p0, __p1) __extension__ ({ \ float64x2_t __s0 = __p0; \ float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vdupd_laneq_f64((int8x16_t)__s0, __p1); \ + __ret = (float64_t) 
__builtin_neon_vdupd_laneq_f64((float64x2_t)__s0, __p1); \ __ret; \ }) #else @@ -53600,7 +45422,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { float64x2_t __s0 = __p0; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vdupd_laneq_f64((int8x16_t)__rev0, __p1); \ + __ret = (float64_t) __builtin_neon_vdupd_laneq_f64((float64x2_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53609,7 +45431,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vdups_laneq_f32(__p0, __p1) __extension__ ({ \ float32x4_t __s0 = __p0; \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vdups_laneq_f32((int8x16_t)__s0, __p1); \ + __ret = (float32_t) __builtin_neon_vdups_laneq_f32((float32x4_t)__s0, __p1); \ __ret; \ }) #else @@ -53617,7 +45439,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { float32x4_t __s0 = __p0; \ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vdups_laneq_f32((int8x16_t)__rev0, __p1); \ + __ret = (float32_t) __builtin_neon_vdups_laneq_f32((float32x4_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53626,7 +45448,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vdups_laneq_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s0 = __p0; \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__s0, __p1); \ + __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__s0, __p1); \ __ret; \ }) #else @@ -53634,7 +45456,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { int32x4_t __s0 = __p0; \ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int8x16_t)__rev0, __p1); \ + __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53643,7 +45465,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vdupd_laneq_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s0 = __p0; \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__s0, __p1); \ + __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__s0, __p1); \ __ret; \ }) #else @@ -53651,7 +45473,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { int64x2_t __s0 = __p0; \ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int8x16_t)__rev0, __p1); \ + __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__rev0, __p1); \ __ret; \ }) #endif @@ -53660,7 +45482,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #define vduph_laneq_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s0 = __p0; \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__s0, __p1); \ + __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__s0, __p1); \ __ret; \ }) #else @@ -53668,7 +45490,7 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { int16x8_t __s0 = __p0; \ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int8x16_t)__rev0, __p1); \ + __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__rev0, __p1); \ __ret; \ }) #endif @@ -54173,20 +45995,11 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { 
}) #endif -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vdup_n_p64(poly64_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t) {__p0}; return __ret; } -#else -__ai poly64x1_t vdup_n_p64(poly64_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t) {__p0}; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vdupq_n_p64(poly64_t __p0) { poly64x2_t __ret; @@ -54217,21 +46030,11 @@ __ai float64x2_t vdupq_n_f64(float64_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vdup_n_f64(float64_t __p0) { float64x1_t __ret; __ret = (float64x1_t) {__p0}; return __ret; } -#else -__ai float64x1_t vdup_n_f64(float64_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) {__p0}; - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ #define vext_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ @@ -54239,16 +46042,6 @@ __ai float64x1_t vdup_n_f64(float64_t __p0) { __ret = (poly64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ __ret; \ }) -#else -#define vext_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vextq_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __s0 = __p0; \ @@ -54291,7 +46084,6 @@ __ai float64x1_t vdup_n_f64(float64_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vext_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ @@ -54299,16 +46091,6 @@ __ai float64x1_t vdup_n_f64(float64_t __p0) { __ret = (float64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ __ret; \ }) -#else -#define vext_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; @@ -54332,60 +46114,26 @@ __ai float64x2_t __noswap_vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); return __ret; } -#else -__ai float64x1_t vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); - return __ret; -} -__ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ #define vfmad_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64_t __s0 = __p0; \ float64_t __s1 = __p1; \ float64x1_t __s2 = __p2; \ float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (int8x8_t)__s2, __p3); \ + __ret = (float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (float64x1_t)__s2, __p3); \ __ret; \ }) -#else -#define vfmad_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64_t __ret; \ - __ret = 
(float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (int8x8_t)__s2, __p3); \ - __ret; \ -}) -#define __noswap_vfmad_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (int8x8_t)__s2, __p3); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (int8x8_t)__s2, __p3); \ + __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (float32x2_t)__s2, __p3); \ __ret; \ }) #else @@ -54395,7 +46143,7 @@ __ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1 float32x2_t __s2 = __p2; \ float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (int8x8_t)__rev2, __p3); \ + __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (float32x2_t)__rev2, __p3); \ __ret; \ }) #define __noswap_vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -54403,7 +46151,7 @@ __ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1 float32_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (int8x8_t)__s2, __p3); \ + __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (float32x2_t)__s2, __p3); \ __ret; \ }) #endif @@ -54471,7 +46219,6 @@ __ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1 }) #endif -#ifdef __LITTLE_ENDIAN__ #define vfma_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ @@ -54480,25 +46227,6 @@ __ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1 __ret = (float64x1_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 10); \ __ret; \ }) -#else -#define vfma_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 10); \ - __ret; \ -}) -#define __noswap_vfma_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __s2 = __p2; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 10); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vfma_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ float32x2_t __s0 = __p0; \ @@ -54537,7 +46265,7 @@ __ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1 float64_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (float64x2_t)__s2, __p3); \ __ret; \ }) #else @@ -54547,7 +46275,7 @@ __ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1 float64x2_t __s2 = __p2; \ float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, 
(int8x16_t)__rev2, __p3); \ + __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (float64x2_t)__rev2, __p3); \ __ret; \ }) #define __noswap_vfmad_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -54555,7 +46283,7 @@ __ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1 float64_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (float64x2_t)__s2, __p3); \ __ret; \ }) #endif @@ -54566,7 +46294,7 @@ __ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1 float32_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (float32x4_t)__s2, __p3); \ __ret; \ }) #else @@ -54576,7 +46304,7 @@ __ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1 float32x4_t __s2 = __p2; \ float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (float32x4_t)__rev2, __p3); \ __ret; \ }) #define __noswap_vfmas_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -54584,7 +46312,7 @@ __ai float64x1_t __noswap_vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1 float32_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ float32_t __ret; \ - __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (float32x4_t)__s2, __p3); \ __ret; \ }) #endif @@ -54731,20 +46459,11 @@ __ai float64x2_t vfmaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vfma_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { float64x1_t __ret; __ret = vfma_f64(__p0, __p1, (float64x1_t) {__p2}); return __ret; } -#else -__ai float64x1_t vfma_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { - float64x1_t __ret; - __ret = __noswap_vfma_f64(__p0, __p1, (float64x1_t) {__p2}); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vfmsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; @@ -54763,287 +46482,254 @@ __ai float64x2_t vfmsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vfms_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = vfma_f64(__p0, -__p1, __p2); return __ret; } -#else -__ai float64x1_t vfms_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = __noswap_vfma_f64(__p0, -__p1, __p2); - return __ret; -} -#endif - +#define vfmsd_lane_f64(__p0_98, __p1_98, __p2_98, __p3_98) __extension__ ({ \ + float64_t __s0_98 = __p0_98; \ + float64_t __s1_98 = __p1_98; \ + float64x1_t __s2_98 = __p2_98; \ + float64_t __ret_98; \ + __ret_98 = vfmad_lane_f64(__s0_98, -__s1_98, __s2_98, __p3_98); \ + __ret_98; \ +}) #ifdef __LITTLE_ENDIAN__ -#define vfmsd_lane_f64(__p0_100, __p1_100, __p2_100, __p3_100) __extension__ ({ \ - float64_t __s0_100 = __p0_100; \ - float64_t __s1_100 = __p1_100; \ - float64x1_t __s2_100 = __p2_100; \ - float64_t __ret_100; \ - __ret_100 = vfmad_lane_f64(__s0_100, 
-__s1_100, __s2_100, __p3_100); \ +#define vfmss_lane_f32(__p0_99, __p1_99, __p2_99, __p3_99) __extension__ ({ \ + float32_t __s0_99 = __p0_99; \ + float32_t __s1_99 = __p1_99; \ + float32x2_t __s2_99 = __p2_99; \ + float32_t __ret_99; \ + __ret_99 = vfmas_lane_f32(__s0_99, -__s1_99, __s2_99, __p3_99); \ + __ret_99; \ +}) +#else +#define vfmss_lane_f32(__p0_100, __p1_100, __p2_100, __p3_100) __extension__ ({ \ + float32_t __s0_100 = __p0_100; \ + float32_t __s1_100 = __p1_100; \ + float32x2_t __s2_100 = __p2_100; \ + float32x2_t __rev2_100; __rev2_100 = __builtin_shufflevector(__s2_100, __s2_100, 1, 0); \ + float32_t __ret_100; \ + __ret_100 = __noswap_vfmas_lane_f32(__s0_100, -__s1_100, __rev2_100, __p3_100); \ __ret_100; \ }) -#else -#define vfmsd_lane_f64(__p0_101, __p1_101, __p2_101, __p3_101) __extension__ ({ \ - float64_t __s0_101 = __p0_101; \ - float64_t __s1_101 = __p1_101; \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmsq_lane_f64(__p0_101, __p1_101, __p2_101, __p3_101) __extension__ ({ \ + float64x2_t __s0_101 = __p0_101; \ + float64x2_t __s1_101 = __p1_101; \ float64x1_t __s2_101 = __p2_101; \ - float64_t __ret_101; \ - __ret_101 = __noswap_vfmad_lane_f64(__s0_101, -__s1_101, __s2_101, __p3_101); \ + float64x2_t __ret_101; \ + __ret_101 = vfmaq_lane_f64(__s0_101, -__s1_101, __s2_101, __p3_101); \ __ret_101; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmss_lane_f32(__p0_102, __p1_102, __p2_102, __p3_102) __extension__ ({ \ - float32_t __s0_102 = __p0_102; \ - float32_t __s1_102 = __p1_102; \ - float32x2_t __s2_102 = __p2_102; \ - float32_t __ret_102; \ - __ret_102 = vfmas_lane_f32(__s0_102, -__s1_102, __s2_102, __p3_102); \ +#else +#define vfmsq_lane_f64(__p0_102, __p1_102, __p2_102, __p3_102) __extension__ ({ \ + float64x2_t __s0_102 = __p0_102; \ + float64x2_t __s1_102 = __p1_102; \ + float64x1_t __s2_102 = __p2_102; \ + float64x2_t __rev0_102; __rev0_102 = __builtin_shufflevector(__s0_102, __s0_102, 1, 0); \ + float64x2_t __rev1_102; __rev1_102 = __builtin_shufflevector(__s1_102, __s1_102, 1, 0); \ + float64x2_t __ret_102; \ + __ret_102 = __noswap_vfmaq_lane_f64(__rev0_102, -__rev1_102, __s2_102, __p3_102); \ + __ret_102 = __builtin_shufflevector(__ret_102, __ret_102, 1, 0); \ __ret_102; \ }) -#else -#define vfmss_lane_f32(__p0_103, __p1_103, __p2_103, __p3_103) __extension__ ({ \ - float32_t __s0_103 = __p0_103; \ - float32_t __s1_103 = __p1_103; \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmsq_lane_f32(__p0_103, __p1_103, __p2_103, __p3_103) __extension__ ({ \ + float32x4_t __s0_103 = __p0_103; \ + float32x4_t __s1_103 = __p1_103; \ float32x2_t __s2_103 = __p2_103; \ - float32x2_t __rev2_103; __rev2_103 = __builtin_shufflevector(__s2_103, __s2_103, 1, 0); \ - float32_t __ret_103; \ - __ret_103 = __noswap_vfmas_lane_f32(__s0_103, -__s1_103, __rev2_103, __p3_103); \ + float32x4_t __ret_103; \ + __ret_103 = vfmaq_lane_f32(__s0_103, -__s1_103, __s2_103, __p3_103); \ __ret_103; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmsq_lane_f64(__p0_104, __p1_104, __p2_104, __p3_104) __extension__ ({ \ - float64x2_t __s0_104 = __p0_104; \ - float64x2_t __s1_104 = __p1_104; \ - float64x1_t __s2_104 = __p2_104; \ - float64x2_t __ret_104; \ - __ret_104 = vfmaq_lane_f64(__s0_104, -__s1_104, __s2_104, __p3_104); \ +#else +#define vfmsq_lane_f32(__p0_104, __p1_104, __p2_104, __p3_104) __extension__ ({ \ + float32x4_t __s0_104 = __p0_104; \ + float32x4_t __s1_104 = __p1_104; \ + float32x2_t __s2_104 = __p2_104; \ + float32x4_t __rev0_104; __rev0_104 = 
__builtin_shufflevector(__s0_104, __s0_104, 3, 2, 1, 0); \ + float32x4_t __rev1_104; __rev1_104 = __builtin_shufflevector(__s1_104, __s1_104, 3, 2, 1, 0); \ + float32x2_t __rev2_104; __rev2_104 = __builtin_shufflevector(__s2_104, __s2_104, 1, 0); \ + float32x4_t __ret_104; \ + __ret_104 = __noswap_vfmaq_lane_f32(__rev0_104, -__rev1_104, __rev2_104, __p3_104); \ + __ret_104 = __builtin_shufflevector(__ret_104, __ret_104, 3, 2, 1, 0); \ __ret_104; \ }) -#else -#define vfmsq_lane_f64(__p0_105, __p1_105, __p2_105, __p3_105) __extension__ ({ \ - float64x2_t __s0_105 = __p0_105; \ - float64x2_t __s1_105 = __p1_105; \ - float64x1_t __s2_105 = __p2_105; \ - float64x2_t __rev0_105; __rev0_105 = __builtin_shufflevector(__s0_105, __s0_105, 1, 0); \ - float64x2_t __rev1_105; __rev1_105 = __builtin_shufflevector(__s1_105, __s1_105, 1, 0); \ - float64x2_t __ret_105; \ - __ret_105 = __noswap_vfmaq_lane_f64(__rev0_105, -__rev1_105, __s2_105, __p3_105); \ - __ret_105 = __builtin_shufflevector(__ret_105, __ret_105, 1, 0); \ - __ret_105; \ -}) #endif +#define vfms_lane_f64(__p0_105, __p1_105, __p2_105, __p3_105) __extension__ ({ \ + float64x1_t __s0_105 = __p0_105; \ + float64x1_t __s1_105 = __p1_105; \ + float64x1_t __s2_105 = __p2_105; \ + float64x1_t __ret_105; \ + __ret_105 = vfma_lane_f64(__s0_105, -__s1_105, __s2_105, __p3_105); \ + __ret_105; \ +}) #ifdef __LITTLE_ENDIAN__ -#define vfmsq_lane_f32(__p0_106, __p1_106, __p2_106, __p3_106) __extension__ ({ \ - float32x4_t __s0_106 = __p0_106; \ - float32x4_t __s1_106 = __p1_106; \ +#define vfms_lane_f32(__p0_106, __p1_106, __p2_106, __p3_106) __extension__ ({ \ + float32x2_t __s0_106 = __p0_106; \ + float32x2_t __s1_106 = __p1_106; \ float32x2_t __s2_106 = __p2_106; \ - float32x4_t __ret_106; \ - __ret_106 = vfmaq_lane_f32(__s0_106, -__s1_106, __s2_106, __p3_106); \ + float32x2_t __ret_106; \ + __ret_106 = vfma_lane_f32(__s0_106, -__s1_106, __s2_106, __p3_106); \ __ret_106; \ }) #else -#define vfmsq_lane_f32(__p0_107, __p1_107, __p2_107, __p3_107) __extension__ ({ \ - float32x4_t __s0_107 = __p0_107; \ - float32x4_t __s1_107 = __p1_107; \ +#define vfms_lane_f32(__p0_107, __p1_107, __p2_107, __p3_107) __extension__ ({ \ + float32x2_t __s0_107 = __p0_107; \ + float32x2_t __s1_107 = __p1_107; \ float32x2_t __s2_107 = __p2_107; \ - float32x4_t __rev0_107; __rev0_107 = __builtin_shufflevector(__s0_107, __s0_107, 3, 2, 1, 0); \ - float32x4_t __rev1_107; __rev1_107 = __builtin_shufflevector(__s1_107, __s1_107, 3, 2, 1, 0); \ + float32x2_t __rev0_107; __rev0_107 = __builtin_shufflevector(__s0_107, __s0_107, 1, 0); \ + float32x2_t __rev1_107; __rev1_107 = __builtin_shufflevector(__s1_107, __s1_107, 1, 0); \ float32x2_t __rev2_107; __rev2_107 = __builtin_shufflevector(__s2_107, __s2_107, 1, 0); \ - float32x4_t __ret_107; \ - __ret_107 = __noswap_vfmaq_lane_f32(__rev0_107, -__rev1_107, __rev2_107, __p3_107); \ - __ret_107 = __builtin_shufflevector(__ret_107, __ret_107, 3, 2, 1, 0); \ + float32x2_t __ret_107; \ + __ret_107 = __noswap_vfma_lane_f32(__rev0_107, -__rev1_107, __rev2_107, __p3_107); \ + __ret_107 = __builtin_shufflevector(__ret_107, __ret_107, 1, 0); \ __ret_107; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_lane_f64(__p0_108, __p1_108, __p2_108, __p3_108) __extension__ ({ \ - float64x1_t __s0_108 = __p0_108; \ - float64x1_t __s1_108 = __p1_108; \ - float64x1_t __s2_108 = __p2_108; \ - float64x1_t __ret_108; \ - __ret_108 = vfma_lane_f64(__s0_108, -__s1_108, __s2_108, __p3_108); \ +#define vfmsd_laneq_f64(__p0_108, __p1_108, __p2_108, __p3_108) 
__extension__ ({ \ + float64_t __s0_108 = __p0_108; \ + float64_t __s1_108 = __p1_108; \ + float64x2_t __s2_108 = __p2_108; \ + float64_t __ret_108; \ + __ret_108 = vfmad_laneq_f64(__s0_108, -__s1_108, __s2_108, __p3_108); \ __ret_108; \ }) #else -#define vfms_lane_f64(__p0_109, __p1_109, __p2_109, __p3_109) __extension__ ({ \ - float64x1_t __s0_109 = __p0_109; \ - float64x1_t __s1_109 = __p1_109; \ - float64x1_t __s2_109 = __p2_109; \ - float64x1_t __ret_109; \ - __ret_109 = __noswap_vfma_lane_f64(__s0_109, -__s1_109, __s2_109, __p3_109); \ +#define vfmsd_laneq_f64(__p0_109, __p1_109, __p2_109, __p3_109) __extension__ ({ \ + float64_t __s0_109 = __p0_109; \ + float64_t __s1_109 = __p1_109; \ + float64x2_t __s2_109 = __p2_109; \ + float64x2_t __rev2_109; __rev2_109 = __builtin_shufflevector(__s2_109, __s2_109, 1, 0); \ + float64_t __ret_109; \ + __ret_109 = __noswap_vfmad_laneq_f64(__s0_109, -__s1_109, __rev2_109, __p3_109); \ __ret_109; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_lane_f32(__p0_110, __p1_110, __p2_110, __p3_110) __extension__ ({ \ - float32x2_t __s0_110 = __p0_110; \ - float32x2_t __s1_110 = __p1_110; \ - float32x2_t __s2_110 = __p2_110; \ - float32x2_t __ret_110; \ - __ret_110 = vfma_lane_f32(__s0_110, -__s1_110, __s2_110, __p3_110); \ +#define vfmss_laneq_f32(__p0_110, __p1_110, __p2_110, __p3_110) __extension__ ({ \ + float32_t __s0_110 = __p0_110; \ + float32_t __s1_110 = __p1_110; \ + float32x4_t __s2_110 = __p2_110; \ + float32_t __ret_110; \ + __ret_110 = vfmas_laneq_f32(__s0_110, -__s1_110, __s2_110, __p3_110); \ __ret_110; \ }) #else -#define vfms_lane_f32(__p0_111, __p1_111, __p2_111, __p3_111) __extension__ ({ \ - float32x2_t __s0_111 = __p0_111; \ - float32x2_t __s1_111 = __p1_111; \ - float32x2_t __s2_111 = __p2_111; \ - float32x2_t __rev0_111; __rev0_111 = __builtin_shufflevector(__s0_111, __s0_111, 1, 0); \ - float32x2_t __rev1_111; __rev1_111 = __builtin_shufflevector(__s1_111, __s1_111, 1, 0); \ - float32x2_t __rev2_111; __rev2_111 = __builtin_shufflevector(__s2_111, __s2_111, 1, 0); \ - float32x2_t __ret_111; \ - __ret_111 = __noswap_vfma_lane_f32(__rev0_111, -__rev1_111, __rev2_111, __p3_111); \ - __ret_111 = __builtin_shufflevector(__ret_111, __ret_111, 1, 0); \ +#define vfmss_laneq_f32(__p0_111, __p1_111, __p2_111, __p3_111) __extension__ ({ \ + float32_t __s0_111 = __p0_111; \ + float32_t __s1_111 = __p1_111; \ + float32x4_t __s2_111 = __p2_111; \ + float32x4_t __rev2_111; __rev2_111 = __builtin_shufflevector(__s2_111, __s2_111, 3, 2, 1, 0); \ + float32_t __ret_111; \ + __ret_111 = __noswap_vfmas_laneq_f32(__s0_111, -__s1_111, __rev2_111, __p3_111); \ __ret_111; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsd_laneq_f64(__p0_112, __p1_112, __p2_112, __p3_112) __extension__ ({ \ - float64_t __s0_112 = __p0_112; \ - float64_t __s1_112 = __p1_112; \ +#define vfmsq_laneq_f64(__p0_112, __p1_112, __p2_112, __p3_112) __extension__ ({ \ + float64x2_t __s0_112 = __p0_112; \ + float64x2_t __s1_112 = __p1_112; \ float64x2_t __s2_112 = __p2_112; \ - float64_t __ret_112; \ - __ret_112 = vfmad_laneq_f64(__s0_112, -__s1_112, __s2_112, __p3_112); \ + float64x2_t __ret_112; \ + __ret_112 = vfmaq_laneq_f64(__s0_112, -__s1_112, __s2_112, __p3_112); \ __ret_112; \ }) #else -#define vfmsd_laneq_f64(__p0_113, __p1_113, __p2_113, __p3_113) __extension__ ({ \ - float64_t __s0_113 = __p0_113; \ - float64_t __s1_113 = __p1_113; \ +#define vfmsq_laneq_f64(__p0_113, __p1_113, __p2_113, __p3_113) __extension__ ({ \ + float64x2_t __s0_113 = __p0_113; \ + float64x2_t 
__s1_113 = __p1_113; \ float64x2_t __s2_113 = __p2_113; \ + float64x2_t __rev0_113; __rev0_113 = __builtin_shufflevector(__s0_113, __s0_113, 1, 0); \ + float64x2_t __rev1_113; __rev1_113 = __builtin_shufflevector(__s1_113, __s1_113, 1, 0); \ float64x2_t __rev2_113; __rev2_113 = __builtin_shufflevector(__s2_113, __s2_113, 1, 0); \ - float64_t __ret_113; \ - __ret_113 = __noswap_vfmad_laneq_f64(__s0_113, -__s1_113, __rev2_113, __p3_113); \ + float64x2_t __ret_113; \ + __ret_113 = __noswap_vfmaq_laneq_f64(__rev0_113, -__rev1_113, __rev2_113, __p3_113); \ + __ret_113 = __builtin_shufflevector(__ret_113, __ret_113, 1, 0); \ __ret_113; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmss_laneq_f32(__p0_114, __p1_114, __p2_114, __p3_114) __extension__ ({ \ - float32_t __s0_114 = __p0_114; \ - float32_t __s1_114 = __p1_114; \ +#define vfmsq_laneq_f32(__p0_114, __p1_114, __p2_114, __p3_114) __extension__ ({ \ + float32x4_t __s0_114 = __p0_114; \ + float32x4_t __s1_114 = __p1_114; \ float32x4_t __s2_114 = __p2_114; \ - float32_t __ret_114; \ - __ret_114 = vfmas_laneq_f32(__s0_114, -__s1_114, __s2_114, __p3_114); \ + float32x4_t __ret_114; \ + __ret_114 = vfmaq_laneq_f32(__s0_114, -__s1_114, __s2_114, __p3_114); \ __ret_114; \ }) #else -#define vfmss_laneq_f32(__p0_115, __p1_115, __p2_115, __p3_115) __extension__ ({ \ - float32_t __s0_115 = __p0_115; \ - float32_t __s1_115 = __p1_115; \ +#define vfmsq_laneq_f32(__p0_115, __p1_115, __p2_115, __p3_115) __extension__ ({ \ + float32x4_t __s0_115 = __p0_115; \ + float32x4_t __s1_115 = __p1_115; \ float32x4_t __s2_115 = __p2_115; \ + float32x4_t __rev0_115; __rev0_115 = __builtin_shufflevector(__s0_115, __s0_115, 3, 2, 1, 0); \ + float32x4_t __rev1_115; __rev1_115 = __builtin_shufflevector(__s1_115, __s1_115, 3, 2, 1, 0); \ float32x4_t __rev2_115; __rev2_115 = __builtin_shufflevector(__s2_115, __s2_115, 3, 2, 1, 0); \ - float32_t __ret_115; \ - __ret_115 = __noswap_vfmas_laneq_f32(__s0_115, -__s1_115, __rev2_115, __p3_115); \ + float32x4_t __ret_115; \ + __ret_115 = __noswap_vfmaq_laneq_f32(__rev0_115, -__rev1_115, __rev2_115, __p3_115); \ + __ret_115 = __builtin_shufflevector(__ret_115, __ret_115, 3, 2, 1, 0); \ __ret_115; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_laneq_f64(__p0_116, __p1_116, __p2_116, __p3_116) __extension__ ({ \ - float64x2_t __s0_116 = __p0_116; \ - float64x2_t __s1_116 = __p1_116; \ +#define vfms_laneq_f64(__p0_116, __p1_116, __p2_116, __p3_116) __extension__ ({ \ + float64x1_t __s0_116 = __p0_116; \ + float64x1_t __s1_116 = __p1_116; \ float64x2_t __s2_116 = __p2_116; \ - float64x2_t __ret_116; \ - __ret_116 = vfmaq_laneq_f64(__s0_116, -__s1_116, __s2_116, __p3_116); \ + float64x1_t __ret_116; \ + __ret_116 = vfma_laneq_f64(__s0_116, -__s1_116, __s2_116, __p3_116); \ __ret_116; \ }) #else -#define vfmsq_laneq_f64(__p0_117, __p1_117, __p2_117, __p3_117) __extension__ ({ \ - float64x2_t __s0_117 = __p0_117; \ - float64x2_t __s1_117 = __p1_117; \ +#define vfms_laneq_f64(__p0_117, __p1_117, __p2_117, __p3_117) __extension__ ({ \ + float64x1_t __s0_117 = __p0_117; \ + float64x1_t __s1_117 = __p1_117; \ float64x2_t __s2_117 = __p2_117; \ - float64x2_t __rev0_117; __rev0_117 = __builtin_shufflevector(__s0_117, __s0_117, 1, 0); \ - float64x2_t __rev1_117; __rev1_117 = __builtin_shufflevector(__s1_117, __s1_117, 1, 0); \ float64x2_t __rev2_117; __rev2_117 = __builtin_shufflevector(__s2_117, __s2_117, 1, 0); \ - float64x2_t __ret_117; \ - __ret_117 = __noswap_vfmaq_laneq_f64(__rev0_117, -__rev1_117, __rev2_117, __p3_117); \ - 
__ret_117 = __builtin_shufflevector(__ret_117, __ret_117, 1, 0); \ + float64x1_t __ret_117; \ + __ret_117 = __noswap_vfma_laneq_f64(__s0_117, -__s1_117, __rev2_117, __p3_117); \ __ret_117; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_laneq_f32(__p0_118, __p1_118, __p2_118, __p3_118) __extension__ ({ \ - float32x4_t __s0_118 = __p0_118; \ - float32x4_t __s1_118 = __p1_118; \ +#define vfms_laneq_f32(__p0_118, __p1_118, __p2_118, __p3_118) __extension__ ({ \ + float32x2_t __s0_118 = __p0_118; \ + float32x2_t __s1_118 = __p1_118; \ float32x4_t __s2_118 = __p2_118; \ - float32x4_t __ret_118; \ - __ret_118 = vfmaq_laneq_f32(__s0_118, -__s1_118, __s2_118, __p3_118); \ + float32x2_t __ret_118; \ + __ret_118 = vfma_laneq_f32(__s0_118, -__s1_118, __s2_118, __p3_118); \ __ret_118; \ }) #else -#define vfmsq_laneq_f32(__p0_119, __p1_119, __p2_119, __p3_119) __extension__ ({ \ - float32x4_t __s0_119 = __p0_119; \ - float32x4_t __s1_119 = __p1_119; \ +#define vfms_laneq_f32(__p0_119, __p1_119, __p2_119, __p3_119) __extension__ ({ \ + float32x2_t __s0_119 = __p0_119; \ + float32x2_t __s1_119 = __p1_119; \ float32x4_t __s2_119 = __p2_119; \ - float32x4_t __rev0_119; __rev0_119 = __builtin_shufflevector(__s0_119, __s0_119, 3, 2, 1, 0); \ - float32x4_t __rev1_119; __rev1_119 = __builtin_shufflevector(__s1_119, __s1_119, 3, 2, 1, 0); \ + float32x2_t __rev0_119; __rev0_119 = __builtin_shufflevector(__s0_119, __s0_119, 1, 0); \ + float32x2_t __rev1_119; __rev1_119 = __builtin_shufflevector(__s1_119, __s1_119, 1, 0); \ float32x4_t __rev2_119; __rev2_119 = __builtin_shufflevector(__s2_119, __s2_119, 3, 2, 1, 0); \ - float32x4_t __ret_119; \ - __ret_119 = __noswap_vfmaq_laneq_f32(__rev0_119, -__rev1_119, __rev2_119, __p3_119); \ - __ret_119 = __builtin_shufflevector(__ret_119, __ret_119, 3, 2, 1, 0); \ + float32x2_t __ret_119; \ + __ret_119 = __noswap_vfma_laneq_f32(__rev0_119, -__rev1_119, __rev2_119, __p3_119); \ + __ret_119 = __builtin_shufflevector(__ret_119, __ret_119, 1, 0); \ __ret_119; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vfms_laneq_f64(__p0_120, __p1_120, __p2_120, __p3_120) __extension__ ({ \ - float64x1_t __s0_120 = __p0_120; \ - float64x1_t __s1_120 = __p1_120; \ - float64x2_t __s2_120 = __p2_120; \ - float64x1_t __ret_120; \ - __ret_120 = vfma_laneq_f64(__s0_120, -__s1_120, __s2_120, __p3_120); \ - __ret_120; \ -}) -#else -#define vfms_laneq_f64(__p0_121, __p1_121, __p2_121, __p3_121) __extension__ ({ \ - float64x1_t __s0_121 = __p0_121; \ - float64x1_t __s1_121 = __p1_121; \ - float64x2_t __s2_121 = __p2_121; \ - float64x2_t __rev2_121; __rev2_121 = __builtin_shufflevector(__s2_121, __s2_121, 1, 0); \ - float64x1_t __ret_121; \ - __ret_121 = __noswap_vfma_laneq_f64(__s0_121, -__s1_121, __rev2_121, __p3_121); \ - __ret_121; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfms_laneq_f32(__p0_122, __p1_122, __p2_122, __p3_122) __extension__ ({ \ - float32x2_t __s0_122 = __p0_122; \ - float32x2_t __s1_122 = __p1_122; \ - float32x4_t __s2_122 = __p2_122; \ - float32x2_t __ret_122; \ - __ret_122 = vfma_laneq_f32(__s0_122, -__s1_122, __s2_122, __p3_122); \ - __ret_122; \ -}) -#else -#define vfms_laneq_f32(__p0_123, __p1_123, __p2_123, __p3_123) __extension__ ({ \ - float32x2_t __s0_123 = __p0_123; \ - float32x2_t __s1_123 = __p1_123; \ - float32x4_t __s2_123 = __p2_123; \ - float32x2_t __rev0_123; __rev0_123 = __builtin_shufflevector(__s0_123, __s0_123, 1, 0); \ - float32x2_t __rev1_123; __rev1_123 = __builtin_shufflevector(__s1_123, __s1_123, 1, 0); \ - float32x4_t __rev2_123; 
__rev2_123 = __builtin_shufflevector(__s2_123, __s2_123, 3, 2, 1, 0); \ - float32x2_t __ret_123; \ - __ret_123 = __noswap_vfma_laneq_f32(__rev0_123, -__rev1_123, __rev2_123, __p3_123); \ - __ret_123 = __builtin_shufflevector(__ret_123, __ret_123, 1, 0); \ - __ret_123; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vfmsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { float64x2_t __ret; @@ -55078,20 +46764,11 @@ __ai float32x4_t vfmsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vfms_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { float64x1_t __ret; __ret = vfma_f64(__p0, -__p1, (float64x1_t) {__p2}); return __ret; } -#else -__ai float64x1_t vfms_n_f64(float64x1_t __p0, float64x1_t __p1, float64_t __p2) { - float64x1_t __ret; - __ret = __noswap_vfma_f64(__p0, -__p1, (float64x1_t) {__p2}); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vfms_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; @@ -55144,33 +46821,17 @@ __ai float64x1_t vget_high_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ #define vget_lane_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __s0 = __p0; \ poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ + __ret = (poly64_t) __builtin_neon_vget_lane_i64((poly64x1_t)__s0, __p1); \ __ret; \ }) -#else -#define vget_lane_p64(__p0, __p1) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#define __noswap_vget_lane_p64(__p0, __p1) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vget_lane_i64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __s0 = __p0; \ poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \ + __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((poly64x2_t)__s0, __p1); \ __ret; \ }) #else @@ -55178,13 +46839,13 @@ __ai float64x1_t vget_high_f64(float64x2_t __p0) { poly64x2_t __s0 = __p0; \ poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__rev0, __p1); \ + __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((poly64x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __s0 = __p0; \ poly64_t __ret; \ - __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((int8x16_t)__s0, __p1); \ + __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((poly64x2_t)__s0, __p1); \ __ret; \ }) #endif @@ -55193,7 +46854,7 @@ __ai float64x1_t vget_high_f64(float64x2_t __p0) { #define vgetq_lane_f64(__p0, __p1) __extension__ ({ \ float64x2_t __s0 = __p0; \ float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vgetq_lane_f64((int8x16_t)__s0, __p1); \ + __ret = (float64_t) __builtin_neon_vgetq_lane_f64((float64x2_t)__s0, __p1); \ __ret; \ }) #else @@ -55201,39 +46862,23 @@ __ai float64x1_t vget_high_f64(float64x2_t __p0) { float64x2_t __s0 = __p0; \ float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vgetq_lane_f64((int8x16_t)__rev0, __p1); \ + __ret = (float64_t) __builtin_neon_vgetq_lane_f64((float64x2_t)__rev0, __p1); \ __ret; \ }) #define __noswap_vgetq_lane_f64(__p0, __p1) 
__extension__ ({ \ float64x2_t __s0 = __p0; \ float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vgetq_lane_f64((int8x16_t)__s0, __p1); \ + __ret = (float64_t) __builtin_neon_vgetq_lane_f64((float64x2_t)__s0, __p1); \ __ret; \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vget_lane_f64(__p0, __p1) __extension__ ({ \ float64x1_t __s0 = __p0; \ float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vget_lane_f64((int8x8_t)__s0, __p1); \ + __ret = (float64_t) __builtin_neon_vget_lane_f64((float64x1_t)__s0, __p1); \ __ret; \ }) -#else -#define vget_lane_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vget_lane_f64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#define __noswap_vget_lane_f64(__p0, __p1) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64_t __ret; \ - __ret = (float64_t) __builtin_neon_vget_lane_f64((int8x8_t)__s0, __p1); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vget_low_p64(poly64x2_t __p0) { poly64x1_t __ret; @@ -55264,20 +46909,11 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_p64(__p0) __extension__ ({ \ poly64x1_t __ret; \ __ret = (poly64x1_t) __builtin_neon_vld1_v(__p0, 6); \ __ret; \ }) -#else -#define vld1_p64(__p0) __extension__ ({ \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vld1_v(__p0, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1q_p64(__p0) __extension__ ({ \ poly64x2_t __ret; \ @@ -55308,34 +46944,16 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_f64(__p0) __extension__ ({ \ float64x1_t __ret; \ __ret = (float64x1_t) __builtin_neon_vld1_v(__p0, 10); \ __ret; \ }) -#else -#define vld1_f64(__p0) __extension__ ({ \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vld1_v(__p0, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld1_dup_p64(__p0) __extension__ ({ \ poly64x1_t __ret; \ __ret = (poly64x1_t) __builtin_neon_vld1_dup_v(__p0, 6); \ __ret; \ }) -#else -#define vld1_dup_p64(__p0) __extension__ ({ \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vld1_dup_v(__p0, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_p64(__p0) __extension__ ({ \ poly64x2_t __ret; \ @@ -55366,36 +46984,17 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_dup_f64(__p0) __extension__ ({ \ float64x1_t __ret; \ __ret = (float64x1_t) __builtin_neon_vld1_dup_v(__p0, 10); \ __ret; \ }) -#else -#define vld1_dup_f64(__p0) __extension__ ({ \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vld1_dup_v(__p0, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __s1 = __p1; \ poly64x1_t __ret; \ __ret = (poly64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ __ret; \ }) -#else -#define vld1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __s1 = __p1; \ @@ -55432,36 +47031,17 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __s1 = __p1; \ float64x1_t __ret; \ __ret = 
(float64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ __ret; \ }) -#else -#define vld1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld1_p64_x2(__p0) __extension__ ({ \ poly64x1x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 6); \ __ret; \ }) -#else -#define vld1_p64_x2(__p0) __extension__ ({ \ - poly64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1q_p64_x2(__p0) __extension__ ({ \ poly64x2x2_t __ret; \ @@ -55496,34 +47076,16 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_f64_x2(__p0) __extension__ ({ \ float64x1x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 10); \ __ret; \ }) -#else -#define vld1_f64_x2(__p0) __extension__ ({ \ - float64x1x2_t __ret; \ - __builtin_neon_vld1_x2_v(&__ret, __p0, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld1_p64_x3(__p0) __extension__ ({ \ poly64x1x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 6); \ __ret; \ }) -#else -#define vld1_p64_x3(__p0) __extension__ ({ \ - poly64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1q_p64_x3(__p0) __extension__ ({ \ poly64x2x3_t __ret; \ @@ -55560,34 +47122,16 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_f64_x3(__p0) __extension__ ({ \ float64x1x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 10); \ __ret; \ }) -#else -#define vld1_f64_x3(__p0) __extension__ ({ \ - float64x1x3_t __ret; \ - __builtin_neon_vld1_x3_v(&__ret, __p0, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld1_p64_x4(__p0) __extension__ ({ \ poly64x1x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 6); \ __ret; \ }) -#else -#define vld1_p64_x4(__p0) __extension__ ({ \ - poly64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld1q_p64_x4(__p0) __extension__ ({ \ poly64x2x4_t __ret; \ @@ -55626,34 +47170,16 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld1_f64_x4(__p0) __extension__ ({ \ float64x1x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 10); \ __ret; \ }) -#else -#define vld1_f64_x4(__p0) __extension__ ({ \ - float64x1x4_t __ret; \ - __builtin_neon_vld1_x4_v(&__ret, __p0, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld2_p64(__p0) __extension__ ({ \ poly64x1x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 6); \ __ret; \ }) -#else -#define vld2_p64(__p0) __extension__ ({ \ - poly64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld2q_p64(__p0) __extension__ ({ \ poly64x2x2_t __ret; \ @@ -55722,34 +47248,16 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld2_f64(__p0) __extension__ ({ \ float64x1x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 10); \ __ret; \ }) -#else -#define vld2_f64(__p0) __extension__ ({ \ - float64x1x2_t __ret; \ - __builtin_neon_vld2_v(&__ret, __p0, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld2_dup_p64(__p0) __extension__ ({ \ poly64x1x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 6); \ 
__ret; \ }) -#else -#define vld2_dup_p64(__p0) __extension__ ({ \ - poly64x1x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld2q_dup_p64(__p0) __extension__ ({ \ poly64x2x2_t __ret; \ @@ -55784,36 +47292,17 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld2_dup_f64(__p0) __extension__ ({ \ float64x1x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 10); \ __ret; \ }) -#else -#define vld2_dup_f64(__p0) __extension__ ({ \ - float64x1x2_t __ret; \ - __builtin_neon_vld2_dup_v(&__ret, __p0, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld2_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x2_t __s1 = __p1; \ poly64x1x2_t __ret; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 6); \ __ret; \ }) -#else -#define vld2_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1x2_t __s1 = __p1; \ - poly64x1x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ @@ -55928,7 +47417,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { #define vld2q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ float64x2x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 42); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 42); \ __ret; \ }) #else @@ -55938,7 +47427,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ float64x2x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 42); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -55950,7 +47439,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { #define vld2q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ int64x2x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 35); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 35); \ __ret; \ }) #else @@ -55960,7 +47449,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ int64x2x2_t __ret; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __p2, 35); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -55968,68 +47457,29 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld2_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x2_t __s1 = __p1; \ uint64x1x2_t __ret; \ __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 19); \ __ret; 
\ }) -#else -#define vld2_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1x2_t __s1 = __p1; \ - uint64x1x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 19); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld2_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x2_t __s1 = __p1; \ float64x1x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 10); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 10); \ __ret; \ }) -#else -#define vld2_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1x2_t __s1 = __p1; \ - float64x1x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld2_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x2_t __s1 = __p1; \ int64x1x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 3); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 3); \ __ret; \ }) -#else -#define vld2_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1x2_t __s1 = __p1; \ - int64x1x2_t __ret; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __p2, 3); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld3_p64(__p0) __extension__ ({ \ poly64x1x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 6); \ __ret; \ }) -#else -#define vld3_p64(__p0) __extension__ ({ \ - poly64x1x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld3q_p64(__p0) __extension__ ({ \ poly64x2x3_t __ret; \ @@ -56102,34 +47552,16 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld3_f64(__p0) __extension__ ({ \ float64x1x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 10); \ __ret; \ }) -#else -#define vld3_f64(__p0) __extension__ ({ \ - float64x1x3_t __ret; \ - __builtin_neon_vld3_v(&__ret, __p0, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld3_dup_p64(__p0) __extension__ ({ \ poly64x1x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 6); \ __ret; \ }) -#else -#define vld3_dup_p64(__p0) __extension__ ({ \ - poly64x1x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld3q_dup_p64(__p0) __extension__ ({ \ poly64x2x3_t __ret; \ @@ -56166,36 +47598,17 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld3_dup_f64(__p0) __extension__ ({ \ float64x1x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 10); \ __ret; \ }) -#else -#define vld3_dup_f64(__p0) __extension__ ({ \ - float64x1x3_t __ret; \ - __builtin_neon_vld3_dup_v(&__ret, __p0, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld3_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x3_t __s1 = __p1; \ poly64x1x3_t __ret; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 6); \ __ret; \ }) -#else -#define vld3_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1x3_t __s1 = __p1; \ - poly64x1x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x3_t __s1 = 
__p1; \ @@ -56320,7 +47733,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { #define vld3q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ float64x2x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 42); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 42); \ __ret; \ }) #else @@ -56331,7 +47744,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ float64x2x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 42); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -56344,7 +47757,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { #define vld3q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ int64x2x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 35); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 35); \ __ret; \ }) #else @@ -56355,7 +47768,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ int64x2x3_t __ret; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 35); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -56364,68 +47777,29 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld3_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x3_t __s1 = __p1; \ uint64x1x3_t __ret; \ __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 19); \ __ret; \ }) -#else -#define vld3_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1x3_t __s1 = __p1; \ - uint64x1x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 19); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld3_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x3_t __s1 = __p1; \ float64x1x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 10); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 10); \ __ret; \ }) -#else -#define vld3_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1x3_t __s1 = __p1; \ - float64x1x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld3_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x3_t __s1 = __p1; \ int64x1x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, 
__s1.val[0], __s1.val[1], __s1.val[2], __p2, 3); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 3); \ __ret; \ }) -#else -#define vld3_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1x3_t __s1 = __p1; \ - int64x1x3_t __ret; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 3); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld4_p64(__p0) __extension__ ({ \ poly64x1x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 6); \ __ret; \ }) -#else -#define vld4_p64(__p0) __extension__ ({ \ - poly64x1x4_t __ret; \ - __builtin_neon_vld4_v(&__ret, __p0, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld4q_p64(__p0) __extension__ ({ \ poly64x2x4_t __ret; \ @@ -56502,34 +47876,16 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld4_f64(__p0) __extension__ ({ \ float64x1x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 10); \ __ret; \ }) -#else -#define vld4_f64(__p0) __extension__ ({ \ - float64x1x4_t __ret; \ - __builtin_neon_vld4_v(&__ret, __p0, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld4_dup_p64(__p0) __extension__ ({ \ poly64x1x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 6); \ __ret; \ }) -#else -#define vld4_dup_p64(__p0) __extension__ ({ \ - poly64x1x4_t __ret; \ - __builtin_neon_vld4_dup_v(&__ret, __p0, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld4q_dup_p64(__p0) __extension__ ({ \ poly64x2x4_t __ret; \ @@ -56568,36 +47924,17 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld4_dup_f64(__p0) __extension__ ({ \ float64x1x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 10); \ __ret; \ }) -#else -#define vld4_dup_f64(__p0) __extension__ ({ \ - float64x1x4_t __ret; \ - __builtin_neon_vld4_dup_v(&__ret, __p0, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld4_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x4_t __s1 = __p1; \ poly64x1x4_t __ret; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 6); \ __ret; \ }) -#else -#define vld4_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1x4_t __s1 = __p1; \ - poly64x1x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ @@ -56732,7 +48069,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { #define vld4q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ float64x2x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 42); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 42); \ __ret; \ }) #else @@ -56744,7 +48081,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ float64x2x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 42); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, 
(int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 42); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -56758,7 +48095,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { #define vld4q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ int64x2x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 35); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 35); \ __ret; \ }) #else @@ -56770,7 +48107,7 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ int64x2x4_t __ret; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 35); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 35); \ \ __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ @@ -56780,68 +48117,29 @@ __ai float64x1_t vget_low_f64(float64x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vld4_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x4_t __s1 = __p1; \ uint64x1x4_t __ret; \ __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 19); \ __ret; \ }) -#else -#define vld4_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1x4_t __s1 = __p1; \ - uint64x1x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 19); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld4_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x4_t __s1 = __p1; \ float64x1x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 10); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 10); \ __ret; \ }) -#else -#define vld4_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1x4_t __s1 = __p1; \ - float64x1x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vld4_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x4_t __s1 = __p1; \ int64x1x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 3); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 3); \ __ret; \ }) -#else -#define vld4_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1x4_t __s1 = __p1; \ - int64x1x4_t __ret; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 3); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vldrq_p128(__p0) __extension__ ({ \ poly128_t __ret; \ __ret = (poly128_t) __builtin_neon_vldrq_p128(__p0); \ __ret; \ }) 
-#else -#define vldrq_p128(__p0) __extension__ ({ \ - poly128_t __ret; \ - __ret = (poly128_t) __builtin_neon_vldrq_p128(__p0); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vmaxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; @@ -56859,31 +48157,22 @@ __ai float64x2_t vmaxq_f64(float64x2_t __p0, float64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vmax_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } -#else -__ai float64x1_t vmax_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 10); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64_t vmaxnmvq_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vmaxnmvq_f64((int8x16_t)__p0); + __ret = (float64_t) __builtin_neon_vmaxnmvq_f64(__p0); return __ret; } #else __ai float64_t vmaxnmvq_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64_t __ret; - __ret = (float64_t) __builtin_neon_vmaxnmvq_f64((int8x16_t)__rev0); + __ret = (float64_t) __builtin_neon_vmaxnmvq_f64(__rev0); return __ret; } #endif @@ -56891,14 +48180,14 @@ __ai float64_t vmaxnmvq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vmaxnmvq_f32(float32x4_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxnmvq_f32((int8x16_t)__p0); + __ret = (float32_t) __builtin_neon_vmaxnmvq_f32(__p0); return __ret; } #else __ai float32_t vmaxnmvq_f32(float32x4_t __p0) { float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxnmvq_f32((int8x16_t)__rev0); + __ret = (float32_t) __builtin_neon_vmaxnmvq_f32(__rev0); return __ret; } #endif @@ -56906,14 +48195,14 @@ __ai float32_t vmaxnmvq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vmaxnmv_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxnmv_f32((int8x8_t)__p0); + __ret = (float32_t) __builtin_neon_vmaxnmv_f32(__p0); return __ret; } #else __ai float32_t vmaxnmv_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxnmv_f32((int8x8_t)__rev0); + __ret = (float32_t) __builtin_neon_vmaxnmv_f32(__rev0); return __ret; } #endif @@ -56921,14 +48210,14 @@ __ai float32_t vmaxnmv_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint8_t vmaxvq_u8(uint8x16_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vmaxvq_u8((int8x16_t)__p0); + __ret = (uint8_t) __builtin_neon_vmaxvq_u8(__p0); return __ret; } #else __ai uint8_t vmaxvq_u8(uint8x16_t __p0) { uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vmaxvq_u8((int8x16_t)__rev0); + __ret = (uint8_t) __builtin_neon_vmaxvq_u8(__rev0); return __ret; } #endif @@ -56936,14 +48225,14 @@ __ai uint8_t vmaxvq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint32_t vmaxvq_u32(uint32x4_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vmaxvq_u32((int8x16_t)__p0); + __ret = (uint32_t) __builtin_neon_vmaxvq_u32(__p0); return __ret; } #else __ai uint32_t vmaxvq_u32(uint32x4_t __p0) { uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32_t __ret; - __ret = 
(uint32_t) __builtin_neon_vmaxvq_u32((int8x16_t)__rev0); + __ret = (uint32_t) __builtin_neon_vmaxvq_u32(__rev0); return __ret; } #endif @@ -56951,14 +48240,14 @@ __ai uint32_t vmaxvq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint16_t vmaxvq_u16(uint16x8_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vmaxvq_u16((int8x16_t)__p0); + __ret = (uint16_t) __builtin_neon_vmaxvq_u16(__p0); return __ret; } #else __ai uint16_t vmaxvq_u16(uint16x8_t __p0) { uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vmaxvq_u16((int8x16_t)__rev0); + __ret = (uint16_t) __builtin_neon_vmaxvq_u16(__rev0); return __ret; } #endif @@ -56966,14 +48255,14 @@ __ai uint16_t vmaxvq_u16(uint16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int8_t vmaxvq_s8(int8x16_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vmaxvq_s8((int8x16_t)__p0); + __ret = (int8_t) __builtin_neon_vmaxvq_s8(__p0); return __ret; } #else __ai int8_t vmaxvq_s8(int8x16_t __p0) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8_t __ret; - __ret = (int8_t) __builtin_neon_vmaxvq_s8((int8x16_t)__rev0); + __ret = (int8_t) __builtin_neon_vmaxvq_s8(__rev0); return __ret; } #endif @@ -56981,14 +48270,14 @@ __ai int8_t vmaxvq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float64_t vmaxvq_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vmaxvq_f64((int8x16_t)__p0); + __ret = (float64_t) __builtin_neon_vmaxvq_f64(__p0); return __ret; } #else __ai float64_t vmaxvq_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64_t __ret; - __ret = (float64_t) __builtin_neon_vmaxvq_f64((int8x16_t)__rev0); + __ret = (float64_t) __builtin_neon_vmaxvq_f64(__rev0); return __ret; } #endif @@ -56996,14 +48285,14 @@ __ai float64_t vmaxvq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vmaxvq_f32(float32x4_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxvq_f32((int8x16_t)__p0); + __ret = (float32_t) __builtin_neon_vmaxvq_f32(__p0); return __ret; } #else __ai float32_t vmaxvq_f32(float32x4_t __p0) { float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxvq_f32((int8x16_t)__rev0); + __ret = (float32_t) __builtin_neon_vmaxvq_f32(__rev0); return __ret; } #endif @@ -57011,14 +48300,14 @@ __ai float32_t vmaxvq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int32_t vmaxvq_s32(int32x4_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vmaxvq_s32((int8x16_t)__p0); + __ret = (int32_t) __builtin_neon_vmaxvq_s32(__p0); return __ret; } #else __ai int32_t vmaxvq_s32(int32x4_t __p0) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32_t __ret; - __ret = (int32_t) __builtin_neon_vmaxvq_s32((int8x16_t)__rev0); + __ret = (int32_t) __builtin_neon_vmaxvq_s32(__rev0); return __ret; } #endif @@ -57026,14 +48315,14 @@ __ai int32_t vmaxvq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int16_t vmaxvq_s16(int16x8_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vmaxvq_s16((int8x16_t)__p0); + __ret = (int16_t) __builtin_neon_vmaxvq_s16(__p0); return __ret; } #else __ai int16_t vmaxvq_s16(int16x8_t __p0) { int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16_t __ret; - __ret = (int16_t) 
__builtin_neon_vmaxvq_s16((int8x16_t)__rev0); + __ret = (int16_t) __builtin_neon_vmaxvq_s16(__rev0); return __ret; } #endif @@ -57041,14 +48330,14 @@ __ai int16_t vmaxvq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint8_t vmaxv_u8(uint8x8_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vmaxv_u8((int8x8_t)__p0); + __ret = (uint8_t) __builtin_neon_vmaxv_u8(__p0); return __ret; } #else __ai uint8_t vmaxv_u8(uint8x8_t __p0) { uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vmaxv_u8((int8x8_t)__rev0); + __ret = (uint8_t) __builtin_neon_vmaxv_u8(__rev0); return __ret; } #endif @@ -57056,14 +48345,14 @@ __ai uint8_t vmaxv_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint32_t vmaxv_u32(uint32x2_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vmaxv_u32((int8x8_t)__p0); + __ret = (uint32_t) __builtin_neon_vmaxv_u32(__p0); return __ret; } #else __ai uint32_t vmaxv_u32(uint32x2_t __p0) { uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vmaxv_u32((int8x8_t)__rev0); + __ret = (uint32_t) __builtin_neon_vmaxv_u32(__rev0); return __ret; } #endif @@ -57071,14 +48360,14 @@ __ai uint32_t vmaxv_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint16_t vmaxv_u16(uint16x4_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vmaxv_u16((int8x8_t)__p0); + __ret = (uint16_t) __builtin_neon_vmaxv_u16(__p0); return __ret; } #else __ai uint16_t vmaxv_u16(uint16x4_t __p0) { uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vmaxv_u16((int8x8_t)__rev0); + __ret = (uint16_t) __builtin_neon_vmaxv_u16(__rev0); return __ret; } #endif @@ -57086,14 +48375,14 @@ __ai uint16_t vmaxv_u16(uint16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int8_t vmaxv_s8(int8x8_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vmaxv_s8((int8x8_t)__p0); + __ret = (int8_t) __builtin_neon_vmaxv_s8(__p0); return __ret; } #else __ai int8_t vmaxv_s8(int8x8_t __p0) { int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8_t __ret; - __ret = (int8_t) __builtin_neon_vmaxv_s8((int8x8_t)__rev0); + __ret = (int8_t) __builtin_neon_vmaxv_s8(__rev0); return __ret; } #endif @@ -57101,14 +48390,14 @@ __ai int8_t vmaxv_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vmaxv_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxv_f32((int8x8_t)__p0); + __ret = (float32_t) __builtin_neon_vmaxv_f32(__p0); return __ret; } #else __ai float32_t vmaxv_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxv_f32((int8x8_t)__rev0); + __ret = (float32_t) __builtin_neon_vmaxv_f32(__rev0); return __ret; } #endif @@ -57116,14 +48405,14 @@ __ai float32_t vmaxv_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int32_t vmaxv_s32(int32x2_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vmaxv_s32((int8x8_t)__p0); + __ret = (int32_t) __builtin_neon_vmaxv_s32(__p0); return __ret; } #else __ai int32_t vmaxv_s32(int32x2_t __p0) { int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int32_t __ret; - __ret = (int32_t) __builtin_neon_vmaxv_s32((int8x8_t)__rev0); + __ret = (int32_t) __builtin_neon_vmaxv_s32(__rev0); return __ret; } #endif @@ -57131,14 +48420,14 @@ __ai int32_t 
vmaxv_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int16_t vmaxv_s16(int16x4_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vmaxv_s16((int8x8_t)__p0); + __ret = (int16_t) __builtin_neon_vmaxv_s16(__p0); return __ret; } #else __ai int16_t vmaxv_s16(int16x4_t __p0) { int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16_t __ret; - __ret = (int16_t) __builtin_neon_vmaxv_s16((int8x8_t)__rev0); + __ret = (int16_t) __builtin_neon_vmaxv_s16(__rev0); return __ret; } #endif @@ -57160,31 +48449,22 @@ __ai float64x2_t vminq_f64(float64x2_t __p0, float64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vmin_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } -#else -__ai float64x1_t vmin_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 10); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64_t vminnmvq_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vminnmvq_f64((int8x16_t)__p0); + __ret = (float64_t) __builtin_neon_vminnmvq_f64(__p0); return __ret; } #else __ai float64_t vminnmvq_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64_t __ret; - __ret = (float64_t) __builtin_neon_vminnmvq_f64((int8x16_t)__rev0); + __ret = (float64_t) __builtin_neon_vminnmvq_f64(__rev0); return __ret; } #endif @@ -57192,14 +48472,14 @@ __ai float64_t vminnmvq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vminnmvq_f32(float32x4_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vminnmvq_f32((int8x16_t)__p0); + __ret = (float32_t) __builtin_neon_vminnmvq_f32(__p0); return __ret; } #else __ai float32_t vminnmvq_f32(float32x4_t __p0) { float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vminnmvq_f32((int8x16_t)__rev0); + __ret = (float32_t) __builtin_neon_vminnmvq_f32(__rev0); return __ret; } #endif @@ -57207,14 +48487,14 @@ __ai float32_t vminnmvq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vminnmv_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vminnmv_f32((int8x8_t)__p0); + __ret = (float32_t) __builtin_neon_vminnmv_f32(__p0); return __ret; } #else __ai float32_t vminnmv_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vminnmv_f32((int8x8_t)__rev0); + __ret = (float32_t) __builtin_neon_vminnmv_f32(__rev0); return __ret; } #endif @@ -57222,14 +48502,14 @@ __ai float32_t vminnmv_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint8_t vminvq_u8(uint8x16_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vminvq_u8((int8x16_t)__p0); + __ret = (uint8_t) __builtin_neon_vminvq_u8(__p0); return __ret; } #else __ai uint8_t vminvq_u8(uint8x16_t __p0) { uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vminvq_u8((int8x16_t)__rev0); + __ret = (uint8_t) __builtin_neon_vminvq_u8(__rev0); return __ret; } #endif @@ -57237,14 +48517,14 @@ __ai uint8_t vminvq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint32_t vminvq_u32(uint32x4_t __p0) { uint32_t __ret; - __ret = (uint32_t) 
__builtin_neon_vminvq_u32((int8x16_t)__p0); + __ret = (uint32_t) __builtin_neon_vminvq_u32(__p0); return __ret; } #else __ai uint32_t vminvq_u32(uint32x4_t __p0) { uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vminvq_u32((int8x16_t)__rev0); + __ret = (uint32_t) __builtin_neon_vminvq_u32(__rev0); return __ret; } #endif @@ -57252,14 +48532,14 @@ __ai uint32_t vminvq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint16_t vminvq_u16(uint16x8_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vminvq_u16((int8x16_t)__p0); + __ret = (uint16_t) __builtin_neon_vminvq_u16(__p0); return __ret; } #else __ai uint16_t vminvq_u16(uint16x8_t __p0) { uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vminvq_u16((int8x16_t)__rev0); + __ret = (uint16_t) __builtin_neon_vminvq_u16(__rev0); return __ret; } #endif @@ -57267,14 +48547,14 @@ __ai uint16_t vminvq_u16(uint16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int8_t vminvq_s8(int8x16_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vminvq_s8((int8x16_t)__p0); + __ret = (int8_t) __builtin_neon_vminvq_s8(__p0); return __ret; } #else __ai int8_t vminvq_s8(int8x16_t __p0) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8_t __ret; - __ret = (int8_t) __builtin_neon_vminvq_s8((int8x16_t)__rev0); + __ret = (int8_t) __builtin_neon_vminvq_s8(__rev0); return __ret; } #endif @@ -57282,14 +48562,14 @@ __ai int8_t vminvq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float64_t vminvq_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vminvq_f64((int8x16_t)__p0); + __ret = (float64_t) __builtin_neon_vminvq_f64(__p0); return __ret; } #else __ai float64_t vminvq_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64_t __ret; - __ret = (float64_t) __builtin_neon_vminvq_f64((int8x16_t)__rev0); + __ret = (float64_t) __builtin_neon_vminvq_f64(__rev0); return __ret; } #endif @@ -57297,14 +48577,14 @@ __ai float64_t vminvq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vminvq_f32(float32x4_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vminvq_f32((int8x16_t)__p0); + __ret = (float32_t) __builtin_neon_vminvq_f32(__p0); return __ret; } #else __ai float32_t vminvq_f32(float32x4_t __p0) { float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vminvq_f32((int8x16_t)__rev0); + __ret = (float32_t) __builtin_neon_vminvq_f32(__rev0); return __ret; } #endif @@ -57312,14 +48592,14 @@ __ai float32_t vminvq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int32_t vminvq_s32(int32x4_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vminvq_s32((int8x16_t)__p0); + __ret = (int32_t) __builtin_neon_vminvq_s32(__p0); return __ret; } #else __ai int32_t vminvq_s32(int32x4_t __p0) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32_t __ret; - __ret = (int32_t) __builtin_neon_vminvq_s32((int8x16_t)__rev0); + __ret = (int32_t) __builtin_neon_vminvq_s32(__rev0); return __ret; } #endif @@ -57327,14 +48607,14 @@ __ai int32_t vminvq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int16_t vminvq_s16(int16x8_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vminvq_s16((int8x16_t)__p0); + __ret 
= (int16_t) __builtin_neon_vminvq_s16(__p0); return __ret; } #else __ai int16_t vminvq_s16(int16x8_t __p0) { int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16_t __ret; - __ret = (int16_t) __builtin_neon_vminvq_s16((int8x16_t)__rev0); + __ret = (int16_t) __builtin_neon_vminvq_s16(__rev0); return __ret; } #endif @@ -57342,14 +48622,14 @@ __ai int16_t vminvq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint8_t vminv_u8(uint8x8_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vminv_u8((int8x8_t)__p0); + __ret = (uint8_t) __builtin_neon_vminv_u8(__p0); return __ret; } #else __ai uint8_t vminv_u8(uint8x8_t __p0) { uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vminv_u8((int8x8_t)__rev0); + __ret = (uint8_t) __builtin_neon_vminv_u8(__rev0); return __ret; } #endif @@ -57357,14 +48637,14 @@ __ai uint8_t vminv_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint32_t vminv_u32(uint32x2_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vminv_u32((int8x8_t)__p0); + __ret = (uint32_t) __builtin_neon_vminv_u32(__p0); return __ret; } #else __ai uint32_t vminv_u32(uint32x2_t __p0) { uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vminv_u32((int8x8_t)__rev0); + __ret = (uint32_t) __builtin_neon_vminv_u32(__rev0); return __ret; } #endif @@ -57372,14 +48652,14 @@ __ai uint32_t vminv_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai uint16_t vminv_u16(uint16x4_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vminv_u16((int8x8_t)__p0); + __ret = (uint16_t) __builtin_neon_vminv_u16(__p0); return __ret; } #else __ai uint16_t vminv_u16(uint16x4_t __p0) { uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vminv_u16((int8x8_t)__rev0); + __ret = (uint16_t) __builtin_neon_vminv_u16(__rev0); return __ret; } #endif @@ -57387,14 +48667,14 @@ __ai uint16_t vminv_u16(uint16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int8_t vminv_s8(int8x8_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vminv_s8((int8x8_t)__p0); + __ret = (int8_t) __builtin_neon_vminv_s8(__p0); return __ret; } #else __ai int8_t vminv_s8(int8x8_t __p0) { int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8_t __ret; - __ret = (int8_t) __builtin_neon_vminv_s8((int8x8_t)__rev0); + __ret = (int8_t) __builtin_neon_vminv_s8(__rev0); return __ret; } #endif @@ -57402,14 +48682,14 @@ __ai int8_t vminv_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vminv_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vminv_f32((int8x8_t)__p0); + __ret = (float32_t) __builtin_neon_vminv_f32(__p0); return __ret; } #else __ai float32_t vminv_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vminv_f32((int8x8_t)__rev0); + __ret = (float32_t) __builtin_neon_vminv_f32(__rev0); return __ret; } #endif @@ -57417,14 +48697,14 @@ __ai float32_t vminv_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int32_t vminv_s32(int32x2_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vminv_s32((int8x8_t)__p0); + __ret = (int32_t) __builtin_neon_vminv_s32(__p0); return __ret; } #else __ai int32_t vminv_s32(int32x2_t __p0) { int32x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 1, 0); int32_t __ret; - __ret = (int32_t) __builtin_neon_vminv_s32((int8x8_t)__rev0); + __ret = (int32_t) __builtin_neon_vminv_s32(__rev0); return __ret; } #endif @@ -57432,14 +48712,14 @@ __ai int32_t vminv_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int16_t vminv_s16(int16x4_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vminv_s16((int8x8_t)__p0); + __ret = (int16_t) __builtin_neon_vminv_s16(__p0); return __ret; } #else __ai int16_t vminv_s16(int16x4_t __p0) { int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int16_t __ret; - __ret = (int16_t) __builtin_neon_vminv_s16((int8x8_t)__rev0); + __ret = (int16_t) __builtin_neon_vminv_s16(__rev0); return __ret; } #endif @@ -57462,20 +48742,11 @@ __ai float64x2_t vmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vmla_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } -#else -__ai float64x1_t vmla_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = __p0 + __p1 * __p2; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ #define vmlaq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __s0 = __p0; \ @@ -58039,20 +49310,11 @@ __ai float64x2_t vmlsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } -#else -__ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = __p0 - __p1 * __p2; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ #define vmlsq_laneq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \ uint32x4_t __s0 = __p0; \ @@ -58598,20 +49860,11 @@ __ai float64x2_t vmlsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) }) #endif -#ifdef __LITTLE_ENDIAN__ __ai poly64x1_t vmov_n_p64(poly64_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t) {__p0}; return __ret; } -#else -__ai poly64x1_t vmov_n_p64(poly64_t __p0) { - poly64x1_t __ret; - __ret = (poly64x1_t) {__p0}; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai poly64x2_t vmovq_n_p64(poly64_t __p0) { poly64x2_t __ret; @@ -58642,162 +49895,153 @@ __ai float64x2_t vmovq_n_f64(float64_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vmov_n_f64(float64_t __p0) { float64x1_t __ret; __ret = (float64x1_t) {__p0}; return __ret; } +#ifdef __LITTLE_ENDIAN__ +__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_120) { + uint16x8_t __ret_120; + uint8x8_t __a1_120 = vget_high_u8(__p0_120); + __ret_120 = (uint16x8_t)(vshll_n_u8(__a1_120, 0)); + return __ret_120; +} #else -__ai float64x1_t vmov_n_f64(float64_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) {__p0}; - return __ret; +__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_121) { + uint8x16_t __rev0_121; __rev0_121 = __builtin_shufflevector(__p0_121, __p0_121, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret_121; + uint8x8_t __a1_121 = __noswap_vget_high_u8(__rev0_121); + __ret_121 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_121, 0)); + __ret_121 = __builtin_shufflevector(__ret_121, __ret_121, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret_121; +} +__ai uint16x8_t __noswap_vmovl_high_u8(uint8x16_t __p0_122) { + uint16x8_t __ret_122; + uint8x8_t __a1_122 = __noswap_vget_high_u8(__p0_122); + __ret_122 = 
(uint16x8_t)(__noswap_vshll_n_u8(__a1_122, 0)); + return __ret_122; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_124) { - uint16x8_t __ret_124; - uint8x8_t __a1_124 = vget_high_u8(__p0_124); - __ret_124 = (uint16x8_t)(vshll_n_u8(__a1_124, 0)); +__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_123) { + uint64x2_t __ret_123; + uint32x2_t __a1_123 = vget_high_u32(__p0_123); + __ret_123 = (uint64x2_t)(vshll_n_u32(__a1_123, 0)); + return __ret_123; +} +#else +__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_124) { + uint32x4_t __rev0_124; __rev0_124 = __builtin_shufflevector(__p0_124, __p0_124, 3, 2, 1, 0); + uint64x2_t __ret_124; + uint32x2_t __a1_124 = __noswap_vget_high_u32(__rev0_124); + __ret_124 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_124, 0)); + __ret_124 = __builtin_shufflevector(__ret_124, __ret_124, 1, 0); return __ret_124; } -#else -__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_125) { - uint8x16_t __rev0_125; __rev0_125 = __builtin_shufflevector(__p0_125, __p0_125, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret_125; - uint8x8_t __a1_125 = __noswap_vget_high_u8(__rev0_125); - __ret_125 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_125, 0)); - __ret_125 = __builtin_shufflevector(__ret_125, __ret_125, 7, 6, 5, 4, 3, 2, 1, 0); +__ai uint64x2_t __noswap_vmovl_high_u32(uint32x4_t __p0_125) { + uint64x2_t __ret_125; + uint32x2_t __a1_125 = __noswap_vget_high_u32(__p0_125); + __ret_125 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_125, 0)); return __ret_125; } -__ai uint16x8_t __noswap_vmovl_high_u8(uint8x16_t __p0_126) { - uint16x8_t __ret_126; - uint8x8_t __a1_126 = __noswap_vget_high_u8(__p0_126); - __ret_126 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_126, 0)); +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_126) { + uint32x4_t __ret_126; + uint16x4_t __a1_126 = vget_high_u16(__p0_126); + __ret_126 = (uint32x4_t)(vshll_n_u16(__a1_126, 0)); return __ret_126; } -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_127) { - uint64x2_t __ret_127; - uint32x2_t __a1_127 = vget_high_u32(__p0_127); - __ret_127 = (uint64x2_t)(vshll_n_u32(__a1_127, 0)); +#else +__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_127) { + uint16x8_t __rev0_127; __rev0_127 = __builtin_shufflevector(__p0_127, __p0_127, 7, 6, 5, 4, 3, 2, 1, 0); + uint32x4_t __ret_127; + uint16x4_t __a1_127 = __noswap_vget_high_u16(__rev0_127); + __ret_127 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_127, 0)); + __ret_127 = __builtin_shufflevector(__ret_127, __ret_127, 3, 2, 1, 0); return __ret_127; } -#else -__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_128) { - uint32x4_t __rev0_128; __rev0_128 = __builtin_shufflevector(__p0_128, __p0_128, 3, 2, 1, 0); - uint64x2_t __ret_128; - uint32x2_t __a1_128 = __noswap_vget_high_u32(__rev0_128); - __ret_128 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_128, 0)); - __ret_128 = __builtin_shufflevector(__ret_128, __ret_128, 1, 0); +__ai uint32x4_t __noswap_vmovl_high_u16(uint16x8_t __p0_128) { + uint32x4_t __ret_128; + uint16x4_t __a1_128 = __noswap_vget_high_u16(__p0_128); + __ret_128 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_128, 0)); return __ret_128; } -__ai uint64x2_t __noswap_vmovl_high_u32(uint32x4_t __p0_129) { - uint64x2_t __ret_129; - uint32x2_t __a1_129 = __noswap_vget_high_u32(__p0_129); - __ret_129 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_129, 0)); +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int16x8_t vmovl_high_s8(int8x16_t __p0_129) { + int16x8_t __ret_129; + int8x8_t __a1_129 = 
vget_high_s8(__p0_129); + __ret_129 = (int16x8_t)(vshll_n_s8(__a1_129, 0)); return __ret_129; } -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_130) { - uint32x4_t __ret_130; - uint16x4_t __a1_130 = vget_high_u16(__p0_130); - __ret_130 = (uint32x4_t)(vshll_n_u16(__a1_130, 0)); +#else +__ai int16x8_t vmovl_high_s8(int8x16_t __p0_130) { + int8x16_t __rev0_130; __rev0_130 = __builtin_shufflevector(__p0_130, __p0_130, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __ret_130; + int8x8_t __a1_130 = __noswap_vget_high_s8(__rev0_130); + __ret_130 = (int16x8_t)(__noswap_vshll_n_s8(__a1_130, 0)); + __ret_130 = __builtin_shufflevector(__ret_130, __ret_130, 7, 6, 5, 4, 3, 2, 1, 0); return __ret_130; } -#else -__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_131) { - uint16x8_t __rev0_131; __rev0_131 = __builtin_shufflevector(__p0_131, __p0_131, 7, 6, 5, 4, 3, 2, 1, 0); - uint32x4_t __ret_131; - uint16x4_t __a1_131 = __noswap_vget_high_u16(__rev0_131); - __ret_131 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_131, 0)); - __ret_131 = __builtin_shufflevector(__ret_131, __ret_131, 3, 2, 1, 0); +__ai int16x8_t __noswap_vmovl_high_s8(int8x16_t __p0_131) { + int16x8_t __ret_131; + int8x8_t __a1_131 = __noswap_vget_high_s8(__p0_131); + __ret_131 = (int16x8_t)(__noswap_vshll_n_s8(__a1_131, 0)); return __ret_131; } -__ai uint32x4_t __noswap_vmovl_high_u16(uint16x8_t __p0_132) { - uint32x4_t __ret_132; - uint16x4_t __a1_132 = __noswap_vget_high_u16(__p0_132); - __ret_132 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_132, 0)); +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int64x2_t vmovl_high_s32(int32x4_t __p0_132) { + int64x2_t __ret_132; + int32x2_t __a1_132 = vget_high_s32(__p0_132); + __ret_132 = (int64x2_t)(vshll_n_s32(__a1_132, 0)); return __ret_132; } -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vmovl_high_s8(int8x16_t __p0_133) { - int16x8_t __ret_133; - int8x8_t __a1_133 = vget_high_s8(__p0_133); - __ret_133 = (int16x8_t)(vshll_n_s8(__a1_133, 0)); +#else +__ai int64x2_t vmovl_high_s32(int32x4_t __p0_133) { + int32x4_t __rev0_133; __rev0_133 = __builtin_shufflevector(__p0_133, __p0_133, 3, 2, 1, 0); + int64x2_t __ret_133; + int32x2_t __a1_133 = __noswap_vget_high_s32(__rev0_133); + __ret_133 = (int64x2_t)(__noswap_vshll_n_s32(__a1_133, 0)); + __ret_133 = __builtin_shufflevector(__ret_133, __ret_133, 1, 0); return __ret_133; } -#else -__ai int16x8_t vmovl_high_s8(int8x16_t __p0_134) { - int8x16_t __rev0_134; __rev0_134 = __builtin_shufflevector(__p0_134, __p0_134, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __ret_134; - int8x8_t __a1_134 = __noswap_vget_high_s8(__rev0_134); - __ret_134 = (int16x8_t)(__noswap_vshll_n_s8(__a1_134, 0)); - __ret_134 = __builtin_shufflevector(__ret_134, __ret_134, 7, 6, 5, 4, 3, 2, 1, 0); +__ai int64x2_t __noswap_vmovl_high_s32(int32x4_t __p0_134) { + int64x2_t __ret_134; + int32x2_t __a1_134 = __noswap_vget_high_s32(__p0_134); + __ret_134 = (int64x2_t)(__noswap_vshll_n_s32(__a1_134, 0)); return __ret_134; } -__ai int16x8_t __noswap_vmovl_high_s8(int8x16_t __p0_135) { - int16x8_t __ret_135; - int8x8_t __a1_135 = __noswap_vget_high_s8(__p0_135); - __ret_135 = (int16x8_t)(__noswap_vshll_n_s8(__a1_135, 0)); +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai int32x4_t vmovl_high_s16(int16x8_t __p0_135) { + int32x4_t __ret_135; + int16x4_t __a1_135 = vget_high_s16(__p0_135); + __ret_135 = (int32x4_t)(vshll_n_s16(__a1_135, 0)); return __ret_135; } -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vmovl_high_s32(int32x4_t 
__p0_136) { - int64x2_t __ret_136; - int32x2_t __a1_136 = vget_high_s32(__p0_136); - __ret_136 = (int64x2_t)(vshll_n_s32(__a1_136, 0)); +#else +__ai int32x4_t vmovl_high_s16(int16x8_t __p0_136) { + int16x8_t __rev0_136; __rev0_136 = __builtin_shufflevector(__p0_136, __p0_136, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __ret_136; + int16x4_t __a1_136 = __noswap_vget_high_s16(__rev0_136); + __ret_136 = (int32x4_t)(__noswap_vshll_n_s16(__a1_136, 0)); + __ret_136 = __builtin_shufflevector(__ret_136, __ret_136, 3, 2, 1, 0); return __ret_136; } -#else -__ai int64x2_t vmovl_high_s32(int32x4_t __p0_137) { - int32x4_t __rev0_137; __rev0_137 = __builtin_shufflevector(__p0_137, __p0_137, 3, 2, 1, 0); - int64x2_t __ret_137; - int32x2_t __a1_137 = __noswap_vget_high_s32(__rev0_137); - __ret_137 = (int64x2_t)(__noswap_vshll_n_s32(__a1_137, 0)); - __ret_137 = __builtin_shufflevector(__ret_137, __ret_137, 1, 0); +__ai int32x4_t __noswap_vmovl_high_s16(int16x8_t __p0_137) { + int32x4_t __ret_137; + int16x4_t __a1_137 = __noswap_vget_high_s16(__p0_137); + __ret_137 = (int32x4_t)(__noswap_vshll_n_s16(__a1_137, 0)); return __ret_137; } -__ai int64x2_t __noswap_vmovl_high_s32(int32x4_t __p0_138) { - int64x2_t __ret_138; - int32x2_t __a1_138 = __noswap_vget_high_s32(__p0_138); - __ret_138 = (int64x2_t)(__noswap_vshll_n_s32(__a1_138, 0)); - return __ret_138; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vmovl_high_s16(int16x8_t __p0_139) { - int32x4_t __ret_139; - int16x4_t __a1_139 = vget_high_s16(__p0_139); - __ret_139 = (int32x4_t)(vshll_n_s16(__a1_139, 0)); - return __ret_139; -} -#else -__ai int32x4_t vmovl_high_s16(int16x8_t __p0_140) { - int16x8_t __rev0_140; __rev0_140 = __builtin_shufflevector(__p0_140, __p0_140, 7, 6, 5, 4, 3, 2, 1, 0); - int32x4_t __ret_140; - int16x4_t __a1_140 = __noswap_vget_high_s16(__rev0_140); - __ret_140 = (int32x4_t)(__noswap_vshll_n_s16(__a1_140, 0)); - __ret_140 = __builtin_shufflevector(__ret_140, __ret_140, 3, 2, 1, 0); - return __ret_140; -} -__ai int32x4_t __noswap_vmovl_high_s16(int16x8_t __p0_141) { - int32x4_t __ret_141; - int16x4_t __a1_141 = __noswap_vget_high_s16(__p0_141); - __ret_141 = (int32x4_t)(__noswap_vshll_n_s16(__a1_141, 0)); - return __ret_141; -} #endif #ifdef __LITTLE_ENDIAN__ @@ -58919,58 +50163,37 @@ __ai float64x2_t vmulq_f64(float64x2_t __p0, float64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = __p0 * __p1; return __ret; } -#else -__ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = __p0 * __p1; - return __ret; -} -#endif - +#define vmuld_lane_f64(__p0_138, __p1_138, __p2_138) __extension__ ({ \ + float64_t __s0_138 = __p0_138; \ + float64x1_t __s1_138 = __p1_138; \ + float64_t __ret_138; \ + __ret_138 = __s0_138 * vget_lane_f64(__s1_138, __p2_138); \ + __ret_138; \ +}) #ifdef __LITTLE_ENDIAN__ -#define vmuld_lane_f64(__p0_142, __p1_142, __p2_142) __extension__ ({ \ - float64_t __s0_142 = __p0_142; \ - float64x1_t __s1_142 = __p1_142; \ - float64_t __ret_142; \ - __ret_142 = __s0_142 * vget_lane_f64(__s1_142, __p2_142); \ - __ret_142; \ +#define vmuls_lane_f32(__p0_139, __p1_139, __p2_139) __extension__ ({ \ + float32_t __s0_139 = __p0_139; \ + float32x2_t __s1_139 = __p1_139; \ + float32_t __ret_139; \ + __ret_139 = __s0_139 * vget_lane_f32(__s1_139, __p2_139); \ + __ret_139; \ }) #else -#define vmuld_lane_f64(__p0_143, __p1_143, __p2_143) __extension__ ({ \ - float64_t __s0_143 = __p0_143; \ - float64x1_t 
__s1_143 = __p1_143; \ - float64_t __ret_143; \ - __ret_143 = __s0_143 * __noswap_vget_lane_f64(__s1_143, __p2_143); \ - __ret_143; \ +#define vmuls_lane_f32(__p0_140, __p1_140, __p2_140) __extension__ ({ \ + float32_t __s0_140 = __p0_140; \ + float32x2_t __s1_140 = __p1_140; \ + float32x2_t __rev1_140; __rev1_140 = __builtin_shufflevector(__s1_140, __s1_140, 1, 0); \ + float32_t __ret_140; \ + __ret_140 = __s0_140 * __noswap_vget_lane_f32(__rev1_140, __p2_140); \ + __ret_140; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vmuls_lane_f32(__p0_144, __p1_144, __p2_144) __extension__ ({ \ - float32_t __s0_144 = __p0_144; \ - float32x2_t __s1_144 = __p1_144; \ - float32_t __ret_144; \ - __ret_144 = __s0_144 * vget_lane_f32(__s1_144, __p2_144); \ - __ret_144; \ -}) -#else -#define vmuls_lane_f32(__p0_145, __p1_145, __p2_145) __extension__ ({ \ - float32_t __s0_145 = __p0_145; \ - float32x2_t __s1_145 = __p1_145; \ - float32x2_t __rev1_145; __rev1_145 = __builtin_shufflevector(__s1_145, __s1_145, 1, 0); \ - float32_t __ret_145; \ - __ret_145 = __s0_145 * __noswap_vget_lane_f32(__rev1_145, __p2_145); \ - __ret_145; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vmul_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ @@ -58978,16 +50201,6 @@ __ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { __ret = (float64x1_t) __builtin_neon_vmul_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ __ret; \ }) -#else -#define vmul_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vmul_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vmulq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s0 = __p0; \ @@ -59009,40 +50222,40 @@ __ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vmuld_laneq_f64(__p0_146, __p1_146, __p2_146) __extension__ ({ \ - float64_t __s0_146 = __p0_146; \ - float64x2_t __s1_146 = __p1_146; \ - float64_t __ret_146; \ - __ret_146 = __s0_146 * vgetq_lane_f64(__s1_146, __p2_146); \ - __ret_146; \ +#define vmuld_laneq_f64(__p0_141, __p1_141, __p2_141) __extension__ ({ \ + float64_t __s0_141 = __p0_141; \ + float64x2_t __s1_141 = __p1_141; \ + float64_t __ret_141; \ + __ret_141 = __s0_141 * vgetq_lane_f64(__s1_141, __p2_141); \ + __ret_141; \ }) #else -#define vmuld_laneq_f64(__p0_147, __p1_147, __p2_147) __extension__ ({ \ - float64_t __s0_147 = __p0_147; \ - float64x2_t __s1_147 = __p1_147; \ - float64x2_t __rev1_147; __rev1_147 = __builtin_shufflevector(__s1_147, __s1_147, 1, 0); \ - float64_t __ret_147; \ - __ret_147 = __s0_147 * __noswap_vgetq_lane_f64(__rev1_147, __p2_147); \ - __ret_147; \ +#define vmuld_laneq_f64(__p0_142, __p1_142, __p2_142) __extension__ ({ \ + float64_t __s0_142 = __p0_142; \ + float64x2_t __s1_142 = __p1_142; \ + float64x2_t __rev1_142; __rev1_142 = __builtin_shufflevector(__s1_142, __s1_142, 1, 0); \ + float64_t __ret_142; \ + __ret_142 = __s0_142 * __noswap_vgetq_lane_f64(__rev1_142, __p2_142); \ + __ret_142; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmuls_laneq_f32(__p0_148, __p1_148, __p2_148) __extension__ ({ \ - float32_t __s0_148 = __p0_148; \ - float32x4_t __s1_148 = __p1_148; \ - float32_t __ret_148; \ - __ret_148 = __s0_148 * vgetq_lane_f32(__s1_148, __p2_148); \ - __ret_148; \ +#define vmuls_laneq_f32(__p0_143, __p1_143, __p2_143) __extension__ ({ \ + 
float32_t __s0_143 = __p0_143; \ + float32x4_t __s1_143 = __p1_143; \ + float32_t __ret_143; \ + __ret_143 = __s0_143 * vgetq_lane_f32(__s1_143, __p2_143); \ + __ret_143; \ }) #else -#define vmuls_laneq_f32(__p0_149, __p1_149, __p2_149) __extension__ ({ \ - float32_t __s0_149 = __p0_149; \ - float32x4_t __s1_149 = __p1_149; \ - float32x4_t __rev1_149; __rev1_149 = __builtin_shufflevector(__s1_149, __s1_149, 3, 2, 1, 0); \ - float32_t __ret_149; \ - __ret_149 = __s0_149 * __noswap_vgetq_lane_f32(__rev1_149, __p2_149); \ - __ret_149; \ +#define vmuls_laneq_f32(__p0_144, __p1_144, __p2_144) __extension__ ({ \ + float32_t __s0_144 = __p0_144; \ + float32x4_t __s1_144 = __p1_144; \ + float32x4_t __rev1_144; __rev1_144 = __builtin_shufflevector(__s1_144, __s1_144, 3, 2, 1, 0); \ + float32_t __ret_144; \ + __ret_144 = __s0_144 * __noswap_vgetq_lane_f32(__rev1_144, __p2_144); \ + __ret_144; \ }) #endif @@ -59296,20 +50509,11 @@ __ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vmul_n_f64(float64x1_t __p0, float64_t __p1) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmul_n_f64((int8x8_t)__p0, __p1); + __ret = (float64x1_t) __builtin_neon_vmul_n_f64((float64x1_t)__p0, __p1); return __ret; } -#else -__ai float64x1_t vmul_n_f64(float64x1_t __p0, float64_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmul_n_f64((int8x8_t)__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vmulq_n_f64(float64x2_t __p0, float64_t __p1) { float64x2_t __ret; @@ -59326,25 +50530,11 @@ __ai float64x2_t vmulq_n_f64(float64x2_t __p0, float64_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai poly128_t vmull_p64(poly64_t __p0, poly64_t __p1) { poly128_t __ret; __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); return __ret; } -#else -__ai poly128_t vmull_p64(poly64_t __p0, poly64_t __p1) { - poly128_t __ret; - __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); - return __ret; -} -__ai poly128_t __noswap_vmull_p64(poly64_t __p0, poly64_t __p1) { - poly128_t __ret; - __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai poly16x8_t vmull_high_p8(poly8x16_t __p0, poly8x16_t __p1) { poly16x8_t __ret; @@ -59475,7 +50665,7 @@ __ai poly128_t vmull_high_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); poly128_t __ret; - __ret = __noswap_vmull_p64((poly64_t)(__noswap_vget_high_p64(__rev0)), (poly64_t)(__noswap_vget_high_p64(__rev1))); + __ret = vmull_p64((poly64_t)(__noswap_vget_high_p64(__rev0)), (poly64_t)(__noswap_vget_high_p64(__rev1))); return __ret; } #endif @@ -59840,20 +51030,11 @@ __ai float32x4_t __noswap_vmulxq_f32(float32x4_t __p0, float32x4_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vmulx_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } -#else -__ai float64x1_t vmulx_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 10); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vmulx_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; @@ -59876,78 +51057,39 @@ __ai float32x2_t __noswap_vmulx_f32(float32x2_t __p0, float32x2_t __p1) { } #endif 
-#ifdef __LITTLE_ENDIAN__ __ai float64_t vmulxd_f64(float64_t __p0, float64_t __p1) { float64_t __ret; __ret = (float64_t) __builtin_neon_vmulxd_f64(__p0, __p1); return __ret; } -#else -__ai float64_t vmulxd_f64(float64_t __p0, float64_t __p1) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vmulxd_f64(__p0, __p1); - return __ret; -} -__ai float64_t __noswap_vmulxd_f64(float64_t __p0, float64_t __p1) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vmulxd_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32_t vmulxs_f32(float32_t __p0, float32_t __p1) { float32_t __ret; __ret = (float32_t) __builtin_neon_vmulxs_f32(__p0, __p1); return __ret; } -#else -__ai float32_t vmulxs_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vmulxs_f32(__p0, __p1); - return __ret; -} -__ai float32_t __noswap_vmulxs_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vmulxs_f32(__p0, __p1); - return __ret; -} -#endif - +#define vmulxd_lane_f64(__p0_145, __p1_145, __p2_145) __extension__ ({ \ + float64_t __s0_145 = __p0_145; \ + float64x1_t __s1_145 = __p1_145; \ + float64_t __ret_145; \ + __ret_145 = vmulxd_f64(__s0_145, vget_lane_f64(__s1_145, __p2_145)); \ + __ret_145; \ +}) #ifdef __LITTLE_ENDIAN__ -#define vmulxd_lane_f64(__p0_150, __p1_150, __p2_150) __extension__ ({ \ - float64_t __s0_150 = __p0_150; \ - float64x1_t __s1_150 = __p1_150; \ - float64_t __ret_150; \ - __ret_150 = vmulxd_f64(__s0_150, vget_lane_f64(__s1_150, __p2_150)); \ - __ret_150; \ +#define vmulxs_lane_f32(__p0_146, __p1_146, __p2_146) __extension__ ({ \ + float32_t __s0_146 = __p0_146; \ + float32x2_t __s1_146 = __p1_146; \ + float32_t __ret_146; \ + __ret_146 = vmulxs_f32(__s0_146, vget_lane_f32(__s1_146, __p2_146)); \ + __ret_146; \ }) #else -#define vmulxd_lane_f64(__p0_151, __p1_151, __p2_151) __extension__ ({ \ - float64_t __s0_151 = __p0_151; \ - float64x1_t __s1_151 = __p1_151; \ - float64_t __ret_151; \ - __ret_151 = __noswap_vmulxd_f64(__s0_151, __noswap_vget_lane_f64(__s1_151, __p2_151)); \ - __ret_151; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulxs_lane_f32(__p0_152, __p1_152, __p2_152) __extension__ ({ \ - float32_t __s0_152 = __p0_152; \ - float32x2_t __s1_152 = __p1_152; \ - float32_t __ret_152; \ - __ret_152 = vmulxs_f32(__s0_152, vget_lane_f32(__s1_152, __p2_152)); \ - __ret_152; \ -}) -#else -#define vmulxs_lane_f32(__p0_153, __p1_153, __p2_153) __extension__ ({ \ - float32_t __s0_153 = __p0_153; \ - float32x2_t __s1_153 = __p1_153; \ - float32x2_t __rev1_153; __rev1_153 = __builtin_shufflevector(__s1_153, __s1_153, 1, 0); \ - float32_t __ret_153; \ - __ret_153 = __noswap_vmulxs_f32(__s0_153, __noswap_vget_lane_f32(__rev1_153, __p2_153)); \ - __ret_153; \ +#define vmulxs_lane_f32(__p0_147, __p1_147, __p2_147) __extension__ ({ \ + float32_t __s0_147 = __p0_147; \ + float32x2_t __s1_147 = __p1_147; \ + float32x2_t __rev1_147; __rev1_147 = __builtin_shufflevector(__s1_147, __s1_147, 1, 0); \ + float32_t __ret_147; \ + __ret_147 = vmulxs_f32(__s0_147, __noswap_vget_lane_f32(__rev1_147, __p2_147)); \ + __ret_147; \ }) #endif @@ -60014,40 +51156,40 @@ __ai float32_t __noswap_vmulxs_f32(float32_t __p0, float32_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxd_laneq_f64(__p0_154, __p1_154, __p2_154) __extension__ ({ \ - float64_t __s0_154 = __p0_154; \ - float64x2_t __s1_154 = __p1_154; \ - float64_t __ret_154; \ - __ret_154 = vmulxd_f64(__s0_154, vgetq_lane_f64(__s1_154, 
__p2_154)); \ - __ret_154; \ +#define vmulxd_laneq_f64(__p0_148, __p1_148, __p2_148) __extension__ ({ \ + float64_t __s0_148 = __p0_148; \ + float64x2_t __s1_148 = __p1_148; \ + float64_t __ret_148; \ + __ret_148 = vmulxd_f64(__s0_148, vgetq_lane_f64(__s1_148, __p2_148)); \ + __ret_148; \ }) #else -#define vmulxd_laneq_f64(__p0_155, __p1_155, __p2_155) __extension__ ({ \ - float64_t __s0_155 = __p0_155; \ - float64x2_t __s1_155 = __p1_155; \ - float64x2_t __rev1_155; __rev1_155 = __builtin_shufflevector(__s1_155, __s1_155, 1, 0); \ - float64_t __ret_155; \ - __ret_155 = __noswap_vmulxd_f64(__s0_155, __noswap_vgetq_lane_f64(__rev1_155, __p2_155)); \ - __ret_155; \ +#define vmulxd_laneq_f64(__p0_149, __p1_149, __p2_149) __extension__ ({ \ + float64_t __s0_149 = __p0_149; \ + float64x2_t __s1_149 = __p1_149; \ + float64x2_t __rev1_149; __rev1_149 = __builtin_shufflevector(__s1_149, __s1_149, 1, 0); \ + float64_t __ret_149; \ + __ret_149 = vmulxd_f64(__s0_149, __noswap_vgetq_lane_f64(__rev1_149, __p2_149)); \ + __ret_149; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxs_laneq_f32(__p0_156, __p1_156, __p2_156) __extension__ ({ \ - float32_t __s0_156 = __p0_156; \ - float32x4_t __s1_156 = __p1_156; \ - float32_t __ret_156; \ - __ret_156 = vmulxs_f32(__s0_156, vgetq_lane_f32(__s1_156, __p2_156)); \ - __ret_156; \ +#define vmulxs_laneq_f32(__p0_150, __p1_150, __p2_150) __extension__ ({ \ + float32_t __s0_150 = __p0_150; \ + float32x4_t __s1_150 = __p1_150; \ + float32_t __ret_150; \ + __ret_150 = vmulxs_f32(__s0_150, vgetq_lane_f32(__s1_150, __p2_150)); \ + __ret_150; \ }) #else -#define vmulxs_laneq_f32(__p0_157, __p1_157, __p2_157) __extension__ ({ \ - float32_t __s0_157 = __p0_157; \ - float32x4_t __s1_157 = __p1_157; \ - float32x4_t __rev1_157; __rev1_157 = __builtin_shufflevector(__s1_157, __s1_157, 3, 2, 1, 0); \ - float32_t __ret_157; \ - __ret_157 = __noswap_vmulxs_f32(__s0_157, __noswap_vgetq_lane_f32(__rev1_157, __p2_157)); \ - __ret_157; \ +#define vmulxs_laneq_f32(__p0_151, __p1_151, __p2_151) __extension__ ({ \ + float32_t __s0_151 = __p0_151; \ + float32x4_t __s1_151 = __p1_151; \ + float32x4_t __rev1_151; __rev1_151 = __builtin_shufflevector(__s1_151, __s1_151, 3, 2, 1, 0); \ + float32_t __ret_151; \ + __ret_151 = vmulxs_f32(__s0_151, __noswap_vgetq_lane_f32(__rev1_151, __p2_151)); \ + __ret_151; \ }) #endif @@ -60146,48 +51288,21 @@ __ai int64x2_t vnegq_s64(int64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vneg_f64(float64x1_t __p0) { float64x1_t __ret; __ret = -__p0; return __ret; } -#else -__ai float64x1_t vneg_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = -__p0; - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vneg_s64(int64x1_t __p0) { int64x1_t __ret; __ret = -__p0; return __ret; } -#else -__ai int64x1_t vneg_s64(int64x1_t __p0) { - int64x1_t __ret; - __ret = -__p0; - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vnegd_s64(int64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vnegd_s64(__p0); return __ret; } -#else -__ai int64_t vnegd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vnegd_s64(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vpaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; @@ -60361,14 +51476,14 @@ __ai int16x8_t vpaddq_s16(int16x8_t __p0, int16x8_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai uint64_t vpaddd_u64(uint64x2_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vpaddd_u64((int8x16_t)__p0); + __ret = 
(uint64_t) __builtin_neon_vpaddd_u64(__p0); return __ret; } #else __ai uint64_t vpaddd_u64(uint64x2_t __p0) { uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vpaddd_u64((int8x16_t)__rev0); + __ret = (uint64_t) __builtin_neon_vpaddd_u64(__rev0); return __ret; } #endif @@ -60376,14 +51491,14 @@ __ai uint64_t vpaddd_u64(uint64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float64_t vpaddd_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vpaddd_f64((int8x16_t)__p0); + __ret = (float64_t) __builtin_neon_vpaddd_f64(__p0); return __ret; } #else __ai float64_t vpaddd_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64_t __ret; - __ret = (float64_t) __builtin_neon_vpaddd_f64((int8x16_t)__rev0); + __ret = (float64_t) __builtin_neon_vpaddd_f64(__rev0); return __ret; } #endif @@ -60391,14 +51506,14 @@ __ai float64_t vpaddd_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai int64_t vpaddd_s64(int64x2_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vpaddd_s64((int8x16_t)__p0); + __ret = (int64_t) __builtin_neon_vpaddd_s64(__p0); return __ret; } #else __ai int64_t vpaddd_s64(int64x2_t __p0) { int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64_t __ret; - __ret = (int64_t) __builtin_neon_vpaddd_s64((int8x16_t)__rev0); + __ret = (int64_t) __builtin_neon_vpaddd_s64(__rev0); return __ret; } #endif @@ -60406,14 +51521,14 @@ __ai int64_t vpaddd_s64(int64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vpadds_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vpadds_f32((int8x8_t)__p0); + __ret = (float32_t) __builtin_neon_vpadds_f32(__p0); return __ret; } #else __ai float32_t vpadds_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vpadds_f32((int8x8_t)__rev0); + __ret = (float32_t) __builtin_neon_vpadds_f32(__rev0); return __ret; } #endif @@ -60557,14 +51672,14 @@ __ai int16x8_t vpmaxq_s16(int16x8_t __p0, int16x8_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai float64_t vpmaxqd_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vpmaxqd_f64((int8x16_t)__p0); + __ret = (float64_t) __builtin_neon_vpmaxqd_f64(__p0); return __ret; } #else __ai float64_t vpmaxqd_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64_t __ret; - __ret = (float64_t) __builtin_neon_vpmaxqd_f64((int8x16_t)__rev0); + __ret = (float64_t) __builtin_neon_vpmaxqd_f64(__rev0); return __ret; } #endif @@ -60572,14 +51687,14 @@ __ai float64_t vpmaxqd_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vpmaxs_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vpmaxs_f32((int8x8_t)__p0); + __ret = (float32_t) __builtin_neon_vpmaxs_f32(__p0); return __ret; } #else __ai float32_t vpmaxs_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vpmaxs_f32((int8x8_t)__rev0); + __ret = (float32_t) __builtin_neon_vpmaxs_f32(__rev0); return __ret; } #endif @@ -60638,14 +51753,14 @@ __ai float32x2_t vpmaxnm_f32(float32x2_t __p0, float32x2_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai float64_t vpmaxnmqd_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vpmaxnmqd_f64((int8x16_t)__p0); + __ret = (float64_t) 
__builtin_neon_vpmaxnmqd_f64(__p0); return __ret; } #else __ai float64_t vpmaxnmqd_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64_t __ret; - __ret = (float64_t) __builtin_neon_vpmaxnmqd_f64((int8x16_t)__rev0); + __ret = (float64_t) __builtin_neon_vpmaxnmqd_f64(__rev0); return __ret; } #endif @@ -60653,14 +51768,14 @@ __ai float64_t vpmaxnmqd_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vpmaxnms_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vpmaxnms_f32((int8x8_t)__p0); + __ret = (float32_t) __builtin_neon_vpmaxnms_f32(__p0); return __ret; } #else __ai float32_t vpmaxnms_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vpmaxnms_f32((int8x8_t)__rev0); + __ret = (float32_t) __builtin_neon_vpmaxnms_f32(__rev0); return __ret; } #endif @@ -60804,14 +51919,14 @@ __ai int16x8_t vpminq_s16(int16x8_t __p0, int16x8_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai float64_t vpminqd_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vpminqd_f64((int8x16_t)__p0); + __ret = (float64_t) __builtin_neon_vpminqd_f64(__p0); return __ret; } #else __ai float64_t vpminqd_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64_t __ret; - __ret = (float64_t) __builtin_neon_vpminqd_f64((int8x16_t)__rev0); + __ret = (float64_t) __builtin_neon_vpminqd_f64(__rev0); return __ret; } #endif @@ -60819,14 +51934,14 @@ __ai float64_t vpminqd_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vpmins_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vpmins_f32((int8x8_t)__p0); + __ret = (float32_t) __builtin_neon_vpmins_f32(__p0); return __ret; } #else __ai float32_t vpmins_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vpmins_f32((int8x8_t)__rev0); + __ret = (float32_t) __builtin_neon_vpmins_f32(__rev0); return __ret; } #endif @@ -60885,14 +52000,14 @@ __ai float32x2_t vpminnm_f32(float32x2_t __p0, float32x2_t __p1) { #ifdef __LITTLE_ENDIAN__ __ai float64_t vpminnmqd_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vpminnmqd_f64((int8x16_t)__p0); + __ret = (float64_t) __builtin_neon_vpminnmqd_f64(__p0); return __ret; } #else __ai float64_t vpminnmqd_f64(float64x2_t __p0) { float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float64_t __ret; - __ret = (float64_t) __builtin_neon_vpminnmqd_f64((int8x16_t)__rev0); + __ret = (float64_t) __builtin_neon_vpminnmqd_f64(__rev0); return __ret; } #endif @@ -60900,14 +52015,14 @@ __ai float64_t vpminnmqd_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai float32_t vpminnms_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vpminnms_f32((int8x8_t)__p0); + __ret = (float32_t) __builtin_neon_vpminnms_f32(__p0); return __ret; } #else __ai float32_t vpminnms_f32(float32x2_t __p0) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float32_t __ret; - __ret = (float32_t) __builtin_neon_vpminnms_f32((int8x8_t)__rev0); + __ret = (float32_t) __builtin_neon_vpminnms_f32(__rev0); return __ret; } #endif @@ -60928,226 +52043,81 @@ __ai int64x2_t vqabsq_s64(int64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vqabs_s64(int64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) 
__builtin_neon_vqabs_v((int8x8_t)__p0, 3); return __ret; } -#else -__ai int64x1_t vqabs_s64(int64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 3); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8_t vqabsb_s8(int8_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqabsb_s8(__p0); return __ret; } -#else -__ai int8_t vqabsb_s8(int8_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqabsb_s8(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqabss_s32(int32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqabss_s32(__p0); return __ret; } -#else -__ai int32_t vqabss_s32(int32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqabss_s32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vqabsd_s64(int64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqabsd_s64(__p0); return __ret; } -#else -__ai int64_t vqabsd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqabsd_s64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16_t vqabsh_s16(int16_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqabsh_s16(__p0); return __ret; } -#else -__ai int16_t vqabsh_s16(int16_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqabsh_s16(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8_t vqaddb_u8(uint8_t __p0, uint8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqaddb_u8(__p0, __p1); return __ret; } -#else -__ai uint8_t vqaddb_u8(uint8_t __p0, uint8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqaddb_u8(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vqadds_u32(uint32_t __p0, uint32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqadds_u32(__p0, __p1); return __ret; } -#else -__ai uint32_t vqadds_u32(uint32_t __p0, uint32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqadds_u32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vqaddd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vqaddd_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vqaddd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqaddd_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16_t vqaddh_u16(uint16_t __p0, uint16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqaddh_u16(__p0, __p1); return __ret; } -#else -__ai uint16_t vqaddh_u16(uint16_t __p0, uint16_t __p1) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqaddh_u16(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8_t vqaddb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqaddb_s8(__p0, __p1); return __ret; } -#else -__ai int8_t vqaddb_s8(int8_t __p0, int8_t __p1) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqaddb_s8(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqadds_s32(int32_t __p0, int32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqadds_s32(__p0, __p1); return __ret; } -#else -__ai int32_t vqadds_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqadds_s32(__p0, __p1); - return __ret; -} -__ai int32_t __noswap_vqadds_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqadds_s32(__p0, __p1); - return 
__ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vqaddd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqaddd_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vqaddd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqaddd_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16_t vqaddh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqaddh_s16(__p0, __p1); return __ret; } -#else -__ai int16_t vqaddh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqaddh_s16(__p0, __p1); - return __ret; -} -__ai int16_t __noswap_vqaddh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqaddh_s16(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vqdmlals_s32(int64_t __p0, int32_t __p1, int32_t __p2) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqdmlals_s32(__p0, __p1, __p2); return __ret; } -#else -__ai int64_t vqdmlals_s32(int64_t __p0, int32_t __p1, int32_t __p2) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqdmlals_s32(__p0, __p1, __p2); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqdmlalh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqdmlalh_s16(__p0, __p1, __p2); return __ret; } -#else -__ai int32_t vqdmlalh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmlalh_s16(__p0, __p1, __p2); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmlal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; @@ -61320,7 +52290,7 @@ __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int32_t __s1 = __p1; \ int32x2_t __s2 = __p2; \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, (int8x8_t)__s2, __p3); \ + __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, __s2, __p3); \ __ret; \ }) #else @@ -61330,7 +52300,7 @@ __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int32x2_t __s2 = __p2; \ int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, (int8x8_t)__rev2, __p3); \ + __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif @@ -61341,7 +52311,7 @@ __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int16_t __s1 = __p1; \ int16x4_t __s2 = __p2; \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, (int8x8_t)__s2, __p3); \ + __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, __s2, __p3); \ __ret; \ }) #else @@ -61351,7 +52321,7 @@ __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int16x4_t __s2 = __p2; \ int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, (int8x8_t)__rev2, __p3); \ + __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif @@ -61362,7 +52332,7 @@ __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int32_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret = 
(int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, __s2, __p3); \ __ret; \ }) #else @@ -61372,7 +52342,7 @@ __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int32x4_t __s2 = __p2; \ int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret = (int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif @@ -61383,7 +52353,7 @@ __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int16_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, __s2, __p3); \ __ret; \ }) #else @@ -61393,7 +52363,7 @@ __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int16x8_t __s2 = __p2; \ int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif @@ -61446,34 +52416,16 @@ __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) }) #endif -#ifdef __LITTLE_ENDIAN__ __ai int64_t vqdmlsls_s32(int64_t __p0, int32_t __p1, int32_t __p2) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqdmlsls_s32(__p0, __p1, __p2); return __ret; } -#else -__ai int64_t vqdmlsls_s32(int64_t __p0, int32_t __p1, int32_t __p2) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqdmlsls_s32(__p0, __p1, __p2); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqdmlslh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqdmlslh_s16(__p0, __p1, __p2); return __ret; } -#else -__ai int32_t vqdmlslh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmlslh_s16(__p0, __p1, __p2); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmlsl_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; @@ -61646,7 +52598,7 @@ __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int32_t __s1 = __p1; \ int32x2_t __s2 = __p2; \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, (int8x8_t)__s2, __p3); \ + __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, __s2, __p3); \ __ret; \ }) #else @@ -61656,7 +52608,7 @@ __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int32x2_t __s2 = __p2; \ int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, (int8x8_t)__rev2, __p3); \ + __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif @@ -61667,7 +52619,7 @@ __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int16_t __s1 = __p1; \ int16x4_t __s2 = __p2; \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, (int8x8_t)__s2, __p3); \ + __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, __s2, __p3); \ __ret; \ }) #else @@ -61677,7 +52629,7 @@ __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int16x4_t __s2 = 
__p2; \ int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, (int8x8_t)__rev2, __p3); \ + __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif @@ -61688,7 +52640,7 @@ __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int32_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, __s2, __p3); \ __ret; \ }) #else @@ -61698,7 +52650,7 @@ __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int32x4_t __s2 = __p2; \ int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif @@ -61709,7 +52661,7 @@ __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int16_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, (int8x16_t)__s2, __p3); \ + __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, __s2, __p3); \ __ret; \ }) #else @@ -61719,7 +52671,7 @@ __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) int16x8_t __s2 = __p2; \ int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, (int8x16_t)__rev2, __p3); \ + __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, __rev2, __p3); \ __ret; \ }) #endif @@ -61772,120 +52724,92 @@ __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) }) #endif -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqdmulhs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqdmulhs_s32(__p0, __p1); return __ret; } -#else -__ai int32_t vqdmulhs_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmulhs_s32(__p0, __p1); - return __ret; -} -__ai int32_t __noswap_vqdmulhs_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmulhs_s32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16_t vqdmulhh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqdmulhh_s16(__p0, __p1); return __ret; } +#ifdef __LITTLE_ENDIAN__ +#define vqdmulhs_lane_s32(__p0_152, __p1_152, __p2_152) __extension__ ({ \ + int32_t __s0_152 = __p0_152; \ + int32x2_t __s1_152 = __p1_152; \ + int32_t __ret_152; \ + __ret_152 = vqdmulhs_s32(__s0_152, vget_lane_s32(__s1_152, __p2_152)); \ + __ret_152; \ +}) #else -__ai int16_t vqdmulhh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqdmulhh_s16(__p0, __p1); - return __ret; -} -__ai int16_t __noswap_vqdmulhh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqdmulhh_s16(__p0, __p1); - return __ret; -} +#define vqdmulhs_lane_s32(__p0_153, __p1_153, __p2_153) __extension__ ({ \ + int32_t __s0_153 = __p0_153; \ + int32x2_t __s1_153 = __p1_153; \ + int32x2_t __rev1_153; __rev1_153 = __builtin_shufflevector(__s1_153, __s1_153, 1, 0); \ + int32_t __ret_153; \ + __ret_153 = 
vqdmulhs_s32(__s0_153, __noswap_vget_lane_s32(__rev1_153, __p2_153)); \ + __ret_153; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhs_lane_s32(__p0_158, __p1_158, __p2_158) __extension__ ({ \ - int32_t __s0_158 = __p0_158; \ - int32x2_t __s1_158 = __p1_158; \ - int32_t __ret_158; \ - __ret_158 = vqdmulhs_s32(__s0_158, vget_lane_s32(__s1_158, __p2_158)); \ +#define vqdmulhh_lane_s16(__p0_154, __p1_154, __p2_154) __extension__ ({ \ + int16_t __s0_154 = __p0_154; \ + int16x4_t __s1_154 = __p1_154; \ + int16_t __ret_154; \ + __ret_154 = vqdmulhh_s16(__s0_154, vget_lane_s16(__s1_154, __p2_154)); \ + __ret_154; \ +}) +#else +#define vqdmulhh_lane_s16(__p0_155, __p1_155, __p2_155) __extension__ ({ \ + int16_t __s0_155 = __p0_155; \ + int16x4_t __s1_155 = __p1_155; \ + int16x4_t __rev1_155; __rev1_155 = __builtin_shufflevector(__s1_155, __s1_155, 3, 2, 1, 0); \ + int16_t __ret_155; \ + __ret_155 = vqdmulhh_s16(__s0_155, __noswap_vget_lane_s16(__rev1_155, __p2_155)); \ + __ret_155; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqdmulhs_laneq_s32(__p0_156, __p1_156, __p2_156) __extension__ ({ \ + int32_t __s0_156 = __p0_156; \ + int32x4_t __s1_156 = __p1_156; \ + int32_t __ret_156; \ + __ret_156 = vqdmulhs_s32(__s0_156, vgetq_lane_s32(__s1_156, __p2_156)); \ + __ret_156; \ +}) +#else +#define vqdmulhs_laneq_s32(__p0_157, __p1_157, __p2_157) __extension__ ({ \ + int32_t __s0_157 = __p0_157; \ + int32x4_t __s1_157 = __p1_157; \ + int32x4_t __rev1_157; __rev1_157 = __builtin_shufflevector(__s1_157, __s1_157, 3, 2, 1, 0); \ + int32_t __ret_157; \ + __ret_157 = vqdmulhs_s32(__s0_157, __noswap_vgetq_lane_s32(__rev1_157, __p2_157)); \ + __ret_157; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqdmulhh_laneq_s16(__p0_158, __p1_158, __p2_158) __extension__ ({ \ + int16_t __s0_158 = __p0_158; \ + int16x8_t __s1_158 = __p1_158; \ + int16_t __ret_158; \ + __ret_158 = vqdmulhh_s16(__s0_158, vgetq_lane_s16(__s1_158, __p2_158)); \ __ret_158; \ }) #else -#define vqdmulhs_lane_s32(__p0_159, __p1_159, __p2_159) __extension__ ({ \ - int32_t __s0_159 = __p0_159; \ - int32x2_t __s1_159 = __p1_159; \ - int32x2_t __rev1_159; __rev1_159 = __builtin_shufflevector(__s1_159, __s1_159, 1, 0); \ - int32_t __ret_159; \ - __ret_159 = __noswap_vqdmulhs_s32(__s0_159, __noswap_vget_lane_s32(__rev1_159, __p2_159)); \ +#define vqdmulhh_laneq_s16(__p0_159, __p1_159, __p2_159) __extension__ ({ \ + int16_t __s0_159 = __p0_159; \ + int16x8_t __s1_159 = __p1_159; \ + int16x8_t __rev1_159; __rev1_159 = __builtin_shufflevector(__s1_159, __s1_159, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16_t __ret_159; \ + __ret_159 = vqdmulhh_s16(__s0_159, __noswap_vgetq_lane_s16(__rev1_159, __p2_159)); \ __ret_159; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vqdmulhh_lane_s16(__p0_160, __p1_160, __p2_160) __extension__ ({ \ - int16_t __s0_160 = __p0_160; \ - int16x4_t __s1_160 = __p1_160; \ - int16_t __ret_160; \ - __ret_160 = vqdmulhh_s16(__s0_160, vget_lane_s16(__s1_160, __p2_160)); \ - __ret_160; \ -}) -#else -#define vqdmulhh_lane_s16(__p0_161, __p1_161, __p2_161) __extension__ ({ \ - int16_t __s0_161 = __p0_161; \ - int16x4_t __s1_161 = __p1_161; \ - int16x4_t __rev1_161; __rev1_161 = __builtin_shufflevector(__s1_161, __s1_161, 3, 2, 1, 0); \ - int16_t __ret_161; \ - __ret_161 = __noswap_vqdmulhh_s16(__s0_161, __noswap_vget_lane_s16(__rev1_161, __p2_161)); \ - __ret_161; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqdmulhs_laneq_s32(__p0_162, __p1_162, __p2_162) __extension__ ({ \ - int32_t __s0_162 = __p0_162; \ - int32x4_t __s1_162 
= __p1_162; \ - int32_t __ret_162; \ - __ret_162 = vqdmulhs_s32(__s0_162, vgetq_lane_s32(__s1_162, __p2_162)); \ - __ret_162; \ -}) -#else -#define vqdmulhs_laneq_s32(__p0_163, __p1_163, __p2_163) __extension__ ({ \ - int32_t __s0_163 = __p0_163; \ - int32x4_t __s1_163 = __p1_163; \ - int32x4_t __rev1_163; __rev1_163 = __builtin_shufflevector(__s1_163, __s1_163, 3, 2, 1, 0); \ - int32_t __ret_163; \ - __ret_163 = __noswap_vqdmulhs_s32(__s0_163, __noswap_vgetq_lane_s32(__rev1_163, __p2_163)); \ - __ret_163; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqdmulhh_laneq_s16(__p0_164, __p1_164, __p2_164) __extension__ ({ \ - int16_t __s0_164 = __p0_164; \ - int16x8_t __s1_164 = __p1_164; \ - int16_t __ret_164; \ - __ret_164 = vqdmulhh_s16(__s0_164, vgetq_lane_s16(__s1_164, __p2_164)); \ - __ret_164; \ -}) -#else -#define vqdmulhh_laneq_s16(__p0_165, __p1_165, __p2_165) __extension__ ({ \ - int16_t __s0_165 = __p0_165; \ - int16x8_t __s1_165 = __p1_165; \ - int16x8_t __rev1_165; __rev1_165 = __builtin_shufflevector(__s1_165, __s1_165, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16_t __ret_165; \ - __ret_165 = __noswap_vqdmulhh_s16(__s0_165, __noswap_vgetq_lane_s16(__rev1_165, __p2_165)); \ - __ret_165; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vqdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __s0 = __p0; \ @@ -61970,44 +52894,16 @@ __ai int16_t __noswap_vqdmulhh_s16(int16_t __p0, int16_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ __ai int64_t vqdmulls_s32(int32_t __p0, int32_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqdmulls_s32(__p0, __p1); return __ret; } -#else -__ai int64_t vqdmulls_s32(int32_t __p0, int32_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqdmulls_s32(__p0, __p1); - return __ret; -} -__ai int64_t __noswap_vqdmulls_s32(int32_t __p0, int32_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqdmulls_s32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqdmullh_s16(int16_t __p0, int16_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqdmullh_s16(__p0, __p1); return __ret; } -#else -__ai int32_t vqdmullh_s16(int16_t __p0, int16_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmullh_s16(__p0, __p1); - return __ret; -} -__ai int32_t __noswap_vqdmullh_s16(int16_t __p0, int16_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmullh_s16(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int64x2_t vqdmull_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; @@ -62159,81 +53055,81 @@ __ai int32x4_t vqdmull_high_n_s16(int16x8_t __p0, int16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulls_lane_s32(__p0_166, __p1_166, __p2_166) __extension__ ({ \ - int32_t __s0_166 = __p0_166; \ - int32x2_t __s1_166 = __p1_166; \ - int64_t __ret_166; \ - __ret_166 = vqdmulls_s32(__s0_166, vget_lane_s32(__s1_166, __p2_166)); \ +#define vqdmulls_lane_s32(__p0_160, __p1_160, __p2_160) __extension__ ({ \ + int32_t __s0_160 = __p0_160; \ + int32x2_t __s1_160 = __p1_160; \ + int64_t __ret_160; \ + __ret_160 = vqdmulls_s32(__s0_160, vget_lane_s32(__s1_160, __p2_160)); \ + __ret_160; \ +}) +#else +#define vqdmulls_lane_s32(__p0_161, __p1_161, __p2_161) __extension__ ({ \ + int32_t __s0_161 = __p0_161; \ + int32x2_t __s1_161 = __p1_161; \ + int32x2_t __rev1_161; __rev1_161 = __builtin_shufflevector(__s1_161, __s1_161, 1, 0); \ + int64_t __ret_161; \ + __ret_161 = vqdmulls_s32(__s0_161, __noswap_vget_lane_s32(__rev1_161, __p2_161)); \ + __ret_161; \ 
+}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqdmullh_lane_s16(__p0_162, __p1_162, __p2_162) __extension__ ({ \ + int16_t __s0_162 = __p0_162; \ + int16x4_t __s1_162 = __p1_162; \ + int32_t __ret_162; \ + __ret_162 = vqdmullh_s16(__s0_162, vget_lane_s16(__s1_162, __p2_162)); \ + __ret_162; \ +}) +#else +#define vqdmullh_lane_s16(__p0_163, __p1_163, __p2_163) __extension__ ({ \ + int16_t __s0_163 = __p0_163; \ + int16x4_t __s1_163 = __p1_163; \ + int16x4_t __rev1_163; __rev1_163 = __builtin_shufflevector(__s1_163, __s1_163, 3, 2, 1, 0); \ + int32_t __ret_163; \ + __ret_163 = vqdmullh_s16(__s0_163, __noswap_vget_lane_s16(__rev1_163, __p2_163)); \ + __ret_163; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqdmulls_laneq_s32(__p0_164, __p1_164, __p2_164) __extension__ ({ \ + int32_t __s0_164 = __p0_164; \ + int32x4_t __s1_164 = __p1_164; \ + int64_t __ret_164; \ + __ret_164 = vqdmulls_s32(__s0_164, vgetq_lane_s32(__s1_164, __p2_164)); \ + __ret_164; \ +}) +#else +#define vqdmulls_laneq_s32(__p0_165, __p1_165, __p2_165) __extension__ ({ \ + int32_t __s0_165 = __p0_165; \ + int32x4_t __s1_165 = __p1_165; \ + int32x4_t __rev1_165; __rev1_165 = __builtin_shufflevector(__s1_165, __s1_165, 3, 2, 1, 0); \ + int64_t __ret_165; \ + __ret_165 = vqdmulls_s32(__s0_165, __noswap_vgetq_lane_s32(__rev1_165, __p2_165)); \ + __ret_165; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqdmullh_laneq_s16(__p0_166, __p1_166, __p2_166) __extension__ ({ \ + int16_t __s0_166 = __p0_166; \ + int16x8_t __s1_166 = __p1_166; \ + int32_t __ret_166; \ + __ret_166 = vqdmullh_s16(__s0_166, vgetq_lane_s16(__s1_166, __p2_166)); \ __ret_166; \ }) #else -#define vqdmulls_lane_s32(__p0_167, __p1_167, __p2_167) __extension__ ({ \ - int32_t __s0_167 = __p0_167; \ - int32x2_t __s1_167 = __p1_167; \ - int32x2_t __rev1_167; __rev1_167 = __builtin_shufflevector(__s1_167, __s1_167, 1, 0); \ - int64_t __ret_167; \ - __ret_167 = __noswap_vqdmulls_s32(__s0_167, __noswap_vget_lane_s32(__rev1_167, __p2_167)); \ +#define vqdmullh_laneq_s16(__p0_167, __p1_167, __p2_167) __extension__ ({ \ + int16_t __s0_167 = __p0_167; \ + int16x8_t __s1_167 = __p1_167; \ + int16x8_t __rev1_167; __rev1_167 = __builtin_shufflevector(__s1_167, __s1_167, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32_t __ret_167; \ + __ret_167 = vqdmullh_s16(__s0_167, __noswap_vgetq_lane_s16(__rev1_167, __p2_167)); \ __ret_167; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vqdmullh_lane_s16(__p0_168, __p1_168, __p2_168) __extension__ ({ \ - int16_t __s0_168 = __p0_168; \ - int16x4_t __s1_168 = __p1_168; \ - int32_t __ret_168; \ - __ret_168 = vqdmullh_s16(__s0_168, vget_lane_s16(__s1_168, __p2_168)); \ - __ret_168; \ -}) -#else -#define vqdmullh_lane_s16(__p0_169, __p1_169, __p2_169) __extension__ ({ \ - int16_t __s0_169 = __p0_169; \ - int16x4_t __s1_169 = __p1_169; \ - int16x4_t __rev1_169; __rev1_169 = __builtin_shufflevector(__s1_169, __s1_169, 3, 2, 1, 0); \ - int32_t __ret_169; \ - __ret_169 = __noswap_vqdmullh_s16(__s0_169, __noswap_vget_lane_s16(__rev1_169, __p2_169)); \ - __ret_169; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqdmulls_laneq_s32(__p0_170, __p1_170, __p2_170) __extension__ ({ \ - int32_t __s0_170 = __p0_170; \ - int32x4_t __s1_170 = __p1_170; \ - int64_t __ret_170; \ - __ret_170 = vqdmulls_s32(__s0_170, vgetq_lane_s32(__s1_170, __p2_170)); \ - __ret_170; \ -}) -#else -#define vqdmulls_laneq_s32(__p0_171, __p1_171, __p2_171) __extension__ ({ \ - int32_t __s0_171 = __p0_171; \ - int32x4_t __s1_171 = __p1_171; \ - int32x4_t __rev1_171; __rev1_171 = 
__builtin_shufflevector(__s1_171, __s1_171, 3, 2, 1, 0); \ - int64_t __ret_171; \ - __ret_171 = __noswap_vqdmulls_s32(__s0_171, __noswap_vgetq_lane_s32(__rev1_171, __p2_171)); \ - __ret_171; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqdmullh_laneq_s16(__p0_172, __p1_172, __p2_172) __extension__ ({ \ - int16_t __s0_172 = __p0_172; \ - int16x8_t __s1_172 = __p1_172; \ - int32_t __ret_172; \ - __ret_172 = vqdmullh_s16(__s0_172, vgetq_lane_s16(__s1_172, __p2_172)); \ - __ret_172; \ -}) -#else -#define vqdmullh_laneq_s16(__p0_173, __p1_173, __p2_173) __extension__ ({ \ - int16_t __s0_173 = __p0_173; \ - int16x8_t __s1_173 = __p1_173; \ - int16x8_t __rev1_173; __rev1_173 = __builtin_shufflevector(__s1_173, __s1_173, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32_t __ret_173; \ - __ret_173 = __noswap_vqdmullh_s16(__s0_173, __noswap_vgetq_lane_s16(__rev1_173, __p2_173)); \ - __ret_173; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vqdmull_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __s0 = __p0; \ @@ -62276,90 +53172,36 @@ __ai int32x4_t vqdmull_high_n_s16(int16x8_t __p0, int16_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ __ai int16_t vqmovns_s32(int32_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqmovns_s32(__p0); return __ret; } -#else -__ai int16_t vqmovns_s32(int32_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqmovns_s32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqmovnd_s64(int64_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqmovnd_s64(__p0); return __ret; } -#else -__ai int32_t vqmovnd_s64(int64_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqmovnd_s64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8_t vqmovnh_s16(int16_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqmovnh_s16(__p0); return __ret; } -#else -__ai int8_t vqmovnh_s16(int16_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqmovnh_s16(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16_t vqmovns_u32(uint32_t __p0) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqmovns_u32(__p0); return __ret; } -#else -__ai uint16_t vqmovns_u32(uint32_t __p0) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqmovns_u32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vqmovnd_u64(uint64_t __p0) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqmovnd_u64(__p0); return __ret; } -#else -__ai uint32_t vqmovnd_u64(uint64_t __p0) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqmovnd_u64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8_t vqmovnh_u16(uint16_t __p0) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqmovnh_u16(__p0); return __ret; } -#else -__ai uint8_t vqmovnh_u16(uint16_t __p0) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqmovnh_u16(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vqmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { uint16x8_t __ret; @@ -62462,48 +53304,21 @@ __ai int8x16_t vqmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int16_t vqmovuns_s32(int32_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqmovuns_s32(__p0); return __ret; } -#else -__ai int16_t vqmovuns_s32(int32_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqmovuns_s32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqmovund_s64(int64_t __p0) { int32_t __ret; __ret = (int32_t) 
__builtin_neon_vqmovund_s64(__p0); return __ret; } -#else -__ai int32_t vqmovund_s64(int64_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqmovund_s64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8_t vqmovunh_s16(int16_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqmovunh_s16(__p0); return __ret; } -#else -__ai int8_t vqmovunh_s16(int16_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqmovunh_s16(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vqmovun_high_s32(int16x4_t __p0, int32x4_t __p1) { uint16x8_t __ret; @@ -62571,190 +53386,117 @@ __ai int64x2_t vqnegq_s64(int64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vqneg_s64(int64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 3); return __ret; } -#else -__ai int64x1_t vqneg_s64(int64x1_t __p0) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 3); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8_t vqnegb_s8(int8_t __p0) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqnegb_s8(__p0); return __ret; } -#else -__ai int8_t vqnegb_s8(int8_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqnegb_s8(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqnegs_s32(int32_t __p0) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqnegs_s32(__p0); return __ret; } -#else -__ai int32_t vqnegs_s32(int32_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqnegs_s32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vqnegd_s64(int64_t __p0) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqnegd_s64(__p0); return __ret; } -#else -__ai int64_t vqnegd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqnegd_s64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16_t vqnegh_s16(int16_t __p0) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqnegh_s16(__p0); return __ret; } -#else -__ai int16_t vqnegh_s16(int16_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqnegh_s16(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqrdmulhs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqrdmulhs_s32(__p0, __p1); return __ret; } -#else -__ai int32_t vqrdmulhs_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrdmulhs_s32(__p0, __p1); - return __ret; -} -__ai int32_t __noswap_vqrdmulhs_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrdmulhs_s32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16_t vqrdmulhh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqrdmulhh_s16(__p0, __p1); return __ret; } +#ifdef __LITTLE_ENDIAN__ +#define vqrdmulhs_lane_s32(__p0_168, __p1_168, __p2_168) __extension__ ({ \ + int32_t __s0_168 = __p0_168; \ + int32x2_t __s1_168 = __p1_168; \ + int32_t __ret_168; \ + __ret_168 = vqrdmulhs_s32(__s0_168, vget_lane_s32(__s1_168, __p2_168)); \ + __ret_168; \ +}) #else -__ai int16_t vqrdmulhh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrdmulhh_s16(__p0, __p1); - return __ret; -} -__ai int16_t __noswap_vqrdmulhh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrdmulhh_s16(__p0, __p1); - return __ret; -} +#define vqrdmulhs_lane_s32(__p0_169, __p1_169, __p2_169) 
__extension__ ({ \ + int32_t __s0_169 = __p0_169; \ + int32x2_t __s1_169 = __p1_169; \ + int32x2_t __rev1_169; __rev1_169 = __builtin_shufflevector(__s1_169, __s1_169, 1, 0); \ + int32_t __ret_169; \ + __ret_169 = vqrdmulhs_s32(__s0_169, __noswap_vget_lane_s32(__rev1_169, __p2_169)); \ + __ret_169; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhs_lane_s32(__p0_174, __p1_174, __p2_174) __extension__ ({ \ - int32_t __s0_174 = __p0_174; \ - int32x2_t __s1_174 = __p1_174; \ - int32_t __ret_174; \ - __ret_174 = vqrdmulhs_s32(__s0_174, vget_lane_s32(__s1_174, __p2_174)); \ +#define vqrdmulhh_lane_s16(__p0_170, __p1_170, __p2_170) __extension__ ({ \ + int16_t __s0_170 = __p0_170; \ + int16x4_t __s1_170 = __p1_170; \ + int16_t __ret_170; \ + __ret_170 = vqrdmulhh_s16(__s0_170, vget_lane_s16(__s1_170, __p2_170)); \ + __ret_170; \ +}) +#else +#define vqrdmulhh_lane_s16(__p0_171, __p1_171, __p2_171) __extension__ ({ \ + int16_t __s0_171 = __p0_171; \ + int16x4_t __s1_171 = __p1_171; \ + int16x4_t __rev1_171; __rev1_171 = __builtin_shufflevector(__s1_171, __s1_171, 3, 2, 1, 0); \ + int16_t __ret_171; \ + __ret_171 = vqrdmulhh_s16(__s0_171, __noswap_vget_lane_s16(__rev1_171, __p2_171)); \ + __ret_171; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmulhs_laneq_s32(__p0_172, __p1_172, __p2_172) __extension__ ({ \ + int32_t __s0_172 = __p0_172; \ + int32x4_t __s1_172 = __p1_172; \ + int32_t __ret_172; \ + __ret_172 = vqrdmulhs_s32(__s0_172, vgetq_lane_s32(__s1_172, __p2_172)); \ + __ret_172; \ +}) +#else +#define vqrdmulhs_laneq_s32(__p0_173, __p1_173, __p2_173) __extension__ ({ \ + int32_t __s0_173 = __p0_173; \ + int32x4_t __s1_173 = __p1_173; \ + int32x4_t __rev1_173; __rev1_173 = __builtin_shufflevector(__s1_173, __s1_173, 3, 2, 1, 0); \ + int32_t __ret_173; \ + __ret_173 = vqrdmulhs_s32(__s0_173, __noswap_vgetq_lane_s32(__rev1_173, __p2_173)); \ + __ret_173; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmulhh_laneq_s16(__p0_174, __p1_174, __p2_174) __extension__ ({ \ + int16_t __s0_174 = __p0_174; \ + int16x8_t __s1_174 = __p1_174; \ + int16_t __ret_174; \ + __ret_174 = vqrdmulhh_s16(__s0_174, vgetq_lane_s16(__s1_174, __p2_174)); \ __ret_174; \ }) #else -#define vqrdmulhs_lane_s32(__p0_175, __p1_175, __p2_175) __extension__ ({ \ - int32_t __s0_175 = __p0_175; \ - int32x2_t __s1_175 = __p1_175; \ - int32x2_t __rev1_175; __rev1_175 = __builtin_shufflevector(__s1_175, __s1_175, 1, 0); \ - int32_t __ret_175; \ - __ret_175 = __noswap_vqrdmulhs_s32(__s0_175, __noswap_vget_lane_s32(__rev1_175, __p2_175)); \ +#define vqrdmulhh_laneq_s16(__p0_175, __p1_175, __p2_175) __extension__ ({ \ + int16_t __s0_175 = __p0_175; \ + int16x8_t __s1_175 = __p1_175; \ + int16x8_t __rev1_175; __rev1_175 = __builtin_shufflevector(__s1_175, __s1_175, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16_t __ret_175; \ + __ret_175 = vqrdmulhh_s16(__s0_175, __noswap_vgetq_lane_s16(__rev1_175, __p2_175)); \ __ret_175; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vqrdmulhh_lane_s16(__p0_176, __p1_176, __p2_176) __extension__ ({ \ - int16_t __s0_176 = __p0_176; \ - int16x4_t __s1_176 = __p1_176; \ - int16_t __ret_176; \ - __ret_176 = vqrdmulhh_s16(__s0_176, vget_lane_s16(__s1_176, __p2_176)); \ - __ret_176; \ -}) -#else -#define vqrdmulhh_lane_s16(__p0_177, __p1_177, __p2_177) __extension__ ({ \ - int16_t __s0_177 = __p0_177; \ - int16x4_t __s1_177 = __p1_177; \ - int16x4_t __rev1_177; __rev1_177 = __builtin_shufflevector(__s1_177, __s1_177, 3, 2, 1, 0); \ - int16_t __ret_177; \ - __ret_177 = __noswap_vqrdmulhh_s16(__s0_177, 
__noswap_vget_lane_s16(__rev1_177, __p2_177)); \ - __ret_177; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmulhs_laneq_s32(__p0_178, __p1_178, __p2_178) __extension__ ({ \ - int32_t __s0_178 = __p0_178; \ - int32x4_t __s1_178 = __p1_178; \ - int32_t __ret_178; \ - __ret_178 = vqrdmulhs_s32(__s0_178, vgetq_lane_s32(__s1_178, __p2_178)); \ - __ret_178; \ -}) -#else -#define vqrdmulhs_laneq_s32(__p0_179, __p1_179, __p2_179) __extension__ ({ \ - int32_t __s0_179 = __p0_179; \ - int32x4_t __s1_179 = __p1_179; \ - int32x4_t __rev1_179; __rev1_179 = __builtin_shufflevector(__s1_179, __s1_179, 3, 2, 1, 0); \ - int32_t __ret_179; \ - __ret_179 = __noswap_vqrdmulhs_s32(__s0_179, __noswap_vgetq_lane_s32(__rev1_179, __p2_179)); \ - __ret_179; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmulhh_laneq_s16(__p0_180, __p1_180, __p2_180) __extension__ ({ \ - int16_t __s0_180 = __p0_180; \ - int16x8_t __s1_180 = __p1_180; \ - int16_t __ret_180; \ - __ret_180 = vqrdmulhh_s16(__s0_180, vgetq_lane_s16(__s1_180, __p2_180)); \ - __ret_180; \ -}) -#else -#define vqrdmulhh_laneq_s16(__p0_181, __p1_181, __p2_181) __extension__ ({ \ - int16_t __s0_181 = __p0_181; \ - int16x8_t __s1_181 = __p1_181; \ - int16x8_t __rev1_181; __rev1_181 = __builtin_shufflevector(__s1_181, __s1_181, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16_t __ret_181; \ - __ret_181 = __noswap_vqrdmulhh_s16(__s0_181, __noswap_vgetq_lane_s16(__rev1_181, __p2_181)); \ - __ret_181; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vqrdmulhq_laneq_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __s0 = __p0; \ @@ -62839,1210 +53581,684 @@ __ai int16_t __noswap_vqrdmulhh_s16(int16_t __p0, int16_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ __ai uint8_t vqrshlb_u8(uint8_t __p0, uint8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqrshlb_u8(__p0, __p1); return __ret; } -#else -__ai uint8_t vqrshlb_u8(uint8_t __p0, uint8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqrshlb_u8(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vqrshls_u32(uint32_t __p0, uint32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqrshls_u32(__p0, __p1); return __ret; } -#else -__ai uint32_t vqrshls_u32(uint32_t __p0, uint32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqrshls_u32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vqrshld_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vqrshld_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vqrshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqrshld_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16_t vqrshlh_u16(uint16_t __p0, uint16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqrshlh_u16(__p0, __p1); return __ret; } -#else -__ai uint16_t vqrshlh_u16(uint16_t __p0, uint16_t __p1) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqrshlh_u16(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8_t vqrshlb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqrshlb_s8(__p0, __p1); return __ret; } -#else -__ai int8_t vqrshlb_s8(int8_t __p0, int8_t __p1) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqrshlb_s8(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqrshls_s32(int32_t __p0, int32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqrshls_s32(__p0, __p1); 
return __ret; } -#else -__ai int32_t vqrshls_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrshls_s32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vqrshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vqrshld_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vqrshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqrshld_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16_t vqrshlh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqrshlh_s16(__p0, __p1); return __ret; } +#ifdef __LITTLE_ENDIAN__ +#define vqrshrn_high_n_u32(__p0_176, __p1_176, __p2_176) __extension__ ({ \ + uint16x4_t __s0_176 = __p0_176; \ + uint32x4_t __s1_176 = __p1_176; \ + uint16x8_t __ret_176; \ + __ret_176 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_176), (uint16x4_t)(vqrshrn_n_u32(__s1_176, __p2_176)))); \ + __ret_176; \ +}) #else -__ai int16_t vqrshlh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrshlh_s16(__p0, __p1); - return __ret; -} +#define vqrshrn_high_n_u32(__p0_177, __p1_177, __p2_177) __extension__ ({ \ + uint16x4_t __s0_177 = __p0_177; \ + uint32x4_t __s1_177 = __p1_177; \ + uint16x4_t __rev0_177; __rev0_177 = __builtin_shufflevector(__s0_177, __s0_177, 3, 2, 1, 0); \ + uint32x4_t __rev1_177; __rev1_177 = __builtin_shufflevector(__s1_177, __s1_177, 3, 2, 1, 0); \ + uint16x8_t __ret_177; \ + __ret_177 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_177), (uint16x4_t)(__noswap_vqrshrn_n_u32(__rev1_177, __p2_177)))); \ + __ret_177 = __builtin_shufflevector(__ret_177, __ret_177, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_177; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u32(__p0_182, __p1_182, __p2_182) __extension__ ({ \ - uint16x4_t __s0_182 = __p0_182; \ - uint32x4_t __s1_182 = __p1_182; \ - uint16x8_t __ret_182; \ - __ret_182 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_182), (uint16x4_t)(vqrshrn_n_u32(__s1_182, __p2_182)))); \ +#define vqrshrn_high_n_u64(__p0_178, __p1_178, __p2_178) __extension__ ({ \ + uint32x2_t __s0_178 = __p0_178; \ + uint64x2_t __s1_178 = __p1_178; \ + uint32x4_t __ret_178; \ + __ret_178 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_178), (uint32x2_t)(vqrshrn_n_u64(__s1_178, __p2_178)))); \ + __ret_178; \ +}) +#else +#define vqrshrn_high_n_u64(__p0_179, __p1_179, __p2_179) __extension__ ({ \ + uint32x2_t __s0_179 = __p0_179; \ + uint64x2_t __s1_179 = __p1_179; \ + uint32x2_t __rev0_179; __rev0_179 = __builtin_shufflevector(__s0_179, __s0_179, 1, 0); \ + uint64x2_t __rev1_179; __rev1_179 = __builtin_shufflevector(__s1_179, __s1_179, 1, 0); \ + uint32x4_t __ret_179; \ + __ret_179 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_179), (uint32x2_t)(__noswap_vqrshrn_n_u64(__rev1_179, __p2_179)))); \ + __ret_179 = __builtin_shufflevector(__ret_179, __ret_179, 3, 2, 1, 0); \ + __ret_179; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrshrn_high_n_u16(__p0_180, __p1_180, __p2_180) __extension__ ({ \ + uint8x8_t __s0_180 = __p0_180; \ + uint16x8_t __s1_180 = __p1_180; \ + uint8x16_t __ret_180; \ + __ret_180 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_180), (uint8x8_t)(vqrshrn_n_u16(__s1_180, __p2_180)))); \ + __ret_180; \ +}) +#else +#define vqrshrn_high_n_u16(__p0_181, __p1_181, __p2_181) __extension__ ({ \ + uint8x8_t __s0_181 = __p0_181; \ + uint16x8_t __s1_181 = __p1_181; \ + uint8x8_t 
__rev0_181; __rev0_181 = __builtin_shufflevector(__s0_181, __s0_181, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_181; __rev1_181 = __builtin_shufflevector(__s1_181, __s1_181, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_181; \ + __ret_181 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_181), (uint8x8_t)(__noswap_vqrshrn_n_u16(__rev1_181, __p2_181)))); \ + __ret_181 = __builtin_shufflevector(__ret_181, __ret_181, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_181; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrshrn_high_n_s32(__p0_182, __p1_182, __p2_182) __extension__ ({ \ + int16x4_t __s0_182 = __p0_182; \ + int32x4_t __s1_182 = __p1_182; \ + int16x8_t __ret_182; \ + __ret_182 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_182), (int16x4_t)(vqrshrn_n_s32(__s1_182, __p2_182)))); \ __ret_182; \ }) #else -#define vqrshrn_high_n_u32(__p0_183, __p1_183, __p2_183) __extension__ ({ \ - uint16x4_t __s0_183 = __p0_183; \ - uint32x4_t __s1_183 = __p1_183; \ - uint16x4_t __rev0_183; __rev0_183 = __builtin_shufflevector(__s0_183, __s0_183, 3, 2, 1, 0); \ - uint32x4_t __rev1_183; __rev1_183 = __builtin_shufflevector(__s1_183, __s1_183, 3, 2, 1, 0); \ - uint16x8_t __ret_183; \ - __ret_183 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_183), (uint16x4_t)(__noswap_vqrshrn_n_u32(__rev1_183, __p2_183)))); \ +#define vqrshrn_high_n_s32(__p0_183, __p1_183, __p2_183) __extension__ ({ \ + int16x4_t __s0_183 = __p0_183; \ + int32x4_t __s1_183 = __p1_183; \ + int16x4_t __rev0_183; __rev0_183 = __builtin_shufflevector(__s0_183, __s0_183, 3, 2, 1, 0); \ + int32x4_t __rev1_183; __rev1_183 = __builtin_shufflevector(__s1_183, __s1_183, 3, 2, 1, 0); \ + int16x8_t __ret_183; \ + __ret_183 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_183), (int16x4_t)(__noswap_vqrshrn_n_s32(__rev1_183, __p2_183)))); \ __ret_183 = __builtin_shufflevector(__ret_183, __ret_183, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_183; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u64(__p0_184, __p1_184, __p2_184) __extension__ ({ \ - uint32x2_t __s0_184 = __p0_184; \ - uint64x2_t __s1_184 = __p1_184; \ - uint32x4_t __ret_184; \ - __ret_184 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_184), (uint32x2_t)(vqrshrn_n_u64(__s1_184, __p2_184)))); \ +#define vqrshrn_high_n_s64(__p0_184, __p1_184, __p2_184) __extension__ ({ \ + int32x2_t __s0_184 = __p0_184; \ + int64x2_t __s1_184 = __p1_184; \ + int32x4_t __ret_184; \ + __ret_184 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_184), (int32x2_t)(vqrshrn_n_s64(__s1_184, __p2_184)))); \ __ret_184; \ }) #else -#define vqrshrn_high_n_u64(__p0_185, __p1_185, __p2_185) __extension__ ({ \ - uint32x2_t __s0_185 = __p0_185; \ - uint64x2_t __s1_185 = __p1_185; \ - uint32x2_t __rev0_185; __rev0_185 = __builtin_shufflevector(__s0_185, __s0_185, 1, 0); \ - uint64x2_t __rev1_185; __rev1_185 = __builtin_shufflevector(__s1_185, __s1_185, 1, 0); \ - uint32x4_t __ret_185; \ - __ret_185 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_185), (uint32x2_t)(__noswap_vqrshrn_n_u64(__rev1_185, __p2_185)))); \ +#define vqrshrn_high_n_s64(__p0_185, __p1_185, __p2_185) __extension__ ({ \ + int32x2_t __s0_185 = __p0_185; \ + int64x2_t __s1_185 = __p1_185; \ + int32x2_t __rev0_185; __rev0_185 = __builtin_shufflevector(__s0_185, __s0_185, 1, 0); \ + int64x2_t __rev1_185; __rev1_185 = __builtin_shufflevector(__s1_185, __s1_185, 1, 0); \ + int32x4_t __ret_185; \ + __ret_185 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_185), 
(int32x2_t)(__noswap_vqrshrn_n_s64(__rev1_185, __p2_185)))); \ __ret_185 = __builtin_shufflevector(__ret_185, __ret_185, 3, 2, 1, 0); \ __ret_185; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u16(__p0_186, __p1_186, __p2_186) __extension__ ({ \ - uint8x8_t __s0_186 = __p0_186; \ - uint16x8_t __s1_186 = __p1_186; \ - uint8x16_t __ret_186; \ - __ret_186 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_186), (uint8x8_t)(vqrshrn_n_u16(__s1_186, __p2_186)))); \ +#define vqrshrn_high_n_s16(__p0_186, __p1_186, __p2_186) __extension__ ({ \ + int8x8_t __s0_186 = __p0_186; \ + int16x8_t __s1_186 = __p1_186; \ + int8x16_t __ret_186; \ + __ret_186 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_186), (int8x8_t)(vqrshrn_n_s16(__s1_186, __p2_186)))); \ __ret_186; \ }) #else -#define vqrshrn_high_n_u16(__p0_187, __p1_187, __p2_187) __extension__ ({ \ - uint8x8_t __s0_187 = __p0_187; \ - uint16x8_t __s1_187 = __p1_187; \ - uint8x8_t __rev0_187; __rev0_187 = __builtin_shufflevector(__s0_187, __s0_187, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_187; __rev1_187 = __builtin_shufflevector(__s1_187, __s1_187, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_187; \ - __ret_187 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_187), (uint8x8_t)(__noswap_vqrshrn_n_u16(__rev1_187, __p2_187)))); \ +#define vqrshrn_high_n_s16(__p0_187, __p1_187, __p2_187) __extension__ ({ \ + int8x8_t __s0_187 = __p0_187; \ + int16x8_t __s1_187 = __p1_187; \ + int8x8_t __rev0_187; __rev0_187 = __builtin_shufflevector(__s0_187, __s0_187, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_187; __rev1_187 = __builtin_shufflevector(__s1_187, __s1_187, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_187; \ + __ret_187 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_187), (int8x8_t)(__noswap_vqrshrn_n_s16(__rev1_187, __p2_187)))); \ __ret_187 = __builtin_shufflevector(__ret_187, __ret_187, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_187; \ }) #endif +#define vqrshrns_n_u32(__p0, __p1) __extension__ ({ \ + uint32_t __s0 = __p0; \ + uint16_t __ret; \ + __ret = (uint16_t) __builtin_neon_vqrshrns_n_u32(__s0, __p1); \ + __ret; \ +}) +#define vqrshrnd_n_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint32_t __ret; \ + __ret = (uint32_t) __builtin_neon_vqrshrnd_n_u64(__s0, __p1); \ + __ret; \ +}) +#define vqrshrnh_n_u16(__p0, __p1) __extension__ ({ \ + uint16_t __s0 = __p0; \ + uint8_t __ret; \ + __ret = (uint8_t) __builtin_neon_vqrshrnh_n_u16(__s0, __p1); \ + __ret; \ +}) +#define vqrshrns_n_s32(__p0, __p1) __extension__ ({ \ + int32_t __s0 = __p0; \ + int16_t __ret; \ + __ret = (int16_t) __builtin_neon_vqrshrns_n_s32(__s0, __p1); \ + __ret; \ +}) +#define vqrshrnd_n_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqrshrnd_n_s64(__s0, __p1); \ + __ret; \ +}) +#define vqrshrnh_n_s16(__p0, __p1) __extension__ ({ \ + int16_t __s0 = __p0; \ + int8_t __ret; \ + __ret = (int8_t) __builtin_neon_vqrshrnh_n_s16(__s0, __p1); \ + __ret; \ +}) #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s32(__p0_188, __p1_188, __p2_188) __extension__ ({ \ +#define vqrshrun_high_n_s32(__p0_188, __p1_188, __p2_188) __extension__ ({ \ int16x4_t __s0_188 = __p0_188; \ int32x4_t __s1_188 = __p1_188; \ int16x8_t __ret_188; \ - __ret_188 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_188), (int16x4_t)(vqrshrn_n_s32(__s1_188, __p2_188)))); \ + __ret_188 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_188), (int16x4_t)(vqrshrun_n_s32(__s1_188, __p2_188)))); \ __ret_188; \ }) 
#else -#define vqrshrn_high_n_s32(__p0_189, __p1_189, __p2_189) __extension__ ({ \ +#define vqrshrun_high_n_s32(__p0_189, __p1_189, __p2_189) __extension__ ({ \ int16x4_t __s0_189 = __p0_189; \ int32x4_t __s1_189 = __p1_189; \ int16x4_t __rev0_189; __rev0_189 = __builtin_shufflevector(__s0_189, __s0_189, 3, 2, 1, 0); \ int32x4_t __rev1_189; __rev1_189 = __builtin_shufflevector(__s1_189, __s1_189, 3, 2, 1, 0); \ int16x8_t __ret_189; \ - __ret_189 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_189), (int16x4_t)(__noswap_vqrshrn_n_s32(__rev1_189, __p2_189)))); \ + __ret_189 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_189), (int16x4_t)(__noswap_vqrshrun_n_s32(__rev1_189, __p2_189)))); \ __ret_189 = __builtin_shufflevector(__ret_189, __ret_189, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_189; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s64(__p0_190, __p1_190, __p2_190) __extension__ ({ \ +#define vqrshrun_high_n_s64(__p0_190, __p1_190, __p2_190) __extension__ ({ \ int32x2_t __s0_190 = __p0_190; \ int64x2_t __s1_190 = __p1_190; \ int32x4_t __ret_190; \ - __ret_190 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_190), (int32x2_t)(vqrshrn_n_s64(__s1_190, __p2_190)))); \ + __ret_190 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_190), (int32x2_t)(vqrshrun_n_s64(__s1_190, __p2_190)))); \ __ret_190; \ }) #else -#define vqrshrn_high_n_s64(__p0_191, __p1_191, __p2_191) __extension__ ({ \ +#define vqrshrun_high_n_s64(__p0_191, __p1_191, __p2_191) __extension__ ({ \ int32x2_t __s0_191 = __p0_191; \ int64x2_t __s1_191 = __p1_191; \ int32x2_t __rev0_191; __rev0_191 = __builtin_shufflevector(__s0_191, __s0_191, 1, 0); \ int64x2_t __rev1_191; __rev1_191 = __builtin_shufflevector(__s1_191, __s1_191, 1, 0); \ int32x4_t __ret_191; \ - __ret_191 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_191), (int32x2_t)(__noswap_vqrshrn_n_s64(__rev1_191, __p2_191)))); \ + __ret_191 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_191), (int32x2_t)(__noswap_vqrshrun_n_s64(__rev1_191, __p2_191)))); \ __ret_191 = __builtin_shufflevector(__ret_191, __ret_191, 3, 2, 1, 0); \ __ret_191; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s16(__p0_192, __p1_192, __p2_192) __extension__ ({ \ +#define vqrshrun_high_n_s16(__p0_192, __p1_192, __p2_192) __extension__ ({ \ int8x8_t __s0_192 = __p0_192; \ int16x8_t __s1_192 = __p1_192; \ int8x16_t __ret_192; \ - __ret_192 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_192), (int8x8_t)(vqrshrn_n_s16(__s1_192, __p2_192)))); \ + __ret_192 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_192), (int8x8_t)(vqrshrun_n_s16(__s1_192, __p2_192)))); \ __ret_192; \ }) #else -#define vqrshrn_high_n_s16(__p0_193, __p1_193, __p2_193) __extension__ ({ \ +#define vqrshrun_high_n_s16(__p0_193, __p1_193, __p2_193) __extension__ ({ \ int8x8_t __s0_193 = __p0_193; \ int16x8_t __s1_193 = __p1_193; \ int8x8_t __rev0_193; __rev0_193 = __builtin_shufflevector(__s0_193, __s0_193, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_193; __rev1_193 = __builtin_shufflevector(__s1_193, __s1_193, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __ret_193; \ - __ret_193 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_193), (int8x8_t)(__noswap_vqrshrn_n_s16(__rev1_193, __p2_193)))); \ + __ret_193 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_193), (int8x8_t)(__noswap_vqrshrun_n_s16(__rev1_193, __p2_193)))); \ __ret_193 = __builtin_shufflevector(__ret_193, __ret_193, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_193; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define 
vqrshrns_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqrshrns_n_u32(__s0, __p1); \ - __ret; \ -}) -#else -#define vqrshrns_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqrshrns_n_u32(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrnd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqrshrnd_n_u64(__s0, __p1); \ - __ret; \ -}) -#else -#define vqrshrnd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqrshrnd_n_u64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrnh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqrshrnh_n_u16(__s0, __p1); \ - __ret; \ -}) -#else -#define vqrshrnh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqrshrnh_n_u16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrns_n_s32(__p0, __p1) __extension__ ({ \ +#define vqrshruns_n_s32(__p0, __p1) __extension__ ({ \ int32_t __s0 = __p0; \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqrshrns_n_s32(__s0, __p1); \ + __ret = (int16_t) __builtin_neon_vqrshruns_n_s32(__s0, __p1); \ __ret; \ }) -#else -#define vqrshrns_n_s32(__p0, __p1) __extension__ ({ \ +#define vqrshrund_n_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqrshrund_n_s64(__s0, __p1); \ + __ret; \ +}) +#define vqrshrunh_n_s16(__p0, __p1) __extension__ ({ \ + int16_t __s0 = __p0; \ + int8_t __ret; \ + __ret = (int8_t) __builtin_neon_vqrshrunh_n_s16(__s0, __p1); \ + __ret; \ +}) +__ai uint8_t vqshlb_u8(uint8_t __p0, uint8_t __p1) { + uint8_t __ret; + __ret = (uint8_t) __builtin_neon_vqshlb_u8(__p0, __p1); + return __ret; +} +__ai uint32_t vqshls_u32(uint32_t __p0, uint32_t __p1) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vqshls_u32(__p0, __p1); + return __ret; +} +__ai uint64_t vqshld_u64(uint64_t __p0, uint64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vqshld_u64(__p0, __p1); + return __ret; +} +__ai uint16_t vqshlh_u16(uint16_t __p0, uint16_t __p1) { + uint16_t __ret; + __ret = (uint16_t) __builtin_neon_vqshlh_u16(__p0, __p1); + return __ret; +} +__ai int8_t vqshlb_s8(int8_t __p0, int8_t __p1) { + int8_t __ret; + __ret = (int8_t) __builtin_neon_vqshlb_s8(__p0, __p1); + return __ret; +} +__ai int32_t vqshls_s32(int32_t __p0, int32_t __p1) { + int32_t __ret; + __ret = (int32_t) __builtin_neon_vqshls_s32(__p0, __p1); + return __ret; +} +__ai int64_t vqshld_s64(int64_t __p0, int64_t __p1) { + int64_t __ret; + __ret = (int64_t) __builtin_neon_vqshld_s64(__p0, __p1); + return __ret; +} +__ai int16_t vqshlh_s16(int16_t __p0, int16_t __p1) { + int16_t __ret; + __ret = (int16_t) __builtin_neon_vqshlh_s16(__p0, __p1); + return __ret; +} +#define vqshlb_n_u8(__p0, __p1) __extension__ ({ \ + uint8_t __s0 = __p0; \ + uint8_t __ret; \ + __ret = (uint8_t) __builtin_neon_vqshlb_n_u8(__s0, __p1); \ + __ret; \ +}) +#define vqshls_n_u32(__p0, __p1) __extension__ ({ \ + uint32_t __s0 = __p0; \ + uint32_t __ret; \ + __ret = (uint32_t) __builtin_neon_vqshls_n_u32(__s0, __p1); \ + __ret; \ +}) +#define vqshld_n_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = 
__p0; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vqshld_n_u64(__s0, __p1); \ + __ret; \ +}) +#define vqshlh_n_u16(__p0, __p1) __extension__ ({ \ + uint16_t __s0 = __p0; \ + uint16_t __ret; \ + __ret = (uint16_t) __builtin_neon_vqshlh_n_u16(__s0, __p1); \ + __ret; \ +}) +#define vqshlb_n_s8(__p0, __p1) __extension__ ({ \ + int8_t __s0 = __p0; \ + int8_t __ret; \ + __ret = (int8_t) __builtin_neon_vqshlb_n_s8(__s0, __p1); \ + __ret; \ +}) +#define vqshls_n_s32(__p0, __p1) __extension__ ({ \ int32_t __s0 = __p0; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqshls_n_s32(__s0, __p1); \ + __ret; \ +}) +#define vqshld_n_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vqshld_n_s64(__s0, __p1); \ + __ret; \ +}) +#define vqshlh_n_s16(__p0, __p1) __extension__ ({ \ + int16_t __s0 = __p0; \ int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqrshrns_n_s32(__s0, __p1); \ + __ret = (int16_t) __builtin_neon_vqshlh_n_s16(__s0, __p1); \ __ret; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrnd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqrshrnd_n_s64(__s0, __p1); \ - __ret; \ -}) -#else -#define vqrshrnd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqrshrnd_n_s64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrnh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ +#define vqshlub_n_s8(__p0, __p1) __extension__ ({ \ + int8_t __s0 = __p0; \ int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqrshrnh_n_s16(__s0, __p1); \ + __ret = (int8_t) __builtin_neon_vqshlub_n_s8(__s0, __p1); \ __ret; \ }) -#else -#define vqrshrnh_n_s16(__p0, __p1) __extension__ ({ \ +#define vqshlus_n_s32(__p0, __p1) __extension__ ({ \ + int32_t __s0 = __p0; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqshlus_n_s32(__s0, __p1); \ + __ret; \ +}) +#define vqshlud_n_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vqshlud_n_s64(__s0, __p1); \ + __ret; \ +}) +#define vqshluh_n_s16(__p0, __p1) __extension__ ({ \ int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqrshrnh_n_s16(__s0, __p1); \ + int16_t __ret; \ + __ret = (int16_t) __builtin_neon_vqshluh_n_s16(__s0, __p1); \ __ret; \ }) -#endif - #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s32(__p0_194, __p1_194, __p2_194) __extension__ ({ \ - int16x4_t __s0_194 = __p0_194; \ - int32x4_t __s1_194 = __p1_194; \ - int16x8_t __ret_194; \ - __ret_194 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_194), (int16x4_t)(vqrshrun_n_s32(__s1_194, __p2_194)))); \ +#define vqshrn_high_n_u32(__p0_194, __p1_194, __p2_194) __extension__ ({ \ + uint16x4_t __s0_194 = __p0_194; \ + uint32x4_t __s1_194 = __p1_194; \ + uint16x8_t __ret_194; \ + __ret_194 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_194), (uint16x4_t)(vqshrn_n_u32(__s1_194, __p2_194)))); \ __ret_194; \ }) #else -#define vqrshrun_high_n_s32(__p0_195, __p1_195, __p2_195) __extension__ ({ \ - int16x4_t __s0_195 = __p0_195; \ - int32x4_t __s1_195 = __p1_195; \ - int16x4_t __rev0_195; __rev0_195 = __builtin_shufflevector(__s0_195, __s0_195, 3, 2, 1, 0); \ - int32x4_t __rev1_195; __rev1_195 = __builtin_shufflevector(__s1_195, __s1_195, 3, 2, 1, 0); \ - int16x8_t __ret_195; \ - __ret_195 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_195), 
(int16x4_t)(__noswap_vqrshrun_n_s32(__rev1_195, __p2_195)))); \ +#define vqshrn_high_n_u32(__p0_195, __p1_195, __p2_195) __extension__ ({ \ + uint16x4_t __s0_195 = __p0_195; \ + uint32x4_t __s1_195 = __p1_195; \ + uint16x4_t __rev0_195; __rev0_195 = __builtin_shufflevector(__s0_195, __s0_195, 3, 2, 1, 0); \ + uint32x4_t __rev1_195; __rev1_195 = __builtin_shufflevector(__s1_195, __s1_195, 3, 2, 1, 0); \ + uint16x8_t __ret_195; \ + __ret_195 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_195), (uint16x4_t)(__noswap_vqshrn_n_u32(__rev1_195, __p2_195)))); \ __ret_195 = __builtin_shufflevector(__ret_195, __ret_195, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_195; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s64(__p0_196, __p1_196, __p2_196) __extension__ ({ \ - int32x2_t __s0_196 = __p0_196; \ - int64x2_t __s1_196 = __p1_196; \ - int32x4_t __ret_196; \ - __ret_196 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_196), (int32x2_t)(vqrshrun_n_s64(__s1_196, __p2_196)))); \ +#define vqshrn_high_n_u64(__p0_196, __p1_196, __p2_196) __extension__ ({ \ + uint32x2_t __s0_196 = __p0_196; \ + uint64x2_t __s1_196 = __p1_196; \ + uint32x4_t __ret_196; \ + __ret_196 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_196), (uint32x2_t)(vqshrn_n_u64(__s1_196, __p2_196)))); \ __ret_196; \ }) #else -#define vqrshrun_high_n_s64(__p0_197, __p1_197, __p2_197) __extension__ ({ \ - int32x2_t __s0_197 = __p0_197; \ - int64x2_t __s1_197 = __p1_197; \ - int32x2_t __rev0_197; __rev0_197 = __builtin_shufflevector(__s0_197, __s0_197, 1, 0); \ - int64x2_t __rev1_197; __rev1_197 = __builtin_shufflevector(__s1_197, __s1_197, 1, 0); \ - int32x4_t __ret_197; \ - __ret_197 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_197), (int32x2_t)(__noswap_vqrshrun_n_s64(__rev1_197, __p2_197)))); \ +#define vqshrn_high_n_u64(__p0_197, __p1_197, __p2_197) __extension__ ({ \ + uint32x2_t __s0_197 = __p0_197; \ + uint64x2_t __s1_197 = __p1_197; \ + uint32x2_t __rev0_197; __rev0_197 = __builtin_shufflevector(__s0_197, __s0_197, 1, 0); \ + uint64x2_t __rev1_197; __rev1_197 = __builtin_shufflevector(__s1_197, __s1_197, 1, 0); \ + uint32x4_t __ret_197; \ + __ret_197 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_197), (uint32x2_t)(__noswap_vqshrn_n_u64(__rev1_197, __p2_197)))); \ __ret_197 = __builtin_shufflevector(__ret_197, __ret_197, 3, 2, 1, 0); \ __ret_197; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s16(__p0_198, __p1_198, __p2_198) __extension__ ({ \ - int8x8_t __s0_198 = __p0_198; \ - int16x8_t __s1_198 = __p1_198; \ - int8x16_t __ret_198; \ - __ret_198 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_198), (int8x8_t)(vqrshrun_n_s16(__s1_198, __p2_198)))); \ +#define vqshrn_high_n_u16(__p0_198, __p1_198, __p2_198) __extension__ ({ \ + uint8x8_t __s0_198 = __p0_198; \ + uint16x8_t __s1_198 = __p1_198; \ + uint8x16_t __ret_198; \ + __ret_198 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_198), (uint8x8_t)(vqshrn_n_u16(__s1_198, __p2_198)))); \ __ret_198; \ }) #else -#define vqrshrun_high_n_s16(__p0_199, __p1_199, __p2_199) __extension__ ({ \ - int8x8_t __s0_199 = __p0_199; \ - int16x8_t __s1_199 = __p1_199; \ - int8x8_t __rev0_199; __rev0_199 = __builtin_shufflevector(__s0_199, __s0_199, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_199; __rev1_199 = __builtin_shufflevector(__s1_199, __s1_199, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_199; \ - __ret_199 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_199), (int8x8_t)(__noswap_vqrshrun_n_s16(__rev1_199, __p2_199)))); \ +#define 
vqshrn_high_n_u16(__p0_199, __p1_199, __p2_199) __extension__ ({ \ + uint8x8_t __s0_199 = __p0_199; \ + uint16x8_t __s1_199 = __p1_199; \ + uint8x8_t __rev0_199; __rev0_199 = __builtin_shufflevector(__s0_199, __s0_199, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_199; __rev1_199 = __builtin_shufflevector(__s1_199, __s1_199, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_199; \ + __ret_199 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_199), (uint8x8_t)(__noswap_vqshrn_n_u16(__rev1_199, __p2_199)))); \ __ret_199 = __builtin_shufflevector(__ret_199, __ret_199, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_199; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshruns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqrshruns_n_s32(__s0, __p1); \ - __ret; \ -}) -#else -#define vqrshruns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqrshruns_n_s32(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrund_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqrshrund_n_s64(__s0, __p1); \ - __ret; \ -}) -#else -#define vqrshrund_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqrshrund_n_s64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrshrunh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqrshrunh_n_s16(__s0, __p1); \ - __ret; \ -}) -#else -#define vqrshrunh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqrshrunh_n_s16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint8_t vqshlb_u8(uint8_t __p0, uint8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqshlb_u8(__p0, __p1); - return __ret; -} -#else -__ai uint8_t vqshlb_u8(uint8_t __p0, uint8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqshlb_u8(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint32_t vqshls_u32(uint32_t __p0, uint32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqshls_u32(__p0, __p1); - return __ret; -} -#else -__ai uint32_t vqshls_u32(uint32_t __p0, uint32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqshls_u32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint64_t vqshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqshld_u64(__p0, __p1); - return __ret; -} -#else -__ai uint64_t vqshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqshld_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai uint16_t vqshlh_u16(uint16_t __p0, uint16_t __p1) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqshlh_u16(__p0, __p1); - return __ret; -} -#else -__ai uint16_t vqshlh_u16(uint16_t __p0, uint16_t __p1) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqshlh_u16(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int8_t vqshlb_s8(int8_t __p0, int8_t __p1) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqshlb_s8(__p0, __p1); - return __ret; -} -#else -__ai int8_t vqshlb_s8(int8_t __p0, int8_t __p1) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqshlb_s8(__p0, 
__p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int32_t vqshls_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqshls_s32(__p0, __p1); - return __ret; -} -#else -__ai int32_t vqshls_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqshls_s32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int64_t vqshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqshld_s64(__p0, __p1); - return __ret; -} -#else -__ai int64_t vqshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqshld_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai int16_t vqshlh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqshlh_s16(__p0, __p1); - return __ret; -} -#else -__ai int16_t vqshlh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqshlh_s16(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshlb_n_u8(__p0, __p1) __extension__ ({ \ - uint8_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqshlb_n_u8(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshlb_n_u8(__p0, __p1) __extension__ ({ \ - uint8_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqshlb_n_u8(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshls_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqshls_n_u32(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshls_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqshls_n_u32(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshld_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vqshld_n_u64(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshld_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vqshld_n_u64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshlh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqshlh_n_u16(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshlh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqshlh_n_u16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshlb_n_s8(__p0, __p1) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshlb_n_s8(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshlb_n_s8(__p0, __p1) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshlb_n_s8(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshls_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshls_n_s32(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshls_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshls_n_s32(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshld_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - 
__ret = (int64_t) __builtin_neon_vqshld_n_s64(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshld_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqshld_n_s64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshlh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshlh_n_s16(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshlh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshlh_n_s16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshlub_n_s8(__p0, __p1) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshlub_n_s8(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshlub_n_s8(__p0, __p1) __extension__ ({ \ - int8_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshlub_n_s8(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshlus_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshlus_n_s32(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshlus_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshlus_n_s32(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshlud_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqshlud_n_s64(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshlud_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vqshlud_n_s64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshluh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshluh_n_s16(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshluh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshluh_n_s16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u32(__p0_200, __p1_200, __p2_200) __extension__ ({ \ - uint16x4_t __s0_200 = __p0_200; \ - uint32x4_t __s1_200 = __p1_200; \ - uint16x8_t __ret_200; \ - __ret_200 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_200), (uint16x4_t)(vqshrn_n_u32(__s1_200, __p2_200)))); \ +#define vqshrn_high_n_s32(__p0_200, __p1_200, __p2_200) __extension__ ({ \ + int16x4_t __s0_200 = __p0_200; \ + int32x4_t __s1_200 = __p1_200; \ + int16x8_t __ret_200; \ + __ret_200 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_200), (int16x4_t)(vqshrn_n_s32(__s1_200, __p2_200)))); \ __ret_200; \ }) #else -#define vqshrn_high_n_u32(__p0_201, __p1_201, __p2_201) __extension__ ({ \ - uint16x4_t __s0_201 = __p0_201; \ - uint32x4_t __s1_201 = __p1_201; \ - uint16x4_t __rev0_201; __rev0_201 = __builtin_shufflevector(__s0_201, __s0_201, 3, 2, 1, 0); \ - uint32x4_t __rev1_201; __rev1_201 = __builtin_shufflevector(__s1_201, __s1_201, 3, 2, 1, 0); \ - uint16x8_t __ret_201; \ - __ret_201 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_201), (uint16x4_t)(__noswap_vqshrn_n_u32(__rev1_201, __p2_201)))); \ +#define vqshrn_high_n_s32(__p0_201, __p1_201, __p2_201) __extension__ ({ \ + int16x4_t __s0_201 = __p0_201; \ + int32x4_t __s1_201 = __p1_201; \ + int16x4_t __rev0_201; __rev0_201 = 
__builtin_shufflevector(__s0_201, __s0_201, 3, 2, 1, 0); \ + int32x4_t __rev1_201; __rev1_201 = __builtin_shufflevector(__s1_201, __s1_201, 3, 2, 1, 0); \ + int16x8_t __ret_201; \ + __ret_201 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_201), (int16x4_t)(__noswap_vqshrn_n_s32(__rev1_201, __p2_201)))); \ __ret_201 = __builtin_shufflevector(__ret_201, __ret_201, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_201; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u64(__p0_202, __p1_202, __p2_202) __extension__ ({ \ - uint32x2_t __s0_202 = __p0_202; \ - uint64x2_t __s1_202 = __p1_202; \ - uint32x4_t __ret_202; \ - __ret_202 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_202), (uint32x2_t)(vqshrn_n_u64(__s1_202, __p2_202)))); \ +#define vqshrn_high_n_s64(__p0_202, __p1_202, __p2_202) __extension__ ({ \ + int32x2_t __s0_202 = __p0_202; \ + int64x2_t __s1_202 = __p1_202; \ + int32x4_t __ret_202; \ + __ret_202 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_202), (int32x2_t)(vqshrn_n_s64(__s1_202, __p2_202)))); \ __ret_202; \ }) #else -#define vqshrn_high_n_u64(__p0_203, __p1_203, __p2_203) __extension__ ({ \ - uint32x2_t __s0_203 = __p0_203; \ - uint64x2_t __s1_203 = __p1_203; \ - uint32x2_t __rev0_203; __rev0_203 = __builtin_shufflevector(__s0_203, __s0_203, 1, 0); \ - uint64x2_t __rev1_203; __rev1_203 = __builtin_shufflevector(__s1_203, __s1_203, 1, 0); \ - uint32x4_t __ret_203; \ - __ret_203 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_203), (uint32x2_t)(__noswap_vqshrn_n_u64(__rev1_203, __p2_203)))); \ +#define vqshrn_high_n_s64(__p0_203, __p1_203, __p2_203) __extension__ ({ \ + int32x2_t __s0_203 = __p0_203; \ + int64x2_t __s1_203 = __p1_203; \ + int32x2_t __rev0_203; __rev0_203 = __builtin_shufflevector(__s0_203, __s0_203, 1, 0); \ + int64x2_t __rev1_203; __rev1_203 = __builtin_shufflevector(__s1_203, __s1_203, 1, 0); \ + int32x4_t __ret_203; \ + __ret_203 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_203), (int32x2_t)(__noswap_vqshrn_n_s64(__rev1_203, __p2_203)))); \ __ret_203 = __builtin_shufflevector(__ret_203, __ret_203, 3, 2, 1, 0); \ __ret_203; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u16(__p0_204, __p1_204, __p2_204) __extension__ ({ \ - uint8x8_t __s0_204 = __p0_204; \ - uint16x8_t __s1_204 = __p1_204; \ - uint8x16_t __ret_204; \ - __ret_204 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_204), (uint8x8_t)(vqshrn_n_u16(__s1_204, __p2_204)))); \ +#define vqshrn_high_n_s16(__p0_204, __p1_204, __p2_204) __extension__ ({ \ + int8x8_t __s0_204 = __p0_204; \ + int16x8_t __s1_204 = __p1_204; \ + int8x16_t __ret_204; \ + __ret_204 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_204), (int8x8_t)(vqshrn_n_s16(__s1_204, __p2_204)))); \ __ret_204; \ }) #else -#define vqshrn_high_n_u16(__p0_205, __p1_205, __p2_205) __extension__ ({ \ - uint8x8_t __s0_205 = __p0_205; \ - uint16x8_t __s1_205 = __p1_205; \ - uint8x8_t __rev0_205; __rev0_205 = __builtin_shufflevector(__s0_205, __s0_205, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_205; __rev1_205 = __builtin_shufflevector(__s1_205, __s1_205, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_205; \ - __ret_205 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_205), (uint8x8_t)(__noswap_vqshrn_n_u16(__rev1_205, __p2_205)))); \ +#define vqshrn_high_n_s16(__p0_205, __p1_205, __p2_205) __extension__ ({ \ + int8x8_t __s0_205 = __p0_205; \ + int16x8_t __s1_205 = __p1_205; \ + int8x8_t __rev0_205; __rev0_205 = __builtin_shufflevector(__s0_205, __s0_205, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_205; __rev1_205 = 
__builtin_shufflevector(__s1_205, __s1_205, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_205; \ + __ret_205 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_205), (int8x8_t)(__noswap_vqshrn_n_s16(__rev1_205, __p2_205)))); \ __ret_205 = __builtin_shufflevector(__ret_205, __ret_205, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_205; \ }) #endif +#define vqshrns_n_u32(__p0, __p1) __extension__ ({ \ + uint32_t __s0 = __p0; \ + uint16_t __ret; \ + __ret = (uint16_t) __builtin_neon_vqshrns_n_u32(__s0, __p1); \ + __ret; \ +}) +#define vqshrnd_n_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint32_t __ret; \ + __ret = (uint32_t) __builtin_neon_vqshrnd_n_u64(__s0, __p1); \ + __ret; \ +}) +#define vqshrnh_n_u16(__p0, __p1) __extension__ ({ \ + uint16_t __s0 = __p0; \ + uint8_t __ret; \ + __ret = (uint8_t) __builtin_neon_vqshrnh_n_u16(__s0, __p1); \ + __ret; \ +}) +#define vqshrns_n_s32(__p0, __p1) __extension__ ({ \ + int32_t __s0 = __p0; \ + int16_t __ret; \ + __ret = (int16_t) __builtin_neon_vqshrns_n_s32(__s0, __p1); \ + __ret; \ +}) +#define vqshrnd_n_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + int32_t __ret; \ + __ret = (int32_t) __builtin_neon_vqshrnd_n_s64(__s0, __p1); \ + __ret; \ +}) +#define vqshrnh_n_s16(__p0, __p1) __extension__ ({ \ + int16_t __s0 = __p0; \ + int8_t __ret; \ + __ret = (int8_t) __builtin_neon_vqshrnh_n_s16(__s0, __p1); \ + __ret; \ +}) #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s32(__p0_206, __p1_206, __p2_206) __extension__ ({ \ +#define vqshrun_high_n_s32(__p0_206, __p1_206, __p2_206) __extension__ ({ \ int16x4_t __s0_206 = __p0_206; \ int32x4_t __s1_206 = __p1_206; \ int16x8_t __ret_206; \ - __ret_206 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_206), (int16x4_t)(vqshrn_n_s32(__s1_206, __p2_206)))); \ + __ret_206 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_206), (int16x4_t)(vqshrun_n_s32(__s1_206, __p2_206)))); \ __ret_206; \ }) #else -#define vqshrn_high_n_s32(__p0_207, __p1_207, __p2_207) __extension__ ({ \ +#define vqshrun_high_n_s32(__p0_207, __p1_207, __p2_207) __extension__ ({ \ int16x4_t __s0_207 = __p0_207; \ int32x4_t __s1_207 = __p1_207; \ int16x4_t __rev0_207; __rev0_207 = __builtin_shufflevector(__s0_207, __s0_207, 3, 2, 1, 0); \ int32x4_t __rev1_207; __rev1_207 = __builtin_shufflevector(__s1_207, __s1_207, 3, 2, 1, 0); \ int16x8_t __ret_207; \ - __ret_207 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_207), (int16x4_t)(__noswap_vqshrn_n_s32(__rev1_207, __p2_207)))); \ + __ret_207 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_207), (int16x4_t)(__noswap_vqshrun_n_s32(__rev1_207, __p2_207)))); \ __ret_207 = __builtin_shufflevector(__ret_207, __ret_207, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_207; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s64(__p0_208, __p1_208, __p2_208) __extension__ ({ \ +#define vqshrun_high_n_s64(__p0_208, __p1_208, __p2_208) __extension__ ({ \ int32x2_t __s0_208 = __p0_208; \ int64x2_t __s1_208 = __p1_208; \ int32x4_t __ret_208; \ - __ret_208 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_208), (int32x2_t)(vqshrn_n_s64(__s1_208, __p2_208)))); \ + __ret_208 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_208), (int32x2_t)(vqshrun_n_s64(__s1_208, __p2_208)))); \ __ret_208; \ }) #else -#define vqshrn_high_n_s64(__p0_209, __p1_209, __p2_209) __extension__ ({ \ +#define vqshrun_high_n_s64(__p0_209, __p1_209, __p2_209) __extension__ ({ \ int32x2_t __s0_209 = __p0_209; \ int64x2_t __s1_209 = __p1_209; \ int32x2_t __rev0_209; __rev0_209 = 
__builtin_shufflevector(__s0_209, __s0_209, 1, 0); \ int64x2_t __rev1_209; __rev1_209 = __builtin_shufflevector(__s1_209, __s1_209, 1, 0); \ int32x4_t __ret_209; \ - __ret_209 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_209), (int32x2_t)(__noswap_vqshrn_n_s64(__rev1_209, __p2_209)))); \ + __ret_209 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_209), (int32x2_t)(__noswap_vqshrun_n_s64(__rev1_209, __p2_209)))); \ __ret_209 = __builtin_shufflevector(__ret_209, __ret_209, 3, 2, 1, 0); \ __ret_209; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s16(__p0_210, __p1_210, __p2_210) __extension__ ({ \ +#define vqshrun_high_n_s16(__p0_210, __p1_210, __p2_210) __extension__ ({ \ int8x8_t __s0_210 = __p0_210; \ int16x8_t __s1_210 = __p1_210; \ int8x16_t __ret_210; \ - __ret_210 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_210), (int8x8_t)(vqshrn_n_s16(__s1_210, __p2_210)))); \ + __ret_210 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_210), (int8x8_t)(vqshrun_n_s16(__s1_210, __p2_210)))); \ __ret_210; \ }) #else -#define vqshrn_high_n_s16(__p0_211, __p1_211, __p2_211) __extension__ ({ \ +#define vqshrun_high_n_s16(__p0_211, __p1_211, __p2_211) __extension__ ({ \ int8x8_t __s0_211 = __p0_211; \ int16x8_t __s1_211 = __p1_211; \ int8x8_t __rev0_211; __rev0_211 = __builtin_shufflevector(__s0_211, __s0_211, 7, 6, 5, 4, 3, 2, 1, 0); \ int16x8_t __rev1_211; __rev1_211 = __builtin_shufflevector(__s1_211, __s1_211, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __ret_211; \ - __ret_211 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_211), (int8x8_t)(__noswap_vqshrn_n_s16(__rev1_211, __p2_211)))); \ + __ret_211 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_211), (int8x8_t)(__noswap_vqshrun_n_s16(__rev1_211, __p2_211)))); \ __ret_211 = __builtin_shufflevector(__ret_211, __ret_211, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_211; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vqshrns_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqshrns_n_u32(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshrns_n_u32(__p0, __p1) __extension__ ({ \ - uint32_t __s0 = __p0; \ - uint16_t __ret; \ - __ret = (uint16_t) __builtin_neon_vqshrns_n_u32(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshrnd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqshrnd_n_u64(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshrnd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint32_t __ret; \ - __ret = (uint32_t) __builtin_neon_vqshrnd_n_u64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshrnh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqshrnh_n_u16(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshrnh_n_u16(__p0, __p1) __extension__ ({ \ - uint16_t __s0 = __p0; \ - uint8_t __ret; \ - __ret = (uint8_t) __builtin_neon_vqshrnh_n_u16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshrns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshrns_n_s32(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshrns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshrns_n_s32(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshrnd_n_s64(__p0, 
__p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshrnd_n_s64(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshrnd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshrnd_n_s64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshrnh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshrnh_n_s16(__s0, __p1); \ - __ret; \ -}) -#else -#define vqshrnh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshrnh_n_s16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshrun_high_n_s32(__p0_212, __p1_212, __p2_212) __extension__ ({ \ - int16x4_t __s0_212 = __p0_212; \ - int32x4_t __s1_212 = __p1_212; \ - int16x8_t __ret_212; \ - __ret_212 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_212), (int16x4_t)(vqshrun_n_s32(__s1_212, __p2_212)))); \ - __ret_212; \ -}) -#else -#define vqshrun_high_n_s32(__p0_213, __p1_213, __p2_213) __extension__ ({ \ - int16x4_t __s0_213 = __p0_213; \ - int32x4_t __s1_213 = __p1_213; \ - int16x4_t __rev0_213; __rev0_213 = __builtin_shufflevector(__s0_213, __s0_213, 3, 2, 1, 0); \ - int32x4_t __rev1_213; __rev1_213 = __builtin_shufflevector(__s1_213, __s1_213, 3, 2, 1, 0); \ - int16x8_t __ret_213; \ - __ret_213 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_213), (int16x4_t)(__noswap_vqshrun_n_s32(__rev1_213, __p2_213)))); \ - __ret_213 = __builtin_shufflevector(__ret_213, __ret_213, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_213; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshrun_high_n_s64(__p0_214, __p1_214, __p2_214) __extension__ ({ \ - int32x2_t __s0_214 = __p0_214; \ - int64x2_t __s1_214 = __p1_214; \ - int32x4_t __ret_214; \ - __ret_214 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_214), (int32x2_t)(vqshrun_n_s64(__s1_214, __p2_214)))); \ - __ret_214; \ -}) -#else -#define vqshrun_high_n_s64(__p0_215, __p1_215, __p2_215) __extension__ ({ \ - int32x2_t __s0_215 = __p0_215; \ - int64x2_t __s1_215 = __p1_215; \ - int32x2_t __rev0_215; __rev0_215 = __builtin_shufflevector(__s0_215, __s0_215, 1, 0); \ - int64x2_t __rev1_215; __rev1_215 = __builtin_shufflevector(__s1_215, __s1_215, 1, 0); \ - int32x4_t __ret_215; \ - __ret_215 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_215), (int32x2_t)(__noswap_vqshrun_n_s64(__rev1_215, __p2_215)))); \ - __ret_215 = __builtin_shufflevector(__ret_215, __ret_215, 3, 2, 1, 0); \ - __ret_215; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqshrun_high_n_s16(__p0_216, __p1_216, __p2_216) __extension__ ({ \ - int8x8_t __s0_216 = __p0_216; \ - int16x8_t __s1_216 = __p1_216; \ - int8x16_t __ret_216; \ - __ret_216 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_216), (int8x8_t)(vqshrun_n_s16(__s1_216, __p2_216)))); \ - __ret_216; \ -}) -#else -#define vqshrun_high_n_s16(__p0_217, __p1_217, __p2_217) __extension__ ({ \ - int8x8_t __s0_217 = __p0_217; \ - int16x8_t __s1_217 = __p1_217; \ - int8x8_t __rev0_217; __rev0_217 = __builtin_shufflevector(__s0_217, __s0_217, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_217; __rev1_217 = __builtin_shufflevector(__s1_217, __s1_217, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_217; \ - __ret_217 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_217), (int8x8_t)(__noswap_vqshrun_n_s16(__rev1_217, __p2_217)))); \ - __ret_217 = __builtin_shufflevector(__ret_217, __ret_217, 15, 14, 13, 12, 11, 
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_217; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vqshruns_n_s32(__p0, __p1) __extension__ ({ \ int32_t __s0 = __p0; \ int16_t __ret; \ __ret = (int16_t) __builtin_neon_vqshruns_n_s32(__s0, __p1); \ __ret; \ }) -#else -#define vqshruns_n_s32(__p0, __p1) __extension__ ({ \ - int32_t __s0 = __p0; \ - int16_t __ret; \ - __ret = (int16_t) __builtin_neon_vqshruns_n_s32(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vqshrund_n_s64(__p0, __p1) __extension__ ({ \ int64_t __s0 = __p0; \ int32_t __ret; \ __ret = (int32_t) __builtin_neon_vqshrund_n_s64(__s0, __p1); \ __ret; \ }) -#else -#define vqshrund_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int32_t __ret; \ - __ret = (int32_t) __builtin_neon_vqshrund_n_s64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vqshrunh_n_s16(__p0, __p1) __extension__ ({ \ int16_t __s0 = __p0; \ int8_t __ret; \ __ret = (int8_t) __builtin_neon_vqshrunh_n_s16(__s0, __p1); \ __ret; \ }) -#else -#define vqshrunh_n_s16(__p0, __p1) __extension__ ({ \ - int16_t __s0 = __p0; \ - int8_t __ret; \ - __ret = (int8_t) __builtin_neon_vqshrunh_n_s16(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint8_t vqsubb_u8(uint8_t __p0, uint8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqsubb_u8(__p0, __p1); return __ret; } -#else -__ai uint8_t vqsubb_u8(uint8_t __p0, uint8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqsubb_u8(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vqsubs_u32(uint32_t __p0, uint32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqsubs_u32(__p0, __p1); return __ret; } -#else -__ai uint32_t vqsubs_u32(uint32_t __p0, uint32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqsubs_u32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vqsubd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vqsubd_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vqsubd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqsubd_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16_t vqsubh_u16(uint16_t __p0, uint16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqsubh_u16(__p0, __p1); return __ret; } -#else -__ai uint16_t vqsubh_u16(uint16_t __p0, uint16_t __p1) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqsubh_u16(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8_t vqsubb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; __ret = (int8_t) __builtin_neon_vqsubb_s8(__p0, __p1); return __ret; } -#else -__ai int8_t vqsubb_s8(int8_t __p0, int8_t __p1) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqsubb_s8(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqsubs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vqsubs_s32(__p0, __p1); return __ret; } -#else -__ai int32_t vqsubs_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqsubs_s32(__p0, __p1); - return __ret; -} -__ai int32_t __noswap_vqsubs_s32(int32_t __p0, int32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqsubs_s32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vqsubd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) 
__builtin_neon_vqsubd_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vqsubd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vqsubd_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16_t vqsubh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vqsubh_s16(__p0, __p1); return __ret; } -#else -__ai int16_t vqsubh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqsubh_s16(__p0, __p1); - return __ret; -} -__ai int16_t __noswap_vqsubh_s16(int16_t __p0, int16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqsubh_s16(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai poly8x8_t vqtbl1_p8(poly8x16_t __p0, uint8x8_t __p1) { poly8x8_t __ret; @@ -65211,48 +55427,21 @@ __ai float64x2_t vrecpeq_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vrecpe_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 10); return __ret; } -#else -__ai float64x1_t vrecpe_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 10); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64_t vrecped_f64(float64_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vrecped_f64(__p0); return __ret; } -#else -__ai float64_t vrecped_f64(float64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vrecped_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32_t vrecpes_f32(float32_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrecpes_f32(__p0); return __ret; } -#else -__ai float32_t vrecpes_f32(float32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrecpes_f32(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrecpsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; @@ -65270,262 +55459,179 @@ __ai float64x2_t vrecpsq_f64(float64x2_t __p0, float64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vrecps_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } -#else -__ai float64x1_t vrecps_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 10); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64_t vrecpsd_f64(float64_t __p0, float64_t __p1) { float64_t __ret; __ret = (float64_t) __builtin_neon_vrecpsd_f64(__p0, __p1); return __ret; } -#else -__ai float64_t vrecpsd_f64(float64_t __p0, float64_t __p1) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vrecpsd_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32_t vrecpss_f32(float32_t __p0, float32_t __p1) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrecpss_f32(__p0, __p1); return __ret; } -#else -__ai float32_t vrecpss_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrecpss_f32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64_t vrecpxd_f64(float64_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vrecpxd_f64(__p0); return __ret; } -#else -__ai float64_t vrecpxd_f64(float64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vrecpxd_f64(__p0); - return __ret; -} -#endif - 
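(Illustration only, not part of the upstream patch: the scalar reciprocal intrinsics deduplicated in the hunks above are endianness-independent, which is why their big-endian copies can be dropped. A minimal sketch of how they are typically used on an AArch64 target; recip_f32 is a hypothetical helper name, and the two-step refinement count is an assumption, not something the header prescribes.)

#include <arm_neon.h>

/* vrecpes_f32 returns a coarse (~8-bit) estimate of 1/d.
   vrecpss_f32(a, b) computes 2.0f - a*b (the FRECPS step), so
   x = x * vrecpss_f32(d, x) is one Newton-Raphson refinement. */
static float recip_f32(float d) {
  float x = vrecpes_f32(d);     /* initial estimate of 1/d */
  x = x * vrecpss_f32(d, x);    /* first refinement step */
  x = x * vrecpss_f32(d, x);    /* second step, near full float precision */
  return x;
}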
-#ifdef __LITTLE_ENDIAN__ __ai float32_t vrecpxs_f32(float32_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrecpxs_f32(__p0); return __ret; } -#else -__ai float32_t vrecpxs_f32(float32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrecpxs_f32(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vrshld_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vrshld_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vrshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vrshld_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vrshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vrshld_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vrshld_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vrshld_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrshrd_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __s0 = __p0; \ uint64_t __ret; \ __ret = (uint64_t) __builtin_neon_vrshrd_n_u64(__s0, __p1); \ __ret; \ }) -#else -#define vrshrd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vrshrd_n_u64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrshrd_n_s64(__p0, __p1) __extension__ ({ \ int64_t __s0 = __p0; \ int64_t __ret; \ __ret = (int64_t) __builtin_neon_vrshrd_n_s64(__s0, __p1); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vrshrn_high_n_u32(__p0_212, __p1_212, __p2_212) __extension__ ({ \ + uint16x4_t __s0_212 = __p0_212; \ + uint32x4_t __s1_212 = __p1_212; \ + uint16x8_t __ret_212; \ + __ret_212 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_212), (uint16x4_t)(vrshrn_n_u32(__s1_212, __p2_212)))); \ + __ret_212; \ +}) #else -#define vrshrd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vrshrd_n_s64(__s0, __p1); \ - __ret; \ +#define vrshrn_high_n_u32(__p0_213, __p1_213, __p2_213) __extension__ ({ \ + uint16x4_t __s0_213 = __p0_213; \ + uint32x4_t __s1_213 = __p1_213; \ + uint16x4_t __rev0_213; __rev0_213 = __builtin_shufflevector(__s0_213, __s0_213, 3, 2, 1, 0); \ + uint32x4_t __rev1_213; __rev1_213 = __builtin_shufflevector(__s1_213, __s1_213, 3, 2, 1, 0); \ + uint16x8_t __ret_213; \ + __ret_213 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_213), (uint16x4_t)(__noswap_vrshrn_n_u32(__rev1_213, __p2_213)))); \ + __ret_213 = __builtin_shufflevector(__ret_213, __ret_213, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_213; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u32(__p0_218, __p1_218, __p2_218) __extension__ ({ \ - uint16x4_t __s0_218 = __p0_218; \ - uint32x4_t __s1_218 = __p1_218; \ - uint16x8_t __ret_218; \ - __ret_218 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_218), (uint16x4_t)(vrshrn_n_u32(__s1_218, __p2_218)))); \ +#define vrshrn_high_n_u64(__p0_214, __p1_214, __p2_214) __extension__ ({ \ + uint32x2_t __s0_214 = __p0_214; \ + uint64x2_t __s1_214 = __p1_214; \ + uint32x4_t __ret_214; \ + __ret_214 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_214), (uint32x2_t)(vrshrn_n_u64(__s1_214, __p2_214)))); \ + __ret_214; \ +}) +#else +#define vrshrn_high_n_u64(__p0_215, __p1_215, __p2_215) __extension__ ({ \ + uint32x2_t __s0_215 = __p0_215; \ + uint64x2_t __s1_215 = __p1_215; \ + uint32x2_t __rev0_215; __rev0_215 = 
__builtin_shufflevector(__s0_215, __s0_215, 1, 0); \ + uint64x2_t __rev1_215; __rev1_215 = __builtin_shufflevector(__s1_215, __s1_215, 1, 0); \ + uint32x4_t __ret_215; \ + __ret_215 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_215), (uint32x2_t)(__noswap_vrshrn_n_u64(__rev1_215, __p2_215)))); \ + __ret_215 = __builtin_shufflevector(__ret_215, __ret_215, 3, 2, 1, 0); \ + __ret_215; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshrn_high_n_u16(__p0_216, __p1_216, __p2_216) __extension__ ({ \ + uint8x8_t __s0_216 = __p0_216; \ + uint16x8_t __s1_216 = __p1_216; \ + uint8x16_t __ret_216; \ + __ret_216 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_216), (uint8x8_t)(vrshrn_n_u16(__s1_216, __p2_216)))); \ + __ret_216; \ +}) +#else +#define vrshrn_high_n_u16(__p0_217, __p1_217, __p2_217) __extension__ ({ \ + uint8x8_t __s0_217 = __p0_217; \ + uint16x8_t __s1_217 = __p1_217; \ + uint8x8_t __rev0_217; __rev0_217 = __builtin_shufflevector(__s0_217, __s0_217, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_217; __rev1_217 = __builtin_shufflevector(__s1_217, __s1_217, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_217; \ + __ret_217 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_217), (uint8x8_t)(__noswap_vrshrn_n_u16(__rev1_217, __p2_217)))); \ + __ret_217 = __builtin_shufflevector(__ret_217, __ret_217, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_217; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vrshrn_high_n_s32(__p0_218, __p1_218, __p2_218) __extension__ ({ \ + int16x4_t __s0_218 = __p0_218; \ + int32x4_t __s1_218 = __p1_218; \ + int16x8_t __ret_218; \ + __ret_218 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_218), (int16x4_t)(vrshrn_n_s32(__s1_218, __p2_218)))); \ __ret_218; \ }) #else -#define vrshrn_high_n_u32(__p0_219, __p1_219, __p2_219) __extension__ ({ \ - uint16x4_t __s0_219 = __p0_219; \ - uint32x4_t __s1_219 = __p1_219; \ - uint16x4_t __rev0_219; __rev0_219 = __builtin_shufflevector(__s0_219, __s0_219, 3, 2, 1, 0); \ - uint32x4_t __rev1_219; __rev1_219 = __builtin_shufflevector(__s1_219, __s1_219, 3, 2, 1, 0); \ - uint16x8_t __ret_219; \ - __ret_219 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_219), (uint16x4_t)(__noswap_vrshrn_n_u32(__rev1_219, __p2_219)))); \ +#define vrshrn_high_n_s32(__p0_219, __p1_219, __p2_219) __extension__ ({ \ + int16x4_t __s0_219 = __p0_219; \ + int32x4_t __s1_219 = __p1_219; \ + int16x4_t __rev0_219; __rev0_219 = __builtin_shufflevector(__s0_219, __s0_219, 3, 2, 1, 0); \ + int32x4_t __rev1_219; __rev1_219 = __builtin_shufflevector(__s1_219, __s1_219, 3, 2, 1, 0); \ + int16x8_t __ret_219; \ + __ret_219 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_219), (int16x4_t)(__noswap_vrshrn_n_s32(__rev1_219, __p2_219)))); \ __ret_219 = __builtin_shufflevector(__ret_219, __ret_219, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_219; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u64(__p0_220, __p1_220, __p2_220) __extension__ ({ \ - uint32x2_t __s0_220 = __p0_220; \ - uint64x2_t __s1_220 = __p1_220; \ - uint32x4_t __ret_220; \ - __ret_220 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_220), (uint32x2_t)(vrshrn_n_u64(__s1_220, __p2_220)))); \ +#define vrshrn_high_n_s64(__p0_220, __p1_220, __p2_220) __extension__ ({ \ + int32x2_t __s0_220 = __p0_220; \ + int64x2_t __s1_220 = __p1_220; \ + int32x4_t __ret_220; \ + __ret_220 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_220), (int32x2_t)(vrshrn_n_s64(__s1_220, __p2_220)))); \ __ret_220; \ }) #else -#define vrshrn_high_n_u64(__p0_221, __p1_221, __p2_221) 
__extension__ ({ \ - uint32x2_t __s0_221 = __p0_221; \ - uint64x2_t __s1_221 = __p1_221; \ - uint32x2_t __rev0_221; __rev0_221 = __builtin_shufflevector(__s0_221, __s0_221, 1, 0); \ - uint64x2_t __rev1_221; __rev1_221 = __builtin_shufflevector(__s1_221, __s1_221, 1, 0); \ - uint32x4_t __ret_221; \ - __ret_221 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_221), (uint32x2_t)(__noswap_vrshrn_n_u64(__rev1_221, __p2_221)))); \ +#define vrshrn_high_n_s64(__p0_221, __p1_221, __p2_221) __extension__ ({ \ + int32x2_t __s0_221 = __p0_221; \ + int64x2_t __s1_221 = __p1_221; \ + int32x2_t __rev0_221; __rev0_221 = __builtin_shufflevector(__s0_221, __s0_221, 1, 0); \ + int64x2_t __rev1_221; __rev1_221 = __builtin_shufflevector(__s1_221, __s1_221, 1, 0); \ + int32x4_t __ret_221; \ + __ret_221 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_221), (int32x2_t)(__noswap_vrshrn_n_s64(__rev1_221, __p2_221)))); \ __ret_221 = __builtin_shufflevector(__ret_221, __ret_221, 3, 2, 1, 0); \ __ret_221; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u16(__p0_222, __p1_222, __p2_222) __extension__ ({ \ - uint8x8_t __s0_222 = __p0_222; \ - uint16x8_t __s1_222 = __p1_222; \ - uint8x16_t __ret_222; \ - __ret_222 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_222), (uint8x8_t)(vrshrn_n_u16(__s1_222, __p2_222)))); \ +#define vrshrn_high_n_s16(__p0_222, __p1_222, __p2_222) __extension__ ({ \ + int8x8_t __s0_222 = __p0_222; \ + int16x8_t __s1_222 = __p1_222; \ + int8x16_t __ret_222; \ + __ret_222 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_222), (int8x8_t)(vrshrn_n_s16(__s1_222, __p2_222)))); \ __ret_222; \ }) #else -#define vrshrn_high_n_u16(__p0_223, __p1_223, __p2_223) __extension__ ({ \ - uint8x8_t __s0_223 = __p0_223; \ - uint16x8_t __s1_223 = __p1_223; \ - uint8x8_t __rev0_223; __rev0_223 = __builtin_shufflevector(__s0_223, __s0_223, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_223; __rev1_223 = __builtin_shufflevector(__s1_223, __s1_223, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_223; \ - __ret_223 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_223), (uint8x8_t)(__noswap_vrshrn_n_u16(__rev1_223, __p2_223)))); \ +#define vrshrn_high_n_s16(__p0_223, __p1_223, __p2_223) __extension__ ({ \ + int8x8_t __s0_223 = __p0_223; \ + int16x8_t __s1_223 = __p1_223; \ + int8x8_t __rev0_223; __rev0_223 = __builtin_shufflevector(__s0_223, __s0_223, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_223; __rev1_223 = __builtin_shufflevector(__s1_223, __s1_223, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_223; \ + __ret_223 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_223), (int8x8_t)(__noswap_vrshrn_n_s16(__rev1_223, __p2_223)))); \ __ret_223 = __builtin_shufflevector(__ret_223, __ret_223, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_223; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s32(__p0_224, __p1_224, __p2_224) __extension__ ({ \ - int16x4_t __s0_224 = __p0_224; \ - int32x4_t __s1_224 = __p1_224; \ - int16x8_t __ret_224; \ - __ret_224 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_224), (int16x4_t)(vrshrn_n_s32(__s1_224, __p2_224)))); \ - __ret_224; \ -}) -#else -#define vrshrn_high_n_s32(__p0_225, __p1_225, __p2_225) __extension__ ({ \ - int16x4_t __s0_225 = __p0_225; \ - int32x4_t __s1_225 = __p1_225; \ - int16x4_t __rev0_225; __rev0_225 = __builtin_shufflevector(__s0_225, __s0_225, 3, 2, 1, 0); \ - int32x4_t __rev1_225; __rev1_225 = __builtin_shufflevector(__s1_225, __s1_225, 3, 2, 1, 0); \ - int16x8_t __ret_225; \ - __ret_225 = 
(int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_225), (int16x4_t)(__noswap_vrshrn_n_s32(__rev1_225, __p2_225)))); \ - __ret_225 = __builtin_shufflevector(__ret_225, __ret_225, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_225; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s64(__p0_226, __p1_226, __p2_226) __extension__ ({ \ - int32x2_t __s0_226 = __p0_226; \ - int64x2_t __s1_226 = __p1_226; \ - int32x4_t __ret_226; \ - __ret_226 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_226), (int32x2_t)(vrshrn_n_s64(__s1_226, __p2_226)))); \ - __ret_226; \ -}) -#else -#define vrshrn_high_n_s64(__p0_227, __p1_227, __p2_227) __extension__ ({ \ - int32x2_t __s0_227 = __p0_227; \ - int64x2_t __s1_227 = __p1_227; \ - int32x2_t __rev0_227; __rev0_227 = __builtin_shufflevector(__s0_227, __s0_227, 1, 0); \ - int64x2_t __rev1_227; __rev1_227 = __builtin_shufflevector(__s1_227, __s1_227, 1, 0); \ - int32x4_t __ret_227; \ - __ret_227 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_227), (int32x2_t)(__noswap_vrshrn_n_s64(__rev1_227, __p2_227)))); \ - __ret_227 = __builtin_shufflevector(__ret_227, __ret_227, 3, 2, 1, 0); \ - __ret_227; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s16(__p0_228, __p1_228, __p2_228) __extension__ ({ \ - int8x8_t __s0_228 = __p0_228; \ - int16x8_t __s1_228 = __p1_228; \ - int8x16_t __ret_228; \ - __ret_228 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_228), (int8x8_t)(vrshrn_n_s16(__s1_228, __p2_228)))); \ - __ret_228; \ -}) -#else -#define vrshrn_high_n_s16(__p0_229, __p1_229, __p2_229) __extension__ ({ \ - int8x8_t __s0_229 = __p0_229; \ - int16x8_t __s1_229 = __p1_229; \ - int8x8_t __rev0_229; __rev0_229 = __builtin_shufflevector(__s0_229, __s0_229, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_229; __rev1_229 = __builtin_shufflevector(__s1_229, __s1_229, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_229; \ - __ret_229 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_229), (int8x8_t)(__noswap_vrshrn_n_s16(__rev1_229, __p2_229)))); \ - __ret_229 = __builtin_shufflevector(__ret_229, __ret_229, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_229; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrsqrteq_f64(float64x2_t __p0) { float64x2_t __ret; @@ -65542,48 +55648,21 @@ __ai float64x2_t vrsqrteq_f64(float64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vrsqrte_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 10); return __ret; } -#else -__ai float64x1_t vrsqrte_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 10); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64_t vrsqrted_f64(float64_t __p0) { float64_t __ret; __ret = (float64_t) __builtin_neon_vrsqrted_f64(__p0); return __ret; } -#else -__ai float64_t vrsqrted_f64(float64_t __p0) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vrsqrted_f64(__p0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32_t vrsqrtes_f32(float32_t __p0) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrsqrtes_f32(__p0); return __ret; } -#else -__ai float32_t vrsqrtes_f32(float32_t __p0) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrsqrtes_f32(__p0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vrsqrtsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; @@ -65601,49 +55680,21 @@ __ai float64x2_t vrsqrtsq_f64(float64x2_t __p0, float64x2_t __p1) { } #endif -#ifdef 
__LITTLE_ENDIAN__ __ai float64x1_t vrsqrts_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 10); return __ret; } -#else -__ai float64x1_t vrsqrts_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 10); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float64_t vrsqrtsd_f64(float64_t __p0, float64_t __p1) { float64_t __ret; __ret = (float64_t) __builtin_neon_vrsqrtsd_f64(__p0, __p1); return __ret; } -#else -__ai float64_t vrsqrtsd_f64(float64_t __p0, float64_t __p1) { - float64_t __ret; - __ret = (float64_t) __builtin_neon_vrsqrtsd_f64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai float32_t vrsqrtss_f32(float32_t __p0, float32_t __p1) { float32_t __ret; __ret = (float32_t) __builtin_neon_vrsqrtss_f32(__p0, __p1); return __ret; } -#else -__ai float32_t vrsqrtss_f32(float32_t __p0, float32_t __p1) { - float32_t __ret; - __ret = (float32_t) __builtin_neon_vrsqrtss_f32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64_t __s0 = __p0; \ uint64_t __s1 = __p1; \ @@ -65651,17 +55702,6 @@ __ai float32_t vrsqrtss_f32(float32_t __p0, float32_t __p1) { __ret = (uint64_t) __builtin_neon_vrsrad_n_u64(__s0, __s1, __p2); \ __ret; \ }) -#else -#define vrsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vrsrad_n_u64(__s0, __s1, __p2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vrsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64_t __s0 = __p0; \ int64_t __s1 = __p1; \ @@ -65669,16 +55709,6 @@ __ai float32_t vrsqrtss_f32(float32_t __p0, float32_t __p1) { __ret = (int64_t) __builtin_neon_vrsrad_n_s64(__s0, __s1, __p2); \ __ret; \ }) -#else -#define vrsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __s1 = __p1; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vrsrad_n_s64(__s0, __s1, __p2); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vrsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t __ret; @@ -65787,37 +55817,19 @@ __ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { } #endif -#ifdef __LITTLE_ENDIAN__ #define vset_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ + __ret = (poly64x1_t) __builtin_neon_vset_lane_i64(__s0, (poly64x1_t)__s1, __p2); \ __ret; \ }) -#else -#define vset_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) -#define __noswap_vset_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vset_lane_i64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vsetq_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ + __ret = (poly64x2_t) 
__builtin_neon_vsetq_lane_i64(__s0, (poly64x2_t)__s1, __p2); \ __ret; \ }) #else @@ -65826,7 +55838,7 @@ __ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { poly64x2_t __s1 = __p1; \ poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (poly64x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) @@ -65834,7 +55846,7 @@ __ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { poly64_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); \ + __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (poly64x2_t)__s1, __p2); \ __ret; \ }) #endif @@ -65844,7 +55856,7 @@ __ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { float64_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (int8x16_t)__s1, __p2); \ + __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (float64x2_t)__s1, __p2); \ __ret; \ }) #else @@ -65853,7 +55865,7 @@ __ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { float64x2_t __s1 = __p1; \ float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (int8x16_t)__rev1, __p2); \ + __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (float64x2_t)__rev1, __p2); \ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ __ret; \ }) @@ -65861,363 +55873,286 @@ __ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { float64_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (int8x16_t)__s1, __p2); \ + __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (float64x2_t)__s1, __p2); \ __ret; \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vset_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (int8x8_t)__s1, __p2); \ + __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (float64x1_t)__s1, __p2); \ __ret; \ }) -#else -#define vset_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) -#define __noswap_vset_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64_t __s0 = __p0; \ - float64x1_t __s1 = __p1; \ - float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (int8x8_t)__s1, __p2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vshld_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vshld_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vshld_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vshld_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vshld_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { - int64_t 
__ret; - __ret = (int64_t) __builtin_neon_vshld_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ #define vshld_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __s0 = __p0; \ uint64_t __ret; \ __ret = (uint64_t) __builtin_neon_vshld_n_u64(__s0, __p1); \ __ret; \ }) -#else -#define vshld_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vshld_n_u64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vshld_n_s64(__p0, __p1) __extension__ ({ \ int64_t __s0 = __p0; \ int64_t __ret; \ __ret = (int64_t) __builtin_neon_vshld_n_s64(__s0, __p1); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vshll_high_n_u8(__p0_224, __p1_224) __extension__ ({ \ + uint8x16_t __s0_224 = __p0_224; \ + uint16x8_t __ret_224; \ + __ret_224 = (uint16x8_t)(vshll_n_u8(vget_high_u8(__s0_224), __p1_224)); \ + __ret_224; \ +}) #else -#define vshld_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vshld_n_s64(__s0, __p1); \ - __ret; \ +#define vshll_high_n_u8(__p0_225, __p1_225) __extension__ ({ \ + uint8x16_t __s0_225 = __p0_225; \ + uint8x16_t __rev0_225; __rev0_225 = __builtin_shufflevector(__s0_225, __s0_225, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret_225; \ + __ret_225 = (uint16x8_t)(__noswap_vshll_n_u8(__noswap_vget_high_u8(__rev0_225), __p1_225)); \ + __ret_225 = __builtin_shufflevector(__ret_225, __ret_225, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_225; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u8(__p0_230, __p1_230) __extension__ ({ \ - uint8x16_t __s0_230 = __p0_230; \ - uint16x8_t __ret_230; \ - __ret_230 = (uint16x8_t)(vshll_n_u8(vget_high_u8(__s0_230), __p1_230)); \ +#define vshll_high_n_u32(__p0_226, __p1_226) __extension__ ({ \ + uint32x4_t __s0_226 = __p0_226; \ + uint64x2_t __ret_226; \ + __ret_226 = (uint64x2_t)(vshll_n_u32(vget_high_u32(__s0_226), __p1_226)); \ + __ret_226; \ +}) +#else +#define vshll_high_n_u32(__p0_227, __p1_227) __extension__ ({ \ + uint32x4_t __s0_227 = __p0_227; \ + uint32x4_t __rev0_227; __rev0_227 = __builtin_shufflevector(__s0_227, __s0_227, 3, 2, 1, 0); \ + uint64x2_t __ret_227; \ + __ret_227 = (uint64x2_t)(__noswap_vshll_n_u32(__noswap_vget_high_u32(__rev0_227), __p1_227)); \ + __ret_227 = __builtin_shufflevector(__ret_227, __ret_227, 1, 0); \ + __ret_227; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vshll_high_n_u16(__p0_228, __p1_228) __extension__ ({ \ + uint16x8_t __s0_228 = __p0_228; \ + uint32x4_t __ret_228; \ + __ret_228 = (uint32x4_t)(vshll_n_u16(vget_high_u16(__s0_228), __p1_228)); \ + __ret_228; \ +}) +#else +#define vshll_high_n_u16(__p0_229, __p1_229) __extension__ ({ \ + uint16x8_t __s0_229 = __p0_229; \ + uint16x8_t __rev0_229; __rev0_229 = __builtin_shufflevector(__s0_229, __s0_229, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret_229; \ + __ret_229 = (uint32x4_t)(__noswap_vshll_n_u16(__noswap_vget_high_u16(__rev0_229), __p1_229)); \ + __ret_229 = __builtin_shufflevector(__ret_229, __ret_229, 3, 2, 1, 0); \ + __ret_229; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vshll_high_n_s8(__p0_230, __p1_230) __extension__ ({ \ + int8x16_t __s0_230 = __p0_230; \ + int16x8_t __ret_230; \ + __ret_230 = (int16x8_t)(vshll_n_s8(vget_high_s8(__s0_230), __p1_230)); \ __ret_230; \ }) #else -#define vshll_high_n_u8(__p0_231, __p1_231) __extension__ ({ \ - uint8x16_t __s0_231 = __p0_231; \ - uint8x16_t __rev0_231; __rev0_231 = __builtin_shufflevector(__s0_231, 
__s0_231, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret_231; \ - __ret_231 = (uint16x8_t)(__noswap_vshll_n_u8(__noswap_vget_high_u8(__rev0_231), __p1_231)); \ +#define vshll_high_n_s8(__p0_231, __p1_231) __extension__ ({ \ + int8x16_t __s0_231 = __p0_231; \ + int8x16_t __rev0_231; __rev0_231 = __builtin_shufflevector(__s0_231, __s0_231, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_231; \ + __ret_231 = (int16x8_t)(__noswap_vshll_n_s8(__noswap_vget_high_s8(__rev0_231), __p1_231)); \ __ret_231 = __builtin_shufflevector(__ret_231, __ret_231, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_231; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u32(__p0_232, __p1_232) __extension__ ({ \ - uint32x4_t __s0_232 = __p0_232; \ - uint64x2_t __ret_232; \ - __ret_232 = (uint64x2_t)(vshll_n_u32(vget_high_u32(__s0_232), __p1_232)); \ +#define vshll_high_n_s32(__p0_232, __p1_232) __extension__ ({ \ + int32x4_t __s0_232 = __p0_232; \ + int64x2_t __ret_232; \ + __ret_232 = (int64x2_t)(vshll_n_s32(vget_high_s32(__s0_232), __p1_232)); \ __ret_232; \ }) #else -#define vshll_high_n_u32(__p0_233, __p1_233) __extension__ ({ \ - uint32x4_t __s0_233 = __p0_233; \ - uint32x4_t __rev0_233; __rev0_233 = __builtin_shufflevector(__s0_233, __s0_233, 3, 2, 1, 0); \ - uint64x2_t __ret_233; \ - __ret_233 = (uint64x2_t)(__noswap_vshll_n_u32(__noswap_vget_high_u32(__rev0_233), __p1_233)); \ +#define vshll_high_n_s32(__p0_233, __p1_233) __extension__ ({ \ + int32x4_t __s0_233 = __p0_233; \ + int32x4_t __rev0_233; __rev0_233 = __builtin_shufflevector(__s0_233, __s0_233, 3, 2, 1, 0); \ + int64x2_t __ret_233; \ + __ret_233 = (int64x2_t)(__noswap_vshll_n_s32(__noswap_vget_high_s32(__rev0_233), __p1_233)); \ __ret_233 = __builtin_shufflevector(__ret_233, __ret_233, 1, 0); \ __ret_233; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u16(__p0_234, __p1_234) __extension__ ({ \ - uint16x8_t __s0_234 = __p0_234; \ - uint32x4_t __ret_234; \ - __ret_234 = (uint32x4_t)(vshll_n_u16(vget_high_u16(__s0_234), __p1_234)); \ +#define vshll_high_n_s16(__p0_234, __p1_234) __extension__ ({ \ + int16x8_t __s0_234 = __p0_234; \ + int32x4_t __ret_234; \ + __ret_234 = (int32x4_t)(vshll_n_s16(vget_high_s16(__s0_234), __p1_234)); \ __ret_234; \ }) #else -#define vshll_high_n_u16(__p0_235, __p1_235) __extension__ ({ \ - uint16x8_t __s0_235 = __p0_235; \ - uint16x8_t __rev0_235; __rev0_235 = __builtin_shufflevector(__s0_235, __s0_235, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret_235; \ - __ret_235 = (uint32x4_t)(__noswap_vshll_n_u16(__noswap_vget_high_u16(__rev0_235), __p1_235)); \ +#define vshll_high_n_s16(__p0_235, __p1_235) __extension__ ({ \ + int16x8_t __s0_235 = __p0_235; \ + int16x8_t __rev0_235; __rev0_235 = __builtin_shufflevector(__s0_235, __s0_235, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_235; \ + __ret_235 = (int32x4_t)(__noswap_vshll_n_s16(__noswap_vget_high_s16(__rev0_235), __p1_235)); \ __ret_235 = __builtin_shufflevector(__ret_235, __ret_235, 3, 2, 1, 0); \ __ret_235; \ }) #endif +#define vshrd_n_u64(__p0, __p1) __extension__ ({ \ + uint64_t __s0 = __p0; \ + uint64_t __ret; \ + __ret = (uint64_t) __builtin_neon_vshrd_n_u64(__s0, __p1); \ + __ret; \ +}) +#define vshrd_n_s64(__p0, __p1) __extension__ ({ \ + int64_t __s0 = __p0; \ + int64_t __ret; \ + __ret = (int64_t) __builtin_neon_vshrd_n_s64(__s0, __p1); \ + __ret; \ +}) #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s8(__p0_236, __p1_236) __extension__ ({ \ - int8x16_t __s0_236 = __p0_236; \ - int16x8_t __ret_236; \ - 
__ret_236 = (int16x8_t)(vshll_n_s8(vget_high_s8(__s0_236), __p1_236)); \ +#define vshrn_high_n_u32(__p0_236, __p1_236, __p2_236) __extension__ ({ \ + uint16x4_t __s0_236 = __p0_236; \ + uint32x4_t __s1_236 = __p1_236; \ + uint16x8_t __ret_236; \ + __ret_236 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_236), (uint16x4_t)(vshrn_n_u32(__s1_236, __p2_236)))); \ __ret_236; \ }) #else -#define vshll_high_n_s8(__p0_237, __p1_237) __extension__ ({ \ - int8x16_t __s0_237 = __p0_237; \ - int8x16_t __rev0_237; __rev0_237 = __builtin_shufflevector(__s0_237, __s0_237, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_237; \ - __ret_237 = (int16x8_t)(__noswap_vshll_n_s8(__noswap_vget_high_s8(__rev0_237), __p1_237)); \ +#define vshrn_high_n_u32(__p0_237, __p1_237, __p2_237) __extension__ ({ \ + uint16x4_t __s0_237 = __p0_237; \ + uint32x4_t __s1_237 = __p1_237; \ + uint16x4_t __rev0_237; __rev0_237 = __builtin_shufflevector(__s0_237, __s0_237, 3, 2, 1, 0); \ + uint32x4_t __rev1_237; __rev1_237 = __builtin_shufflevector(__s1_237, __s1_237, 3, 2, 1, 0); \ + uint16x8_t __ret_237; \ + __ret_237 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_237), (uint16x4_t)(__noswap_vshrn_n_u32(__rev1_237, __p2_237)))); \ __ret_237 = __builtin_shufflevector(__ret_237, __ret_237, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_237; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s32(__p0_238, __p1_238) __extension__ ({ \ - int32x4_t __s0_238 = __p0_238; \ - int64x2_t __ret_238; \ - __ret_238 = (int64x2_t)(vshll_n_s32(vget_high_s32(__s0_238), __p1_238)); \ +#define vshrn_high_n_u64(__p0_238, __p1_238, __p2_238) __extension__ ({ \ + uint32x2_t __s0_238 = __p0_238; \ + uint64x2_t __s1_238 = __p1_238; \ + uint32x4_t __ret_238; \ + __ret_238 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_238), (uint32x2_t)(vshrn_n_u64(__s1_238, __p2_238)))); \ __ret_238; \ }) #else -#define vshll_high_n_s32(__p0_239, __p1_239) __extension__ ({ \ - int32x4_t __s0_239 = __p0_239; \ - int32x4_t __rev0_239; __rev0_239 = __builtin_shufflevector(__s0_239, __s0_239, 3, 2, 1, 0); \ - int64x2_t __ret_239; \ - __ret_239 = (int64x2_t)(__noswap_vshll_n_s32(__noswap_vget_high_s32(__rev0_239), __p1_239)); \ - __ret_239 = __builtin_shufflevector(__ret_239, __ret_239, 1, 0); \ +#define vshrn_high_n_u64(__p0_239, __p1_239, __p2_239) __extension__ ({ \ + uint32x2_t __s0_239 = __p0_239; \ + uint64x2_t __s1_239 = __p1_239; \ + uint32x2_t __rev0_239; __rev0_239 = __builtin_shufflevector(__s0_239, __s0_239, 1, 0); \ + uint64x2_t __rev1_239; __rev1_239 = __builtin_shufflevector(__s1_239, __s1_239, 1, 0); \ + uint32x4_t __ret_239; \ + __ret_239 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_239), (uint32x2_t)(__noswap_vshrn_n_u64(__rev1_239, __p2_239)))); \ + __ret_239 = __builtin_shufflevector(__ret_239, __ret_239, 3, 2, 1, 0); \ __ret_239; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s16(__p0_240, __p1_240) __extension__ ({ \ - int16x8_t __s0_240 = __p0_240; \ - int32x4_t __ret_240; \ - __ret_240 = (int32x4_t)(vshll_n_s16(vget_high_s16(__s0_240), __p1_240)); \ +#define vshrn_high_n_u16(__p0_240, __p1_240, __p2_240) __extension__ ({ \ + uint8x8_t __s0_240 = __p0_240; \ + uint16x8_t __s1_240 = __p1_240; \ + uint8x16_t __ret_240; \ + __ret_240 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_240), (uint8x8_t)(vshrn_n_u16(__s1_240, __p2_240)))); \ __ret_240; \ }) #else -#define vshll_high_n_s16(__p0_241, __p1_241) __extension__ ({ \ - int16x8_t __s0_241 = __p0_241; \ - int16x8_t __rev0_241; __rev0_241 = 
__builtin_shufflevector(__s0_241, __s0_241, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_241; \ - __ret_241 = (int32x4_t)(__noswap_vshll_n_s16(__noswap_vget_high_s16(__rev0_241), __p1_241)); \ - __ret_241 = __builtin_shufflevector(__ret_241, __ret_241, 3, 2, 1, 0); \ +#define vshrn_high_n_u16(__p0_241, __p1_241, __p2_241) __extension__ ({ \ + uint8x8_t __s0_241 = __p0_241; \ + uint16x8_t __s1_241 = __p1_241; \ + uint8x8_t __rev0_241; __rev0_241 = __builtin_shufflevector(__s0_241, __s0_241, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_241; __rev1_241 = __builtin_shufflevector(__s1_241, __s1_241, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_241; \ + __ret_241 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_241), (uint8x8_t)(__noswap_vshrn_n_u16(__rev1_241, __p2_241)))); \ + __ret_241 = __builtin_shufflevector(__ret_241, __ret_241, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_241; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vshrd_n_u64(__s0, __p1); \ - __ret; \ -}) -#else -#define vshrd_n_u64(__p0, __p1) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vshrd_n_u64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vshrd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vshrd_n_s64(__s0, __p1); \ - __ret; \ -}) -#else -#define vshrd_n_s64(__p0, __p1) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vshrd_n_s64(__s0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u32(__p0_242, __p1_242, __p2_242) __extension__ ({ \ - uint16x4_t __s0_242 = __p0_242; \ - uint32x4_t __s1_242 = __p1_242; \ - uint16x8_t __ret_242; \ - __ret_242 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_242), (uint16x4_t)(vshrn_n_u32(__s1_242, __p2_242)))); \ +#define vshrn_high_n_s32(__p0_242, __p1_242, __p2_242) __extension__ ({ \ + int16x4_t __s0_242 = __p0_242; \ + int32x4_t __s1_242 = __p1_242; \ + int16x8_t __ret_242; \ + __ret_242 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_242), (int16x4_t)(vshrn_n_s32(__s1_242, __p2_242)))); \ __ret_242; \ }) #else -#define vshrn_high_n_u32(__p0_243, __p1_243, __p2_243) __extension__ ({ \ - uint16x4_t __s0_243 = __p0_243; \ - uint32x4_t __s1_243 = __p1_243; \ - uint16x4_t __rev0_243; __rev0_243 = __builtin_shufflevector(__s0_243, __s0_243, 3, 2, 1, 0); \ - uint32x4_t __rev1_243; __rev1_243 = __builtin_shufflevector(__s1_243, __s1_243, 3, 2, 1, 0); \ - uint16x8_t __ret_243; \ - __ret_243 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_243), (uint16x4_t)(__noswap_vshrn_n_u32(__rev1_243, __p2_243)))); \ +#define vshrn_high_n_s32(__p0_243, __p1_243, __p2_243) __extension__ ({ \ + int16x4_t __s0_243 = __p0_243; \ + int32x4_t __s1_243 = __p1_243; \ + int16x4_t __rev0_243; __rev0_243 = __builtin_shufflevector(__s0_243, __s0_243, 3, 2, 1, 0); \ + int32x4_t __rev1_243; __rev1_243 = __builtin_shufflevector(__s1_243, __s1_243, 3, 2, 1, 0); \ + int16x8_t __ret_243; \ + __ret_243 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_243), (int16x4_t)(__noswap_vshrn_n_s32(__rev1_243, __p2_243)))); \ __ret_243 = __builtin_shufflevector(__ret_243, __ret_243, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_243; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u64(__p0_244, __p1_244, __p2_244) __extension__ ({ \ - uint32x2_t 
__s0_244 = __p0_244; \ - uint64x2_t __s1_244 = __p1_244; \ - uint32x4_t __ret_244; \ - __ret_244 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_244), (uint32x2_t)(vshrn_n_u64(__s1_244, __p2_244)))); \ +#define vshrn_high_n_s64(__p0_244, __p1_244, __p2_244) __extension__ ({ \ + int32x2_t __s0_244 = __p0_244; \ + int64x2_t __s1_244 = __p1_244; \ + int32x4_t __ret_244; \ + __ret_244 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_244), (int32x2_t)(vshrn_n_s64(__s1_244, __p2_244)))); \ __ret_244; \ }) #else -#define vshrn_high_n_u64(__p0_245, __p1_245, __p2_245) __extension__ ({ \ - uint32x2_t __s0_245 = __p0_245; \ - uint64x2_t __s1_245 = __p1_245; \ - uint32x2_t __rev0_245; __rev0_245 = __builtin_shufflevector(__s0_245, __s0_245, 1, 0); \ - uint64x2_t __rev1_245; __rev1_245 = __builtin_shufflevector(__s1_245, __s1_245, 1, 0); \ - uint32x4_t __ret_245; \ - __ret_245 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_245), (uint32x2_t)(__noswap_vshrn_n_u64(__rev1_245, __p2_245)))); \ +#define vshrn_high_n_s64(__p0_245, __p1_245, __p2_245) __extension__ ({ \ + int32x2_t __s0_245 = __p0_245; \ + int64x2_t __s1_245 = __p1_245; \ + int32x2_t __rev0_245; __rev0_245 = __builtin_shufflevector(__s0_245, __s0_245, 1, 0); \ + int64x2_t __rev1_245; __rev1_245 = __builtin_shufflevector(__s1_245, __s1_245, 1, 0); \ + int32x4_t __ret_245; \ + __ret_245 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_245), (int32x2_t)(__noswap_vshrn_n_s64(__rev1_245, __p2_245)))); \ __ret_245 = __builtin_shufflevector(__ret_245, __ret_245, 3, 2, 1, 0); \ __ret_245; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u16(__p0_246, __p1_246, __p2_246) __extension__ ({ \ - uint8x8_t __s0_246 = __p0_246; \ - uint16x8_t __s1_246 = __p1_246; \ - uint8x16_t __ret_246; \ - __ret_246 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_246), (uint8x8_t)(vshrn_n_u16(__s1_246, __p2_246)))); \ +#define vshrn_high_n_s16(__p0_246, __p1_246, __p2_246) __extension__ ({ \ + int8x8_t __s0_246 = __p0_246; \ + int16x8_t __s1_246 = __p1_246; \ + int8x16_t __ret_246; \ + __ret_246 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_246), (int8x8_t)(vshrn_n_s16(__s1_246, __p2_246)))); \ __ret_246; \ }) #else -#define vshrn_high_n_u16(__p0_247, __p1_247, __p2_247) __extension__ ({ \ - uint8x8_t __s0_247 = __p0_247; \ - uint16x8_t __s1_247 = __p1_247; \ - uint8x8_t __rev0_247; __rev0_247 = __builtin_shufflevector(__s0_247, __s0_247, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_247; __rev1_247 = __builtin_shufflevector(__s1_247, __s1_247, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_247; \ - __ret_247 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_247), (uint8x8_t)(__noswap_vshrn_n_u16(__rev1_247, __p2_247)))); \ +#define vshrn_high_n_s16(__p0_247, __p1_247, __p2_247) __extension__ ({ \ + int8x8_t __s0_247 = __p0_247; \ + int16x8_t __s1_247 = __p1_247; \ + int8x8_t __rev0_247; __rev0_247 = __builtin_shufflevector(__s0_247, __s0_247, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_247; __rev1_247 = __builtin_shufflevector(__s1_247, __s1_247, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_247; \ + __ret_247 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_247), (int8x8_t)(__noswap_vshrn_n_s16(__rev1_247, __p2_247)))); \ __ret_247 = __builtin_shufflevector(__ret_247, __ret_247, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_247; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s32(__p0_248, __p1_248, __p2_248) __extension__ ({ \ - int16x4_t __s0_248 = __p0_248; \ - int32x4_t __s1_248 = __p1_248; \ - int16x8_t __ret_248; 
\ - __ret_248 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_248), (int16x4_t)(vshrn_n_s32(__s1_248, __p2_248)))); \ - __ret_248; \ -}) -#else -#define vshrn_high_n_s32(__p0_249, __p1_249, __p2_249) __extension__ ({ \ - int16x4_t __s0_249 = __p0_249; \ - int32x4_t __s1_249 = __p1_249; \ - int16x4_t __rev0_249; __rev0_249 = __builtin_shufflevector(__s0_249, __s0_249, 3, 2, 1, 0); \ - int32x4_t __rev1_249; __rev1_249 = __builtin_shufflevector(__s1_249, __s1_249, 3, 2, 1, 0); \ - int16x8_t __ret_249; \ - __ret_249 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_249), (int16x4_t)(__noswap_vshrn_n_s32(__rev1_249, __p2_249)))); \ - __ret_249 = __builtin_shufflevector(__ret_249, __ret_249, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_249; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s64(__p0_250, __p1_250, __p2_250) __extension__ ({ \ - int32x2_t __s0_250 = __p0_250; \ - int64x2_t __s1_250 = __p1_250; \ - int32x4_t __ret_250; \ - __ret_250 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_250), (int32x2_t)(vshrn_n_s64(__s1_250, __p2_250)))); \ - __ret_250; \ -}) -#else -#define vshrn_high_n_s64(__p0_251, __p1_251, __p2_251) __extension__ ({ \ - int32x2_t __s0_251 = __p0_251; \ - int64x2_t __s1_251 = __p1_251; \ - int32x2_t __rev0_251; __rev0_251 = __builtin_shufflevector(__s0_251, __s0_251, 1, 0); \ - int64x2_t __rev1_251; __rev1_251 = __builtin_shufflevector(__s1_251, __s1_251, 1, 0); \ - int32x4_t __ret_251; \ - __ret_251 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_251), (int32x2_t)(__noswap_vshrn_n_s64(__rev1_251, __p2_251)))); \ - __ret_251 = __builtin_shufflevector(__ret_251, __ret_251, 3, 2, 1, 0); \ - __ret_251; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s16(__p0_252, __p1_252, __p2_252) __extension__ ({ \ - int8x8_t __s0_252 = __p0_252; \ - int16x8_t __s1_252 = __p1_252; \ - int8x16_t __ret_252; \ - __ret_252 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_252), (int8x8_t)(vshrn_n_s16(__s1_252, __p2_252)))); \ - __ret_252; \ -}) -#else -#define vshrn_high_n_s16(__p0_253, __p1_253, __p2_253) __extension__ ({ \ - int8x8_t __s0_253 = __p0_253; \ - int16x8_t __s1_253 = __p1_253; \ - int8x8_t __rev0_253; __rev0_253 = __builtin_shufflevector(__s0_253, __s0_253, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_253; __rev1_253 = __builtin_shufflevector(__s1_253, __s1_253, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_253; \ - __ret_253 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_253), (int8x8_t)(__noswap_vshrn_n_s16(__rev1_253, __p2_253)))); \ - __ret_253 = __builtin_shufflevector(__ret_253, __ret_253, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_253; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vslid_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64_t __s0 = __p0; \ uint64_t __s1 = __p1; \ @@ -66225,17 +56160,6 @@ __ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { __ret = (uint64_t) __builtin_neon_vslid_n_u64(__s0, __s1, __p2); \ __ret; \ }) -#else -#define vslid_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vslid_n_u64(__s0, __s1, __p2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vslid_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64_t __s0 = __p0; \ int64_t __s1 = __p1; \ @@ -66243,17 +56167,6 @@ __ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { __ret = (int64_t) __builtin_neon_vslid_n_s64(__s0, __s1, __p2); \ __ret; \ }) -#else -#define vslid_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = 
__p0; \ - int64_t __s1 = __p1; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vslid_n_s64(__s0, __s1, __p2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vsli_n_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ @@ -66261,16 +56174,6 @@ __ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { __ret = (poly64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ __ret; \ }) -#else -#define vsli_n_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vsliq_n_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __s0 = __p0; \ @@ -66292,62 +56195,26 @@ __ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { }) #endif -#ifdef __LITTLE_ENDIAN__ __ai uint8_t vsqaddb_u8(uint8_t __p0, int8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vsqaddb_u8(__p0, __p1); return __ret; } -#else -__ai uint8_t vsqaddb_u8(uint8_t __p0, int8_t __p1) { - uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vsqaddb_u8(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint32_t vsqadds_u32(uint32_t __p0, int32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vsqadds_u32(__p0, __p1); return __ret; } -#else -__ai uint32_t vsqadds_u32(uint32_t __p0, int32_t __p1) { - uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vsqadds_u32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vsqaddd_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vsqaddd_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vsqaddd_u64(uint64_t __p0, int64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vsqaddd_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint16_t vsqaddh_u16(uint16_t __p0, int16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vsqaddh_u16(__p0, __p1); return __ret; } -#else -__ai uint16_t vsqaddh_u16(uint16_t __p0, int16_t __p1) { - uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vsqaddh_u16(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; @@ -66450,20 +56317,11 @@ __ai uint32x2_t vsqadd_u32(uint32x2_t __p0, int32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vsqadd_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vsqadd_u64(uint64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x4_t vsqadd_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; @@ -66513,20 +56371,11 @@ __ai float32x4_t vsqrtq_f32(float32x4_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vsqrt_f64(float64x1_t __p0) { float64x1_t __ret; __ret = (float64x1_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 10); return __ret; } -#else -__ai float64x1_t vsqrt_f64(float64x1_t __p0) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 10); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { float32x2_t __ret; @@ -66543,7 +56392,6 @@ __ai 
float32x2_t vsqrt_f32(float32x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ #define vsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64_t __s0 = __p0; \ uint64_t __s1 = __p1; \ @@ -66551,17 +56399,6 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __ret = (uint64_t) __builtin_neon_vsrad_n_u64(__s0, __s1, __p2); \ __ret; \ }) -#else -#define vsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vsrad_n_u64(__s0, __s1, __p2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64_t __s0 = __p0; \ int64_t __s1 = __p1; \ @@ -66569,17 +56406,6 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __ret = (int64_t) __builtin_neon_vsrad_n_s64(__s0, __s1, __p2); \ __ret; \ }) -#else -#define vsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __s1 = __p1; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vsrad_n_s64(__s0, __s1, __p2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vsrid_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64_t __s0 = __p0; \ uint64_t __s1 = __p1; \ @@ -66587,17 +56413,6 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __ret = (uint64_t) __builtin_neon_vsrid_n_u64(__s0, __s1, __p2); \ __ret; \ }) -#else -#define vsrid_n_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64_t __s0 = __p0; \ - uint64_t __s1 = __p1; \ - uint64_t __ret; \ - __ret = (uint64_t) __builtin_neon_vsrid_n_u64(__s0, __s1, __p2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vsrid_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64_t __s0 = __p0; \ int64_t __s1 = __p1; \ @@ -66605,17 +56420,6 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __ret = (int64_t) __builtin_neon_vsrid_n_s64(__s0, __s1, __p2); \ __ret; \ }) -#else -#define vsrid_n_s64(__p0, __p1, __p2) __extension__ ({ \ - int64_t __s0 = __p0; \ - int64_t __s1 = __p1; \ - int64_t __ret; \ - __ret = (int64_t) __builtin_neon_vsrid_n_s64(__s0, __s1, __p2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vsri_n_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ @@ -66623,16 +56427,6 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __ret = (poly64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ __ret; \ }) -#else -#define vsri_n_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s0 = __p0; \ - poly64x1_t __s1 = __p1; \ - poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ - __ret; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vsriq_n_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __s0 = __p0; \ @@ -66654,18 +56448,10 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 6); \ }) -#else -#define vst1_p64(__p0, __p1) __extension__ ({ \ - poly64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 6); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1q_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __s1 = __p1; \ @@ -66692,30 +56478,14 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_f64(__p0, __p1) __extension__ ({ \ float64x1_t __s1 = __p1; \ __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 10); \ }) -#else -#define vst1_f64(__p0, __p1) __extension__ ({ \ - 
float64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 10); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ }) -#else -#define vst1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __s1 = __p1; \ @@ -66742,30 +56512,14 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __s1 = __p1; \ __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ }) -#else -#define vst1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst1_p64_x2(__p0, __p1) __extension__ ({ \ poly64x1x2_t __s1 = __p1; \ __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \ }) -#else -#define vst1_p64_x2(__p0, __p1) __extension__ ({ \ - poly64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1q_p64_x2(__p0, __p1) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ @@ -66784,7 +56538,7 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ #define vst1q_f64_x2(__p0, __p1) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, __s1.val[0], __s1.val[1], 42); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 42); \ }) #else #define vst1q_f64_x2(__p0, __p1) __extension__ ({ \ @@ -66792,34 +56546,18 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { float64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, __rev1.val[0], __rev1.val[1], 42); \ + __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 42); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_f64_x2(__p0, __p1) __extension__ ({ \ float64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 10); \ + __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 10); \ }) -#else -#define vst1_f64_x2(__p0, __p1) __extension__ ({ \ - float64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, __s1.val[0], __s1.val[1], 10); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst1_p64_x3(__p0, __p1) __extension__ ({ \ poly64x1x3_t __s1 = __p1; \ __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \ }) -#else -#define vst1_p64_x3(__p0, __p1) __extension__ ({ \ - poly64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1q_p64_x3(__p0, __p1) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ @@ -66839,7 +56577,7 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ #define vst1q_f64_x3(__p0, __p1) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 42); \ + __builtin_neon_vst1q_x3_v(__p0, 
(int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 42); \ }) #else #define vst1q_f64_x3(__p0, __p1) __extension__ ({ \ @@ -66848,34 +56586,18 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 42); \ + __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 42); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_f64_x3(__p0, __p1) __extension__ ({ \ float64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 10); \ + __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 10); \ }) -#else -#define vst1_f64_x3(__p0, __p1) __extension__ ({ \ - float64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 10); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst1_p64_x4(__p0, __p1) __extension__ ({ \ poly64x1x4_t __s1 = __p1; \ __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \ }) -#else -#define vst1_p64_x4(__p0, __p1) __extension__ ({ \ - poly64x1x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst1q_p64_x4(__p0, __p1) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ @@ -66896,7 +56618,7 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ #define vst1q_f64_x4(__p0, __p1) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 42); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 42); \ }) #else #define vst1q_f64_x4(__p0, __p1) __extension__ ({ \ @@ -66906,34 +56628,18 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 42); \ + __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 42); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst1_f64_x4(__p0, __p1) __extension__ ({ \ float64x1x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 10); \ + __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 10); \ }) -#else -#define vst1_f64_x4(__p0, __p1) __extension__ ({ \ - float64x1x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 10); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst2_p64(__p0, __p1) __extension__ ({ \ poly64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \ }) -#else -#define vst2_p64(__p0, __p1) __extension__ ({ \ - poly64x1x2_t __s1 = __p1; \ - 
__builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst2q_p64(__p0, __p1) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ @@ -66967,7 +56673,7 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ #define vst2q_f64(__p0, __p1) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, __s1.val[0], __s1.val[1], 42); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 42); \ }) #else #define vst2q_f64(__p0, __p1) __extension__ ({ \ @@ -66975,14 +56681,14 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { float64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_v(__p0, __rev1.val[0], __rev1.val[1], 42); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_s64(__p0, __p1) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, __s1.val[0], __s1.val[1], 35); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 35); \ }) #else #define vst2q_s64(__p0, __p1) __extension__ ({ \ @@ -66990,34 +56696,18 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { int64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_v(__p0, __rev1.val[0], __rev1.val[1], 35); \ + __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 35); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst2_f64(__p0, __p1) __extension__ ({ \ float64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, __s1.val[0], __s1.val[1], 10); \ + __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 10); \ }) -#else -#define vst2_f64(__p0, __p1) __extension__ ({ \ - float64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, __s1.val[0], __s1.val[1], 10); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst2_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 6); \ }) -#else -#define vst2_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 6); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ @@ -67096,7 +56786,7 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 42); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 42); \ }) #else #define vst2q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ @@ -67104,14 +56794,14 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { float64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 42); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vst2q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 35); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 35); \ }) #else #define vst2q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ @@ -67119,58 +56809,26 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { int64x2x2_t __rev1; \ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __p2, 35); \ + __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 35); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst2_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x2_t __s1 = __p1; \ __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 19); \ }) -#else -#define vst2_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 19); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst2_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 10); \ + __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 10); \ }) -#else -#define vst2_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 10); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst2_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 3); \ + __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 3); \ }) -#else -#define vst2_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, __s1.val[0], __s1.val[1], __p2, 3); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst3_p64(__p0, __p1) __extension__ ({ \ poly64x1x3_t __s1 = __p1; \ __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \ }) -#else -#define vst3_p64(__p0, __p1) __extension__ ({ \ - poly64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst3q_p64(__p0, __p1) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ @@ -67206,7 +56864,7 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ #define vst3q_f64(__p0, __p1) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 42); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 42); \ }) #else #define vst3q_f64(__p0, __p1) __extension__ ({ \ @@ -67215,14 +56873,14 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 42); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 42); \ }) #endif #ifdef 
__LITTLE_ENDIAN__ #define vst3q_s64(__p0, __p1) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 35); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 35); \ }) #else #define vst3q_s64(__p0, __p1) __extension__ ({ \ @@ -67231,34 +56889,18 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], 35); \ + __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 35); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst3_f64(__p0, __p1) __extension__ ({ \ float64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 10); \ + __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 10); \ }) -#else -#define vst3_f64(__p0, __p1) __extension__ ({ \ - float64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], 10); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst3_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 6); \ }) -#else -#define vst3_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 6); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ @@ -67342,7 +56984,7 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 42); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 42); \ }) #else #define vst3q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ @@ -67351,14 +56993,14 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 42); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 35); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 35); \ }) #else #define vst3q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ @@ -67367,58 +57009,26 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 
0); \ - __builtin_neon_vst3q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __p2, 35); \ + __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 35); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst3_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x3_t __s1 = __p1; \ __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 19); \ }) -#else -#define vst3_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 19); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst3_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 10); \ + __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 10); \ }) -#else -#define vst3_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 10); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst3_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 3); \ + __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 3); \ }) -#else -#define vst3_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __p2, 3); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst4_p64(__p0, __p1) __extension__ ({ \ poly64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \ }) -#else -#define vst4_p64(__p0, __p1) __extension__ ({ \ - poly64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst4q_p64(__p0, __p1) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ @@ -67456,7 +57066,7 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ #define vst4q_f64(__p0, __p1) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 42); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 42); \ }) #else #define vst4q_f64(__p0, __p1) __extension__ ({ \ @@ -67466,14 +57076,14 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 42); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_s64(__p0, __p1) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 35); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], 
(int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 35); \ }) #else #define vst4q_s64(__p0, __p1) __extension__ ({ \ @@ -67483,34 +57093,18 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], 35); \ + __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 35); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst4_f64(__p0, __p1) __extension__ ({ \ float64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 10); \ + __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 10); \ }) -#else -#define vst4_f64(__p0, __p1) __extension__ ({ \ - float64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], 10); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst4_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 6); \ }) -#else -#define vst4_lane_p64(__p0, __p1, __p2) __extension__ ({ \ - poly64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 6); \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ @@ -67599,7 +57193,7 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 42); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 42); \ }) #else #define vst4q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ @@ -67609,14 +57203,14 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 42); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 35); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 35); \ }) #else #define vst4q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ @@ -67626,86 +57220,36 @@ __ai float32x2_t vsqrt_f32(float32x2_t __p0) { __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ __rev1.val[3] = 
__builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, __rev1.val[0], __rev1.val[1], __rev1.val[2], __rev1.val[3], __p2, 35); \ + __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 35); \ }) #endif -#ifdef __LITTLE_ENDIAN__ #define vst4_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x4_t __s1 = __p1; \ __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 19); \ }) -#else -#define vst4_lane_u64(__p0, __p1, __p2) __extension__ ({ \ - uint64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 19); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst4_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 10); \ + __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 10); \ }) -#else -#define vst4_lane_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 10); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vst4_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 3); \ + __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 3); \ }) -#else -#define vst4_lane_s64(__p0, __p1, __p2) __extension__ ({ \ - int64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, __s1.val[0], __s1.val[1], __s1.val[2], __s1.val[3], __p2, 3); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ #define vstrq_p128(__p0, __p1) __extension__ ({ \ poly128_t __s1 = __p1; \ __builtin_neon_vstrq_p128(__p0, __s1); \ }) -#else -#define vstrq_p128(__p0, __p1) __extension__ ({ \ - poly128_t __s1 = __p1; \ - __builtin_neon_vstrq_p128(__p0, __s1); \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vsubd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vsubd_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vsubd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vsubd_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vsubd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vsubd_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vsubd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vsubd_s64(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai float64x2_t vsubq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; @@ -67723,20 +57267,11 @@ __ai float64x2_t vsubq_f64(float64x2_t __p0, float64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai float64x1_t vsub_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; __ret = __p0 - __p1; return __ret; } -#else -__ai float64x1_t vsub_f64(float64x1_t __p0, float64x1_t __p1) { - float64x1_t __ret; - __ret = __p0 - __p1; - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t __ret; 
@@ -68797,20 +58332,11 @@ __ai int16x4_t vtrn2_s16(int16x4_t __p0, int16x4_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vtst_p64(poly64x1_t __p0, poly64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vtst_p64(poly64x1_t __p0, poly64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint64x2_t vtstq_p64(poly64x2_t __p0, poly64x2_t __p1) { uint64x2_t __ret; @@ -68862,118 +58388,46 @@ __ai uint64x2_t vtstq_s64(int64x2_t __p0, int64x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vtst_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vtst_u64(uint64x1_t __p0, uint64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64x1_t vtst_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } -#else -__ai uint64x1_t vtst_s64(int64x1_t __p0, int64x1_t __p1) { - uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai uint64_t vtstd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vtstd_u64(__p0, __p1); return __ret; } -#else -__ai uint64_t vtstd_u64(uint64_t __p0, uint64_t __p1) { - uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vtstd_u64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vtstd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vtstd_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vtstd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vtstd_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int8_t vuqaddb_s8(int8_t __p0, uint8_t __p1) { int8_t __ret; __ret = (int8_t) __builtin_neon_vuqaddb_s8(__p0, __p1); return __ret; } -#else -__ai int8_t vuqaddb_s8(int8_t __p0, uint8_t __p1) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vuqaddb_s8(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vuqadds_s32(int32_t __p0, uint32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vuqadds_s32(__p0, __p1); return __ret; } -#else -__ai int32_t vuqadds_s32(int32_t __p0, uint32_t __p1) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vuqadds_s32(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int64_t vuqaddd_s64(int64_t __p0, uint64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vuqaddd_s64(__p0, __p1); return __ret; } -#else -__ai int64_t vuqaddd_s64(int64_t __p0, uint64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vuqaddd_s64(__p0, __p1); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16_t vuqaddh_s16(int16_t __p0, uint16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vuqaddh_s16(__p0, __p1); return __ret; } -#else -__ai int16_t vuqaddh_s16(int16_t __p0, uint16_t __p1) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vuqaddh_s16(__p0, __p1); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai 
int8x16_t vuqaddq_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __ret; @@ -69076,20 +58530,11 @@ __ai int32x2_t vuqadd_s32(int32x2_t __p0, uint32x2_t __p1) { } #endif -#ifdef __LITTLE_ENDIAN__ __ai int64x1_t vuqadd_s64(int64x1_t __p0, uint64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } -#else -__ai int64x1_t vuqadd_s64(int64x1_t __p0, uint64x1_t __p1) { - int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 3); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai int16x4_t vuqadd_s16(int16x4_t __p0, uint16x4_t __p1) { int16x4_t __ret; @@ -71157,60 +60602,60 @@ __ai int32x4_t vaddw_s16(int32x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vget_lane_f16(__p0_254, __p1_254) __extension__ ({ \ - float16x4_t __s0_254 = __p0_254; \ - float16_t __ret_254; \ -float16x4_t __reint_254 = __s0_254; \ -int16_t __reint1_254 = vget_lane_s16(*(int16x4_t *) &__reint_254, __p1_254); \ - __ret_254 = *(float16_t *) &__reint1_254; \ - __ret_254; \ +#define vget_lane_f16(__p0_248, __p1_248) __extension__ ({ \ + float16x4_t __s0_248 = __p0_248; \ + float16_t __ret_248; \ +float16x4_t __reint_248 = __s0_248; \ +int16_t __reint1_248 = vget_lane_s16(*(int16x4_t *) &__reint_248, __p1_248); \ + __ret_248 = *(float16_t *) &__reint1_248; \ + __ret_248; \ }) #else -#define vget_lane_f16(__p0_255, __p1_255) __extension__ ({ \ - float16x4_t __s0_255 = __p0_255; \ - float16x4_t __rev0_255; __rev0_255 = __builtin_shufflevector(__s0_255, __s0_255, 3, 2, 1, 0); \ - float16_t __ret_255; \ -float16x4_t __reint_255 = __rev0_255; \ -int16_t __reint1_255 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_255, __p1_255); \ - __ret_255 = *(float16_t *) &__reint1_255; \ - __ret_255; \ +#define vget_lane_f16(__p0_249, __p1_249) __extension__ ({ \ + float16x4_t __s0_249 = __p0_249; \ + float16x4_t __rev0_249; __rev0_249 = __builtin_shufflevector(__s0_249, __s0_249, 3, 2, 1, 0); \ + float16_t __ret_249; \ +float16x4_t __reint_249 = __rev0_249; \ +int16_t __reint1_249 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_249, __p1_249); \ + __ret_249 = *(float16_t *) &__reint1_249; \ + __ret_249; \ }) -#define __noswap_vget_lane_f16(__p0_256, __p1_256) __extension__ ({ \ - float16x4_t __s0_256 = __p0_256; \ - float16_t __ret_256; \ -float16x4_t __reint_256 = __s0_256; \ -int16_t __reint1_256 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_256, __p1_256); \ - __ret_256 = *(float16_t *) &__reint1_256; \ - __ret_256; \ +#define __noswap_vget_lane_f16(__p0_250, __p1_250) __extension__ ({ \ + float16x4_t __s0_250 = __p0_250; \ + float16_t __ret_250; \ +float16x4_t __reint_250 = __s0_250; \ +int16_t __reint1_250 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_250, __p1_250); \ + __ret_250 = *(float16_t *) &__reint1_250; \ + __ret_250; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vgetq_lane_f16(__p0_257, __p1_257) __extension__ ({ \ - float16x8_t __s0_257 = __p0_257; \ - float16_t __ret_257; \ -float16x8_t __reint_257 = __s0_257; \ -int16_t __reint1_257 = vgetq_lane_s16(*(int16x8_t *) &__reint_257, __p1_257); \ - __ret_257 = *(float16_t *) &__reint1_257; \ - __ret_257; \ +#define vgetq_lane_f16(__p0_251, __p1_251) __extension__ ({ \ + float16x8_t __s0_251 = __p0_251; \ + float16_t __ret_251; \ +float16x8_t __reint_251 = __s0_251; \ +int16_t __reint1_251 = vgetq_lane_s16(*(int16x8_t *) &__reint_251, __p1_251); \ + __ret_251 = *(float16_t *) &__reint1_251; \ + __ret_251; \ }) #else -#define 
vgetq_lane_f16(__p0_258, __p1_258) __extension__ ({ \ - float16x8_t __s0_258 = __p0_258; \ - float16x8_t __rev0_258; __rev0_258 = __builtin_shufflevector(__s0_258, __s0_258, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16_t __ret_258; \ -float16x8_t __reint_258 = __rev0_258; \ -int16_t __reint1_258 = __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_258, __p1_258); \ - __ret_258 = *(float16_t *) &__reint1_258; \ - __ret_258; \ +#define vgetq_lane_f16(__p0_252, __p1_252) __extension__ ({ \ + float16x8_t __s0_252 = __p0_252; \ + float16x8_t __rev0_252; __rev0_252 = __builtin_shufflevector(__s0_252, __s0_252, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret_252; \ +float16x8_t __reint_252 = __rev0_252; \ +int16_t __reint1_252 = __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_252, __p1_252); \ + __ret_252 = *(float16_t *) &__reint1_252; \ + __ret_252; \ }) -#define __noswap_vgetq_lane_f16(__p0_259, __p1_259) __extension__ ({ \ - float16x8_t __s0_259 = __p0_259; \ - float16_t __ret_259; \ -float16x8_t __reint_259 = __s0_259; \ -int16_t __reint1_259 = __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_259, __p1_259); \ - __ret_259 = *(float16_t *) &__reint1_259; \ - __ret_259; \ +#define __noswap_vgetq_lane_f16(__p0_253, __p1_253) __extension__ ({ \ + float16x8_t __s0_253 = __p0_253; \ + float16_t __ret_253; \ +float16x8_t __reint_253 = __s0_253; \ +int16_t __reint1_253 = __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_253, __p1_253); \ + __ret_253 = *(float16_t *) &__reint1_253; \ + __ret_253; \ }) #endif @@ -71859,708 +61304,672 @@ __ai int32x4_t __noswap_vmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2 #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_f16(__p0_260, __p1_260, __p2_260) __extension__ ({ \ - float16_t __s0_260 = __p0_260; \ - float16x4_t __s1_260 = __p1_260; \ - float16x4_t __ret_260; \ -float16_t __reint_260 = __s0_260; \ -float16x4_t __reint1_260 = __s1_260; \ -int16x4_t __reint2_260 = vset_lane_s16(*(int16_t *) &__reint_260, *(int16x4_t *) &__reint1_260, __p2_260); \ - __ret_260 = *(float16x4_t *) &__reint2_260; \ - __ret_260; \ +#define vset_lane_f16(__p0_254, __p1_254, __p2_254) __extension__ ({ \ + float16_t __s0_254 = __p0_254; \ + float16x4_t __s1_254 = __p1_254; \ + float16x4_t __ret_254; \ +float16_t __reint_254 = __s0_254; \ +float16x4_t __reint1_254 = __s1_254; \ +int16x4_t __reint2_254 = vset_lane_s16(*(int16_t *) &__reint_254, *(int16x4_t *) &__reint1_254, __p2_254); \ + __ret_254 = *(float16x4_t *) &__reint2_254; \ + __ret_254; \ }) #else -#define vset_lane_f16(__p0_261, __p1_261, __p2_261) __extension__ ({ \ - float16_t __s0_261 = __p0_261; \ - float16x4_t __s1_261 = __p1_261; \ - float16x4_t __rev1_261; __rev1_261 = __builtin_shufflevector(__s1_261, __s1_261, 3, 2, 1, 0); \ - float16x4_t __ret_261; \ -float16_t __reint_261 = __s0_261; \ -float16x4_t __reint1_261 = __rev1_261; \ -int16x4_t __reint2_261 = __noswap_vset_lane_s16(*(int16_t *) &__reint_261, *(int16x4_t *) &__reint1_261, __p2_261); \ - __ret_261 = *(float16x4_t *) &__reint2_261; \ - __ret_261 = __builtin_shufflevector(__ret_261, __ret_261, 3, 2, 1, 0); \ - __ret_261; \ +#define vset_lane_f16(__p0_255, __p1_255, __p2_255) __extension__ ({ \ + float16_t __s0_255 = __p0_255; \ + float16x4_t __s1_255 = __p1_255; \ + float16x4_t __rev1_255; __rev1_255 = __builtin_shufflevector(__s1_255, __s1_255, 3, 2, 1, 0); \ + float16x4_t __ret_255; \ +float16_t __reint_255 = __s0_255; \ +float16x4_t __reint1_255 = __rev1_255; \ +int16x4_t __reint2_255 = __noswap_vset_lane_s16(*(int16_t *) &__reint_255, *(int16x4_t *) 
&__reint1_255, __p2_255); \ + __ret_255 = *(float16x4_t *) &__reint2_255; \ + __ret_255 = __builtin_shufflevector(__ret_255, __ret_255, 3, 2, 1, 0); \ + __ret_255; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_f16(__p0_262, __p1_262, __p2_262) __extension__ ({ \ - float16_t __s0_262 = __p0_262; \ - float16x8_t __s1_262 = __p1_262; \ - float16x8_t __ret_262; \ -float16_t __reint_262 = __s0_262; \ -float16x8_t __reint1_262 = __s1_262; \ -int16x8_t __reint2_262 = vsetq_lane_s16(*(int16_t *) &__reint_262, *(int16x8_t *) &__reint1_262, __p2_262); \ - __ret_262 = *(float16x8_t *) &__reint2_262; \ - __ret_262; \ +#define vsetq_lane_f16(__p0_256, __p1_256, __p2_256) __extension__ ({ \ + float16_t __s0_256 = __p0_256; \ + float16x8_t __s1_256 = __p1_256; \ + float16x8_t __ret_256; \ +float16_t __reint_256 = __s0_256; \ +float16x8_t __reint1_256 = __s1_256; \ +int16x8_t __reint2_256 = vsetq_lane_s16(*(int16_t *) &__reint_256, *(int16x8_t *) &__reint1_256, __p2_256); \ + __ret_256 = *(float16x8_t *) &__reint2_256; \ + __ret_256; \ }) #else -#define vsetq_lane_f16(__p0_263, __p1_263, __p2_263) __extension__ ({ \ - float16_t __s0_263 = __p0_263; \ - float16x8_t __s1_263 = __p1_263; \ - float16x8_t __rev1_263; __rev1_263 = __builtin_shufflevector(__s1_263, __s1_263, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __ret_263; \ -float16_t __reint_263 = __s0_263; \ -float16x8_t __reint1_263 = __rev1_263; \ -int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(int16x8_t *) &__reint1_263, __p2_263); \ - __ret_263 = *(float16x8_t *) &__reint2_263; \ - __ret_263 = __builtin_shufflevector(__ret_263, __ret_263, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_263; \ +#define vsetq_lane_f16(__p0_257, __p1_257, __p2_257) __extension__ ({ \ + float16_t __s0_257 = __p0_257; \ + float16x8_t __s1_257 = __p1_257; \ + float16x8_t __rev1_257; __rev1_257 = __builtin_shufflevector(__s1_257, __s1_257, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret_257; \ +float16_t __reint_257 = __s0_257; \ +float16x8_t __reint1_257 = __rev1_257; \ +int16x8_t __reint2_257 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_257, *(int16x8_t *) &__reint1_257, __p2_257); \ + __ret_257 = *(float16x8_t *) &__reint2_257; \ + __ret_257 = __builtin_shufflevector(__ret_257, __ret_257, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_257; \ }) #endif #if defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_lane_high_f16(__p0_264, __p1_264, __p2_264, __p3_264) __extension__ ({ \ - float32x4_t __s0_264 = __p0_264; \ - float16x8_t __s1_264 = __p1_264; \ +#define vfmlalq_lane_high_f16(__p0_258, __p1_258, __p2_258, __p3_258) __extension__ ({ \ + float32x4_t __s0_258 = __p0_258; \ + float16x8_t __s1_258 = __p1_258; \ + float16x4_t __s2_258 = __p2_258; \ + float32x4_t __ret_258; \ + __ret_258 = vfmlalq_high_f16(__s0_258, __s1_258, (float16x8_t) {vget_lane_f16(__s2_258, __p3_258), vget_lane_f16(__s2_258, __p3_258), vget_lane_f16(__s2_258, __p3_258), vget_lane_f16(__s2_258, __p3_258), vget_lane_f16(__s2_258, __p3_258), vget_lane_f16(__s2_258, __p3_258), vget_lane_f16(__s2_258, __p3_258), vget_lane_f16(__s2_258, __p3_258)}); \ + __ret_258; \ +}) +#else +#define vfmlalq_lane_high_f16(__p0_259, __p1_259, __p2_259, __p3_259) __extension__ ({ \ + float32x4_t __s0_259 = __p0_259; \ + float16x8_t __s1_259 = __p1_259; \ + float16x4_t __s2_259 = __p2_259; \ + float32x4_t __rev0_259; __rev0_259 = __builtin_shufflevector(__s0_259, __s0_259, 3, 2, 1, 0); \ + float16x8_t __rev1_259; __rev1_259 = __builtin_shufflevector(__s1_259, 
__s1_259, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_259; __rev2_259 = __builtin_shufflevector(__s2_259, __s2_259, 3, 2, 1, 0); \ + float32x4_t __ret_259; \ + __ret_259 = __noswap_vfmlalq_high_f16(__rev0_259, __rev1_259, (float16x8_t) {__noswap_vget_lane_f16(__rev2_259, __p3_259), __noswap_vget_lane_f16(__rev2_259, __p3_259), __noswap_vget_lane_f16(__rev2_259, __p3_259), __noswap_vget_lane_f16(__rev2_259, __p3_259), __noswap_vget_lane_f16(__rev2_259, __p3_259), __noswap_vget_lane_f16(__rev2_259, __p3_259), __noswap_vget_lane_f16(__rev2_259, __p3_259), __noswap_vget_lane_f16(__rev2_259, __p3_259)}); \ + __ret_259 = __builtin_shufflevector(__ret_259, __ret_259, 3, 2, 1, 0); \ + __ret_259; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmlal_lane_high_f16(__p0_260, __p1_260, __p2_260, __p3_260) __extension__ ({ \ + float32x2_t __s0_260 = __p0_260; \ + float16x4_t __s1_260 = __p1_260; \ + float16x4_t __s2_260 = __p2_260; \ + float32x2_t __ret_260; \ + __ret_260 = vfmlal_high_f16(__s0_260, __s1_260, (float16x4_t) {vget_lane_f16(__s2_260, __p3_260), vget_lane_f16(__s2_260, __p3_260), vget_lane_f16(__s2_260, __p3_260), vget_lane_f16(__s2_260, __p3_260)}); \ + __ret_260; \ +}) +#else +#define vfmlal_lane_high_f16(__p0_261, __p1_261, __p2_261, __p3_261) __extension__ ({ \ + float32x2_t __s0_261 = __p0_261; \ + float16x4_t __s1_261 = __p1_261; \ + float16x4_t __s2_261 = __p2_261; \ + float32x2_t __rev0_261; __rev0_261 = __builtin_shufflevector(__s0_261, __s0_261, 1, 0); \ + float16x4_t __rev1_261; __rev1_261 = __builtin_shufflevector(__s1_261, __s1_261, 3, 2, 1, 0); \ + float16x4_t __rev2_261; __rev2_261 = __builtin_shufflevector(__s2_261, __s2_261, 3, 2, 1, 0); \ + float32x2_t __ret_261; \ + __ret_261 = __noswap_vfmlal_high_f16(__rev0_261, __rev1_261, (float16x4_t) {__noswap_vget_lane_f16(__rev2_261, __p3_261), __noswap_vget_lane_f16(__rev2_261, __p3_261), __noswap_vget_lane_f16(__rev2_261, __p3_261), __noswap_vget_lane_f16(__rev2_261, __p3_261)}); \ + __ret_261 = __builtin_shufflevector(__ret_261, __ret_261, 1, 0); \ + __ret_261; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmlalq_lane_low_f16(__p0_262, __p1_262, __p2_262, __p3_262) __extension__ ({ \ + float32x4_t __s0_262 = __p0_262; \ + float16x8_t __s1_262 = __p1_262; \ + float16x4_t __s2_262 = __p2_262; \ + float32x4_t __ret_262; \ + __ret_262 = vfmlalq_low_f16(__s0_262, __s1_262, (float16x8_t) {vget_lane_f16(__s2_262, __p3_262), vget_lane_f16(__s2_262, __p3_262), vget_lane_f16(__s2_262, __p3_262), vget_lane_f16(__s2_262, __p3_262), vget_lane_f16(__s2_262, __p3_262), vget_lane_f16(__s2_262, __p3_262), vget_lane_f16(__s2_262, __p3_262), vget_lane_f16(__s2_262, __p3_262)}); \ + __ret_262; \ +}) +#else +#define vfmlalq_lane_low_f16(__p0_263, __p1_263, __p2_263, __p3_263) __extension__ ({ \ + float32x4_t __s0_263 = __p0_263; \ + float16x8_t __s1_263 = __p1_263; \ + float16x4_t __s2_263 = __p2_263; \ + float32x4_t __rev0_263; __rev0_263 = __builtin_shufflevector(__s0_263, __s0_263, 3, 2, 1, 0); \ + float16x8_t __rev1_263; __rev1_263 = __builtin_shufflevector(__s1_263, __s1_263, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_263; __rev2_263 = __builtin_shufflevector(__s2_263, __s2_263, 3, 2, 1, 0); \ + float32x4_t __ret_263; \ + __ret_263 = __noswap_vfmlalq_low_f16(__rev0_263, __rev1_263, (float16x8_t) {__noswap_vget_lane_f16(__rev2_263, __p3_263), __noswap_vget_lane_f16(__rev2_263, __p3_263), __noswap_vget_lane_f16(__rev2_263, __p3_263), __noswap_vget_lane_f16(__rev2_263, __p3_263), __noswap_vget_lane_f16(__rev2_263, __p3_263), 
__noswap_vget_lane_f16(__rev2_263, __p3_263), __noswap_vget_lane_f16(__rev2_263, __p3_263), __noswap_vget_lane_f16(__rev2_263, __p3_263)}); \ + __ret_263 = __builtin_shufflevector(__ret_263, __ret_263, 3, 2, 1, 0); \ + __ret_263; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmlal_lane_low_f16(__p0_264, __p1_264, __p2_264, __p3_264) __extension__ ({ \ + float32x2_t __s0_264 = __p0_264; \ + float16x4_t __s1_264 = __p1_264; \ float16x4_t __s2_264 = __p2_264; \ - float32x4_t __ret_264; \ - __ret_264 = vfmlalq_high_f16(__s0_264, __s1_264, (float16x8_t) {vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264)}); \ + float32x2_t __ret_264; \ + __ret_264 = vfmlal_low_f16(__s0_264, __s1_264, (float16x4_t) {vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264)}); \ __ret_264; \ }) #else -#define vfmlalq_lane_high_f16(__p0_265, __p1_265, __p2_265, __p3_265) __extension__ ({ \ - float32x4_t __s0_265 = __p0_265; \ - float16x8_t __s1_265 = __p1_265; \ +#define vfmlal_lane_low_f16(__p0_265, __p1_265, __p2_265, __p3_265) __extension__ ({ \ + float32x2_t __s0_265 = __p0_265; \ + float16x4_t __s1_265 = __p1_265; \ float16x4_t __s2_265 = __p2_265; \ - float32x4_t __rev0_265; __rev0_265 = __builtin_shufflevector(__s0_265, __s0_265, 3, 2, 1, 0); \ - float16x8_t __rev1_265; __rev1_265 = __builtin_shufflevector(__s1_265, __s1_265, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x2_t __rev0_265; __rev0_265 = __builtin_shufflevector(__s0_265, __s0_265, 1, 0); \ + float16x4_t __rev1_265; __rev1_265 = __builtin_shufflevector(__s1_265, __s1_265, 3, 2, 1, 0); \ float16x4_t __rev2_265; __rev2_265 = __builtin_shufflevector(__s2_265, __s2_265, 3, 2, 1, 0); \ - float32x4_t __ret_265; \ - __ret_265 = __noswap_vfmlalq_high_f16(__rev0_265, __rev1_265, (float16x8_t) {__noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265)}); \ - __ret_265 = __builtin_shufflevector(__ret_265, __ret_265, 3, 2, 1, 0); \ + float32x2_t __ret_265; \ + __ret_265 = __noswap_vfmlal_low_f16(__rev0_265, __rev1_265, (float16x4_t) {__noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265)}); \ + __ret_265 = __builtin_shufflevector(__ret_265, __ret_265, 1, 0); \ __ret_265; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_lane_high_f16(__p0_266, __p1_266, __p2_266, __p3_266) __extension__ ({ \ - float32x2_t __s0_266 = __p0_266; \ - float16x4_t __s1_266 = __p1_266; \ - float16x4_t __s2_266 = __p2_266; \ - float32x2_t __ret_266; \ - __ret_266 = vfmlal_high_f16(__s0_266, __s1_266, (float16x4_t) {vget_lane_f16(__s2_266, __p3_266), vget_lane_f16(__s2_266, __p3_266), vget_lane_f16(__s2_266, __p3_266), vget_lane_f16(__s2_266, __p3_266)}); \ +#define vfmlalq_laneq_high_f16(__p0_266, __p1_266, __p2_266, __p3_266) __extension__ ({ \ + float32x4_t __s0_266 = __p0_266; \ + float16x8_t __s1_266 = __p1_266; \ + float16x8_t __s2_266 = 
__p2_266; \ + float32x4_t __ret_266; \ + __ret_266 = vfmlalq_high_f16(__s0_266, __s1_266, (float16x8_t) {vgetq_lane_f16(__s2_266, __p3_266), vgetq_lane_f16(__s2_266, __p3_266), vgetq_lane_f16(__s2_266, __p3_266), vgetq_lane_f16(__s2_266, __p3_266), vgetq_lane_f16(__s2_266, __p3_266), vgetq_lane_f16(__s2_266, __p3_266), vgetq_lane_f16(__s2_266, __p3_266), vgetq_lane_f16(__s2_266, __p3_266)}); \ __ret_266; \ }) #else -#define vfmlal_lane_high_f16(__p0_267, __p1_267, __p2_267, __p3_267) __extension__ ({ \ - float32x2_t __s0_267 = __p0_267; \ - float16x4_t __s1_267 = __p1_267; \ - float16x4_t __s2_267 = __p2_267; \ - float32x2_t __rev0_267; __rev0_267 = __builtin_shufflevector(__s0_267, __s0_267, 1, 0); \ - float16x4_t __rev1_267; __rev1_267 = __builtin_shufflevector(__s1_267, __s1_267, 3, 2, 1, 0); \ - float16x4_t __rev2_267; __rev2_267 = __builtin_shufflevector(__s2_267, __s2_267, 3, 2, 1, 0); \ - float32x2_t __ret_267; \ - __ret_267 = __noswap_vfmlal_high_f16(__rev0_267, __rev1_267, (float16x4_t) {__noswap_vget_lane_f16(__rev2_267, __p3_267), __noswap_vget_lane_f16(__rev2_267, __p3_267), __noswap_vget_lane_f16(__rev2_267, __p3_267), __noswap_vget_lane_f16(__rev2_267, __p3_267)}); \ - __ret_267 = __builtin_shufflevector(__ret_267, __ret_267, 1, 0); \ +#define vfmlalq_laneq_high_f16(__p0_267, __p1_267, __p2_267, __p3_267) __extension__ ({ \ + float32x4_t __s0_267 = __p0_267; \ + float16x8_t __s1_267 = __p1_267; \ + float16x8_t __s2_267 = __p2_267; \ + float32x4_t __rev0_267; __rev0_267 = __builtin_shufflevector(__s0_267, __s0_267, 3, 2, 1, 0); \ + float16x8_t __rev1_267; __rev1_267 = __builtin_shufflevector(__s1_267, __s1_267, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_267; __rev2_267 = __builtin_shufflevector(__s2_267, __s2_267, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x4_t __ret_267; \ + __ret_267 = __noswap_vfmlalq_high_f16(__rev0_267, __rev1_267, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_267, __p3_267), __noswap_vgetq_lane_f16(__rev2_267, __p3_267), __noswap_vgetq_lane_f16(__rev2_267, __p3_267), __noswap_vgetq_lane_f16(__rev2_267, __p3_267), __noswap_vgetq_lane_f16(__rev2_267, __p3_267), __noswap_vgetq_lane_f16(__rev2_267, __p3_267), __noswap_vgetq_lane_f16(__rev2_267, __p3_267), __noswap_vgetq_lane_f16(__rev2_267, __p3_267)}); \ + __ret_267 = __builtin_shufflevector(__ret_267, __ret_267, 3, 2, 1, 0); \ __ret_267; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_lane_low_f16(__p0_268, __p1_268, __p2_268, __p3_268) __extension__ ({ \ - float32x4_t __s0_268 = __p0_268; \ - float16x8_t __s1_268 = __p1_268; \ - float16x4_t __s2_268 = __p2_268; \ - float32x4_t __ret_268; \ - __ret_268 = vfmlalq_low_f16(__s0_268, __s1_268, (float16x8_t) {vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268)}); \ +#define vfmlal_laneq_high_f16(__p0_268, __p1_268, __p2_268, __p3_268) __extension__ ({ \ + float32x2_t __s0_268 = __p0_268; \ + float16x4_t __s1_268 = __p1_268; \ + float16x8_t __s2_268 = __p2_268; \ + float32x2_t __ret_268; \ + __ret_268 = vfmlal_high_f16(__s0_268, __s1_268, (float16x4_t) {vgetq_lane_f16(__s2_268, __p3_268), vgetq_lane_f16(__s2_268, __p3_268), vgetq_lane_f16(__s2_268, __p3_268), vgetq_lane_f16(__s2_268, __p3_268)}); \ __ret_268; \ }) #else -#define vfmlalq_lane_low_f16(__p0_269, __p1_269, __p2_269, __p3_269) __extension__ ({ \ - float32x4_t __s0_269 = 
__p0_269; \ - float16x8_t __s1_269 = __p1_269; \ - float16x4_t __s2_269 = __p2_269; \ - float32x4_t __rev0_269; __rev0_269 = __builtin_shufflevector(__s0_269, __s0_269, 3, 2, 1, 0); \ - float16x8_t __rev1_269; __rev1_269 = __builtin_shufflevector(__s1_269, __s1_269, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_269; __rev2_269 = __builtin_shufflevector(__s2_269, __s2_269, 3, 2, 1, 0); \ - float32x4_t __ret_269; \ - __ret_269 = __noswap_vfmlalq_low_f16(__rev0_269, __rev1_269, (float16x8_t) {__noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269)}); \ - __ret_269 = __builtin_shufflevector(__ret_269, __ret_269, 3, 2, 1, 0); \ +#define vfmlal_laneq_high_f16(__p0_269, __p1_269, __p2_269, __p3_269) __extension__ ({ \ + float32x2_t __s0_269 = __p0_269; \ + float16x4_t __s1_269 = __p1_269; \ + float16x8_t __s2_269 = __p2_269; \ + float32x2_t __rev0_269; __rev0_269 = __builtin_shufflevector(__s0_269, __s0_269, 1, 0); \ + float16x4_t __rev1_269; __rev1_269 = __builtin_shufflevector(__s1_269, __s1_269, 3, 2, 1, 0); \ + float16x8_t __rev2_269; __rev2_269 = __builtin_shufflevector(__s2_269, __s2_269, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x2_t __ret_269; \ + __ret_269 = __noswap_vfmlal_high_f16(__rev0_269, __rev1_269, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_269, __p3_269), __noswap_vgetq_lane_f16(__rev2_269, __p3_269), __noswap_vgetq_lane_f16(__rev2_269, __p3_269), __noswap_vgetq_lane_f16(__rev2_269, __p3_269)}); \ + __ret_269 = __builtin_shufflevector(__ret_269, __ret_269, 1, 0); \ __ret_269; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_lane_low_f16(__p0_270, __p1_270, __p2_270, __p3_270) __extension__ ({ \ - float32x2_t __s0_270 = __p0_270; \ - float16x4_t __s1_270 = __p1_270; \ - float16x4_t __s2_270 = __p2_270; \ - float32x2_t __ret_270; \ - __ret_270 = vfmlal_low_f16(__s0_270, __s1_270, (float16x4_t) {vget_lane_f16(__s2_270, __p3_270), vget_lane_f16(__s2_270, __p3_270), vget_lane_f16(__s2_270, __p3_270), vget_lane_f16(__s2_270, __p3_270)}); \ +#define vfmlalq_laneq_low_f16(__p0_270, __p1_270, __p2_270, __p3_270) __extension__ ({ \ + float32x4_t __s0_270 = __p0_270; \ + float16x8_t __s1_270 = __p1_270; \ + float16x8_t __s2_270 = __p2_270; \ + float32x4_t __ret_270; \ + __ret_270 = vfmlalq_low_f16(__s0_270, __s1_270, (float16x8_t) {vgetq_lane_f16(__s2_270, __p3_270), vgetq_lane_f16(__s2_270, __p3_270), vgetq_lane_f16(__s2_270, __p3_270), vgetq_lane_f16(__s2_270, __p3_270), vgetq_lane_f16(__s2_270, __p3_270), vgetq_lane_f16(__s2_270, __p3_270), vgetq_lane_f16(__s2_270, __p3_270), vgetq_lane_f16(__s2_270, __p3_270)}); \ __ret_270; \ }) #else -#define vfmlal_lane_low_f16(__p0_271, __p1_271, __p2_271, __p3_271) __extension__ ({ \ - float32x2_t __s0_271 = __p0_271; \ - float16x4_t __s1_271 = __p1_271; \ - float16x4_t __s2_271 = __p2_271; \ - float32x2_t __rev0_271; __rev0_271 = __builtin_shufflevector(__s0_271, __s0_271, 1, 0); \ - float16x4_t __rev1_271; __rev1_271 = __builtin_shufflevector(__s1_271, __s1_271, 3, 2, 1, 0); \ - float16x4_t __rev2_271; __rev2_271 = __builtin_shufflevector(__s2_271, __s2_271, 3, 2, 1, 0); \ - float32x2_t __ret_271; \ - __ret_271 = __noswap_vfmlal_low_f16(__rev0_271, __rev1_271, (float16x4_t) {__noswap_vget_lane_f16(__rev2_271, __p3_271), 
__noswap_vget_lane_f16(__rev2_271, __p3_271), __noswap_vget_lane_f16(__rev2_271, __p3_271), __noswap_vget_lane_f16(__rev2_271, __p3_271)}); \ - __ret_271 = __builtin_shufflevector(__ret_271, __ret_271, 1, 0); \ +#define vfmlalq_laneq_low_f16(__p0_271, __p1_271, __p2_271, __p3_271) __extension__ ({ \ + float32x4_t __s0_271 = __p0_271; \ + float16x8_t __s1_271 = __p1_271; \ + float16x8_t __s2_271 = __p2_271; \ + float32x4_t __rev0_271; __rev0_271 = __builtin_shufflevector(__s0_271, __s0_271, 3, 2, 1, 0); \ + float16x8_t __rev1_271; __rev1_271 = __builtin_shufflevector(__s1_271, __s1_271, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_271; __rev2_271 = __builtin_shufflevector(__s2_271, __s2_271, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x4_t __ret_271; \ + __ret_271 = __noswap_vfmlalq_low_f16(__rev0_271, __rev1_271, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_271, __p3_271), __noswap_vgetq_lane_f16(__rev2_271, __p3_271), __noswap_vgetq_lane_f16(__rev2_271, __p3_271), __noswap_vgetq_lane_f16(__rev2_271, __p3_271), __noswap_vgetq_lane_f16(__rev2_271, __p3_271), __noswap_vgetq_lane_f16(__rev2_271, __p3_271), __noswap_vgetq_lane_f16(__rev2_271, __p3_271), __noswap_vgetq_lane_f16(__rev2_271, __p3_271)}); \ + __ret_271 = __builtin_shufflevector(__ret_271, __ret_271, 3, 2, 1, 0); \ __ret_271; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_laneq_high_f16(__p0_272, __p1_272, __p2_272, __p3_272) __extension__ ({ \ - float32x4_t __s0_272 = __p0_272; \ - float16x8_t __s1_272 = __p1_272; \ +#define vfmlal_laneq_low_f16(__p0_272, __p1_272, __p2_272, __p3_272) __extension__ ({ \ + float32x2_t __s0_272 = __p0_272; \ + float16x4_t __s1_272 = __p1_272; \ float16x8_t __s2_272 = __p2_272; \ - float32x4_t __ret_272; \ - __ret_272 = vfmlalq_high_f16(__s0_272, __s1_272, (float16x8_t) {vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272)}); \ + float32x2_t __ret_272; \ + __ret_272 = vfmlal_low_f16(__s0_272, __s1_272, (float16x4_t) {vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272)}); \ __ret_272; \ }) #else -#define vfmlalq_laneq_high_f16(__p0_273, __p1_273, __p2_273, __p3_273) __extension__ ({ \ - float32x4_t __s0_273 = __p0_273; \ - float16x8_t __s1_273 = __p1_273; \ +#define vfmlal_laneq_low_f16(__p0_273, __p1_273, __p2_273, __p3_273) __extension__ ({ \ + float32x2_t __s0_273 = __p0_273; \ + float16x4_t __s1_273 = __p1_273; \ float16x8_t __s2_273 = __p2_273; \ - float32x4_t __rev0_273; __rev0_273 = __builtin_shufflevector(__s0_273, __s0_273, 3, 2, 1, 0); \ - float16x8_t __rev1_273; __rev1_273 = __builtin_shufflevector(__s1_273, __s1_273, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x2_t __rev0_273; __rev0_273 = __builtin_shufflevector(__s0_273, __s0_273, 1, 0); \ + float16x4_t __rev1_273; __rev1_273 = __builtin_shufflevector(__s1_273, __s1_273, 3, 2, 1, 0); \ float16x8_t __rev2_273; __rev2_273 = __builtin_shufflevector(__s2_273, __s2_273, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x4_t __ret_273; \ - __ret_273 = __noswap_vfmlalq_high_f16(__rev0_273, __rev1_273, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, 
__p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273)}); \ - __ret_273 = __builtin_shufflevector(__ret_273, __ret_273, 3, 2, 1, 0); \ + float32x2_t __ret_273; \ + __ret_273 = __noswap_vfmlal_low_f16(__rev0_273, __rev1_273, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273)}); \ + __ret_273 = __builtin_shufflevector(__ret_273, __ret_273, 1, 0); \ __ret_273; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_laneq_high_f16(__p0_274, __p1_274, __p2_274, __p3_274) __extension__ ({ \ - float32x2_t __s0_274 = __p0_274; \ - float16x4_t __s1_274 = __p1_274; \ - float16x8_t __s2_274 = __p2_274; \ - float32x2_t __ret_274; \ - __ret_274 = vfmlal_high_f16(__s0_274, __s1_274, (float16x4_t) {vgetq_lane_f16(__s2_274, __p3_274), vgetq_lane_f16(__s2_274, __p3_274), vgetq_lane_f16(__s2_274, __p3_274), vgetq_lane_f16(__s2_274, __p3_274)}); \ +#define vfmlslq_lane_high_f16(__p0_274, __p1_274, __p2_274, __p3_274) __extension__ ({ \ + float32x4_t __s0_274 = __p0_274; \ + float16x8_t __s1_274 = __p1_274; \ + float16x4_t __s2_274 = __p2_274; \ + float32x4_t __ret_274; \ + __ret_274 = vfmlslq_high_f16(__s0_274, __s1_274, (float16x8_t) {vget_lane_f16(__s2_274, __p3_274), vget_lane_f16(__s2_274, __p3_274), vget_lane_f16(__s2_274, __p3_274), vget_lane_f16(__s2_274, __p3_274), vget_lane_f16(__s2_274, __p3_274), vget_lane_f16(__s2_274, __p3_274), vget_lane_f16(__s2_274, __p3_274), vget_lane_f16(__s2_274, __p3_274)}); \ __ret_274; \ }) #else -#define vfmlal_laneq_high_f16(__p0_275, __p1_275, __p2_275, __p3_275) __extension__ ({ \ - float32x2_t __s0_275 = __p0_275; \ - float16x4_t __s1_275 = __p1_275; \ - float16x8_t __s2_275 = __p2_275; \ - float32x2_t __rev0_275; __rev0_275 = __builtin_shufflevector(__s0_275, __s0_275, 1, 0); \ - float16x4_t __rev1_275; __rev1_275 = __builtin_shufflevector(__s1_275, __s1_275, 3, 2, 1, 0); \ - float16x8_t __rev2_275; __rev2_275 = __builtin_shufflevector(__s2_275, __s2_275, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x2_t __ret_275; \ - __ret_275 = __noswap_vfmlal_high_f16(__rev0_275, __rev1_275, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_275, __p3_275), __noswap_vgetq_lane_f16(__rev2_275, __p3_275), __noswap_vgetq_lane_f16(__rev2_275, __p3_275), __noswap_vgetq_lane_f16(__rev2_275, __p3_275)}); \ - __ret_275 = __builtin_shufflevector(__ret_275, __ret_275, 1, 0); \ +#define vfmlslq_lane_high_f16(__p0_275, __p1_275, __p2_275, __p3_275) __extension__ ({ \ + float32x4_t __s0_275 = __p0_275; \ + float16x8_t __s1_275 = __p1_275; \ + float16x4_t __s2_275 = __p2_275; \ + float32x4_t __rev0_275; __rev0_275 = __builtin_shufflevector(__s0_275, __s0_275, 3, 2, 1, 0); \ + float16x8_t __rev1_275; __rev1_275 = __builtin_shufflevector(__s1_275, __s1_275, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_275; __rev2_275 = __builtin_shufflevector(__s2_275, __s2_275, 3, 2, 1, 0); \ + float32x4_t __ret_275; \ + __ret_275 = __noswap_vfmlslq_high_f16(__rev0_275, __rev1_275, (float16x8_t) {__noswap_vget_lane_f16(__rev2_275, __p3_275), __noswap_vget_lane_f16(__rev2_275, __p3_275), __noswap_vget_lane_f16(__rev2_275, __p3_275), __noswap_vget_lane_f16(__rev2_275, __p3_275), __noswap_vget_lane_f16(__rev2_275, __p3_275), __noswap_vget_lane_f16(__rev2_275, __p3_275), __noswap_vget_lane_f16(__rev2_275, __p3_275), __noswap_vget_lane_f16(__rev2_275, __p3_275)}); \ + __ret_275 = 
__builtin_shufflevector(__ret_275, __ret_275, 3, 2, 1, 0); \ __ret_275; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_laneq_low_f16(__p0_276, __p1_276, __p2_276, __p3_276) __extension__ ({ \ - float32x4_t __s0_276 = __p0_276; \ - float16x8_t __s1_276 = __p1_276; \ - float16x8_t __s2_276 = __p2_276; \ - float32x4_t __ret_276; \ - __ret_276 = vfmlalq_low_f16(__s0_276, __s1_276, (float16x8_t) {vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276)}); \ +#define vfmlsl_lane_high_f16(__p0_276, __p1_276, __p2_276, __p3_276) __extension__ ({ \ + float32x2_t __s0_276 = __p0_276; \ + float16x4_t __s1_276 = __p1_276; \ + float16x4_t __s2_276 = __p2_276; \ + float32x2_t __ret_276; \ + __ret_276 = vfmlsl_high_f16(__s0_276, __s1_276, (float16x4_t) {vget_lane_f16(__s2_276, __p3_276), vget_lane_f16(__s2_276, __p3_276), vget_lane_f16(__s2_276, __p3_276), vget_lane_f16(__s2_276, __p3_276)}); \ __ret_276; \ }) #else -#define vfmlalq_laneq_low_f16(__p0_277, __p1_277, __p2_277, __p3_277) __extension__ ({ \ - float32x4_t __s0_277 = __p0_277; \ - float16x8_t __s1_277 = __p1_277; \ - float16x8_t __s2_277 = __p2_277; \ - float32x4_t __rev0_277; __rev0_277 = __builtin_shufflevector(__s0_277, __s0_277, 3, 2, 1, 0); \ - float16x8_t __rev1_277; __rev1_277 = __builtin_shufflevector(__s1_277, __s1_277, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_277; __rev2_277 = __builtin_shufflevector(__s2_277, __s2_277, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x4_t __ret_277; \ - __ret_277 = __noswap_vfmlalq_low_f16(__rev0_277, __rev1_277, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277)}); \ - __ret_277 = __builtin_shufflevector(__ret_277, __ret_277, 3, 2, 1, 0); \ +#define vfmlsl_lane_high_f16(__p0_277, __p1_277, __p2_277, __p3_277) __extension__ ({ \ + float32x2_t __s0_277 = __p0_277; \ + float16x4_t __s1_277 = __p1_277; \ + float16x4_t __s2_277 = __p2_277; \ + float32x2_t __rev0_277; __rev0_277 = __builtin_shufflevector(__s0_277, __s0_277, 1, 0); \ + float16x4_t __rev1_277; __rev1_277 = __builtin_shufflevector(__s1_277, __s1_277, 3, 2, 1, 0); \ + float16x4_t __rev2_277; __rev2_277 = __builtin_shufflevector(__s2_277, __s2_277, 3, 2, 1, 0); \ + float32x2_t __ret_277; \ + __ret_277 = __noswap_vfmlsl_high_f16(__rev0_277, __rev1_277, (float16x4_t) {__noswap_vget_lane_f16(__rev2_277, __p3_277), __noswap_vget_lane_f16(__rev2_277, __p3_277), __noswap_vget_lane_f16(__rev2_277, __p3_277), __noswap_vget_lane_f16(__rev2_277, __p3_277)}); \ + __ret_277 = __builtin_shufflevector(__ret_277, __ret_277, 1, 0); \ __ret_277; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_laneq_low_f16(__p0_278, __p1_278, __p2_278, __p3_278) __extension__ ({ \ - float32x2_t __s0_278 = __p0_278; \ - float16x4_t __s1_278 = __p1_278; \ - float16x8_t __s2_278 = __p2_278; \ - float32x2_t __ret_278; \ - __ret_278 = vfmlal_low_f16(__s0_278, __s1_278, (float16x4_t) {vgetq_lane_f16(__s2_278, __p3_278), vgetq_lane_f16(__s2_278, __p3_278), vgetq_lane_f16(__s2_278, __p3_278), vgetq_lane_f16(__s2_278, 
__p3_278)}); \ +#define vfmlslq_lane_low_f16(__p0_278, __p1_278, __p2_278, __p3_278) __extension__ ({ \ + float32x4_t __s0_278 = __p0_278; \ + float16x8_t __s1_278 = __p1_278; \ + float16x4_t __s2_278 = __p2_278; \ + float32x4_t __ret_278; \ + __ret_278 = vfmlslq_low_f16(__s0_278, __s1_278, (float16x8_t) {vget_lane_f16(__s2_278, __p3_278), vget_lane_f16(__s2_278, __p3_278), vget_lane_f16(__s2_278, __p3_278), vget_lane_f16(__s2_278, __p3_278), vget_lane_f16(__s2_278, __p3_278), vget_lane_f16(__s2_278, __p3_278), vget_lane_f16(__s2_278, __p3_278), vget_lane_f16(__s2_278, __p3_278)}); \ __ret_278; \ }) #else -#define vfmlal_laneq_low_f16(__p0_279, __p1_279, __p2_279, __p3_279) __extension__ ({ \ - float32x2_t __s0_279 = __p0_279; \ - float16x4_t __s1_279 = __p1_279; \ - float16x8_t __s2_279 = __p2_279; \ - float32x2_t __rev0_279; __rev0_279 = __builtin_shufflevector(__s0_279, __s0_279, 1, 0); \ - float16x4_t __rev1_279; __rev1_279 = __builtin_shufflevector(__s1_279, __s1_279, 3, 2, 1, 0); \ - float16x8_t __rev2_279; __rev2_279 = __builtin_shufflevector(__s2_279, __s2_279, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x2_t __ret_279; \ - __ret_279 = __noswap_vfmlal_low_f16(__rev0_279, __rev1_279, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_279, __p3_279), __noswap_vgetq_lane_f16(__rev2_279, __p3_279), __noswap_vgetq_lane_f16(__rev2_279, __p3_279), __noswap_vgetq_lane_f16(__rev2_279, __p3_279)}); \ - __ret_279 = __builtin_shufflevector(__ret_279, __ret_279, 1, 0); \ +#define vfmlslq_lane_low_f16(__p0_279, __p1_279, __p2_279, __p3_279) __extension__ ({ \ + float32x4_t __s0_279 = __p0_279; \ + float16x8_t __s1_279 = __p1_279; \ + float16x4_t __s2_279 = __p2_279; \ + float32x4_t __rev0_279; __rev0_279 = __builtin_shufflevector(__s0_279, __s0_279, 3, 2, 1, 0); \ + float16x8_t __rev1_279; __rev1_279 = __builtin_shufflevector(__s1_279, __s1_279, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_279; __rev2_279 = __builtin_shufflevector(__s2_279, __s2_279, 3, 2, 1, 0); \ + float32x4_t __ret_279; \ + __ret_279 = __noswap_vfmlslq_low_f16(__rev0_279, __rev1_279, (float16x8_t) {__noswap_vget_lane_f16(__rev2_279, __p3_279), __noswap_vget_lane_f16(__rev2_279, __p3_279), __noswap_vget_lane_f16(__rev2_279, __p3_279), __noswap_vget_lane_f16(__rev2_279, __p3_279), __noswap_vget_lane_f16(__rev2_279, __p3_279), __noswap_vget_lane_f16(__rev2_279, __p3_279), __noswap_vget_lane_f16(__rev2_279, __p3_279), __noswap_vget_lane_f16(__rev2_279, __p3_279)}); \ + __ret_279 = __builtin_shufflevector(__ret_279, __ret_279, 3, 2, 1, 0); \ __ret_279; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_lane_high_f16(__p0_280, __p1_280, __p2_280, __p3_280) __extension__ ({ \ - float32x4_t __s0_280 = __p0_280; \ - float16x8_t __s1_280 = __p1_280; \ +#define vfmlsl_lane_low_f16(__p0_280, __p1_280, __p2_280, __p3_280) __extension__ ({ \ + float32x2_t __s0_280 = __p0_280; \ + float16x4_t __s1_280 = __p1_280; \ float16x4_t __s2_280 = __p2_280; \ - float32x4_t __ret_280; \ - __ret_280 = vfmlslq_high_f16(__s0_280, __s1_280, (float16x8_t) {vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280)}); \ + float32x2_t __ret_280; \ + __ret_280 = vfmlsl_low_f16(__s0_280, __s1_280, (float16x4_t) {vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280)}); 
\ __ret_280; \ }) #else -#define vfmlslq_lane_high_f16(__p0_281, __p1_281, __p2_281, __p3_281) __extension__ ({ \ - float32x4_t __s0_281 = __p0_281; \ - float16x8_t __s1_281 = __p1_281; \ +#define vfmlsl_lane_low_f16(__p0_281, __p1_281, __p2_281, __p3_281) __extension__ ({ \ + float32x2_t __s0_281 = __p0_281; \ + float16x4_t __s1_281 = __p1_281; \ float16x4_t __s2_281 = __p2_281; \ - float32x4_t __rev0_281; __rev0_281 = __builtin_shufflevector(__s0_281, __s0_281, 3, 2, 1, 0); \ - float16x8_t __rev1_281; __rev1_281 = __builtin_shufflevector(__s1_281, __s1_281, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x2_t __rev0_281; __rev0_281 = __builtin_shufflevector(__s0_281, __s0_281, 1, 0); \ + float16x4_t __rev1_281; __rev1_281 = __builtin_shufflevector(__s1_281, __s1_281, 3, 2, 1, 0); \ float16x4_t __rev2_281; __rev2_281 = __builtin_shufflevector(__s2_281, __s2_281, 3, 2, 1, 0); \ - float32x4_t __ret_281; \ - __ret_281 = __noswap_vfmlslq_high_f16(__rev0_281, __rev1_281, (float16x8_t) {__noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281)}); \ - __ret_281 = __builtin_shufflevector(__ret_281, __ret_281, 3, 2, 1, 0); \ + float32x2_t __ret_281; \ + __ret_281 = __noswap_vfmlsl_low_f16(__rev0_281, __rev1_281, (float16x4_t) {__noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281)}); \ + __ret_281 = __builtin_shufflevector(__ret_281, __ret_281, 1, 0); \ __ret_281; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_lane_high_f16(__p0_282, __p1_282, __p2_282, __p3_282) __extension__ ({ \ - float32x2_t __s0_282 = __p0_282; \ - float16x4_t __s1_282 = __p1_282; \ - float16x4_t __s2_282 = __p2_282; \ - float32x2_t __ret_282; \ - __ret_282 = vfmlsl_high_f16(__s0_282, __s1_282, (float16x4_t) {vget_lane_f16(__s2_282, __p3_282), vget_lane_f16(__s2_282, __p3_282), vget_lane_f16(__s2_282, __p3_282), vget_lane_f16(__s2_282, __p3_282)}); \ +#define vfmlslq_laneq_high_f16(__p0_282, __p1_282, __p2_282, __p3_282) __extension__ ({ \ + float32x4_t __s0_282 = __p0_282; \ + float16x8_t __s1_282 = __p1_282; \ + float16x8_t __s2_282 = __p2_282; \ + float32x4_t __ret_282; \ + __ret_282 = vfmlslq_high_f16(__s0_282, __s1_282, (float16x8_t) {vgetq_lane_f16(__s2_282, __p3_282), vgetq_lane_f16(__s2_282, __p3_282), vgetq_lane_f16(__s2_282, __p3_282), vgetq_lane_f16(__s2_282, __p3_282), vgetq_lane_f16(__s2_282, __p3_282), vgetq_lane_f16(__s2_282, __p3_282), vgetq_lane_f16(__s2_282, __p3_282), vgetq_lane_f16(__s2_282, __p3_282)}); \ __ret_282; \ }) #else -#define vfmlsl_lane_high_f16(__p0_283, __p1_283, __p2_283, __p3_283) __extension__ ({ \ - float32x2_t __s0_283 = __p0_283; \ - float16x4_t __s1_283 = __p1_283; \ - float16x4_t __s2_283 = __p2_283; \ - float32x2_t __rev0_283; __rev0_283 = __builtin_shufflevector(__s0_283, __s0_283, 1, 0); \ - float16x4_t __rev1_283; __rev1_283 = __builtin_shufflevector(__s1_283, __s1_283, 3, 2, 1, 0); \ - float16x4_t __rev2_283; __rev2_283 = __builtin_shufflevector(__s2_283, __s2_283, 3, 2, 1, 0); \ - float32x2_t __ret_283; \ - __ret_283 = __noswap_vfmlsl_high_f16(__rev0_283, __rev1_283, (float16x4_t) {__noswap_vget_lane_f16(__rev2_283, __p3_283), 
__noswap_vget_lane_f16(__rev2_283, __p3_283), __noswap_vget_lane_f16(__rev2_283, __p3_283), __noswap_vget_lane_f16(__rev2_283, __p3_283)}); \ - __ret_283 = __builtin_shufflevector(__ret_283, __ret_283, 1, 0); \ +#define vfmlslq_laneq_high_f16(__p0_283, __p1_283, __p2_283, __p3_283) __extension__ ({ \ + float32x4_t __s0_283 = __p0_283; \ + float16x8_t __s1_283 = __p1_283; \ + float16x8_t __s2_283 = __p2_283; \ + float32x4_t __rev0_283; __rev0_283 = __builtin_shufflevector(__s0_283, __s0_283, 3, 2, 1, 0); \ + float16x8_t __rev1_283; __rev1_283 = __builtin_shufflevector(__s1_283, __s1_283, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_283; __rev2_283 = __builtin_shufflevector(__s2_283, __s2_283, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x4_t __ret_283; \ + __ret_283 = __noswap_vfmlslq_high_f16(__rev0_283, __rev1_283, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_283, __p3_283), __noswap_vgetq_lane_f16(__rev2_283, __p3_283), __noswap_vgetq_lane_f16(__rev2_283, __p3_283), __noswap_vgetq_lane_f16(__rev2_283, __p3_283), __noswap_vgetq_lane_f16(__rev2_283, __p3_283), __noswap_vgetq_lane_f16(__rev2_283, __p3_283), __noswap_vgetq_lane_f16(__rev2_283, __p3_283), __noswap_vgetq_lane_f16(__rev2_283, __p3_283)}); \ + __ret_283 = __builtin_shufflevector(__ret_283, __ret_283, 3, 2, 1, 0); \ __ret_283; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_lane_low_f16(__p0_284, __p1_284, __p2_284, __p3_284) __extension__ ({ \ - float32x4_t __s0_284 = __p0_284; \ - float16x8_t __s1_284 = __p1_284; \ - float16x4_t __s2_284 = __p2_284; \ - float32x4_t __ret_284; \ - __ret_284 = vfmlslq_low_f16(__s0_284, __s1_284, (float16x8_t) {vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284)}); \ +#define vfmlsl_laneq_high_f16(__p0_284, __p1_284, __p2_284, __p3_284) __extension__ ({ \ + float32x2_t __s0_284 = __p0_284; \ + float16x4_t __s1_284 = __p1_284; \ + float16x8_t __s2_284 = __p2_284; \ + float32x2_t __ret_284; \ + __ret_284 = vfmlsl_high_f16(__s0_284, __s1_284, (float16x4_t) {vgetq_lane_f16(__s2_284, __p3_284), vgetq_lane_f16(__s2_284, __p3_284), vgetq_lane_f16(__s2_284, __p3_284), vgetq_lane_f16(__s2_284, __p3_284)}); \ __ret_284; \ }) #else -#define vfmlslq_lane_low_f16(__p0_285, __p1_285, __p2_285, __p3_285) __extension__ ({ \ - float32x4_t __s0_285 = __p0_285; \ - float16x8_t __s1_285 = __p1_285; \ - float16x4_t __s2_285 = __p2_285; \ - float32x4_t __rev0_285; __rev0_285 = __builtin_shufflevector(__s0_285, __s0_285, 3, 2, 1, 0); \ - float16x8_t __rev1_285; __rev1_285 = __builtin_shufflevector(__s1_285, __s1_285, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_285; __rev2_285 = __builtin_shufflevector(__s2_285, __s2_285, 3, 2, 1, 0); \ - float32x4_t __ret_285; \ - __ret_285 = __noswap_vfmlslq_low_f16(__rev0_285, __rev1_285, (float16x8_t) {__noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285)}); \ - __ret_285 = __builtin_shufflevector(__ret_285, __ret_285, 3, 2, 1, 0); \ +#define vfmlsl_laneq_high_f16(__p0_285, __p1_285, __p2_285, __p3_285) __extension__ ({ \ + float32x2_t __s0_285 = 
__p0_285; \ + float16x4_t __s1_285 = __p1_285; \ + float16x8_t __s2_285 = __p2_285; \ + float32x2_t __rev0_285; __rev0_285 = __builtin_shufflevector(__s0_285, __s0_285, 1, 0); \ + float16x4_t __rev1_285; __rev1_285 = __builtin_shufflevector(__s1_285, __s1_285, 3, 2, 1, 0); \ + float16x8_t __rev2_285; __rev2_285 = __builtin_shufflevector(__s2_285, __s2_285, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x2_t __ret_285; \ + __ret_285 = __noswap_vfmlsl_high_f16(__rev0_285, __rev1_285, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_285, __p3_285), __noswap_vgetq_lane_f16(__rev2_285, __p3_285), __noswap_vgetq_lane_f16(__rev2_285, __p3_285), __noswap_vgetq_lane_f16(__rev2_285, __p3_285)}); \ + __ret_285 = __builtin_shufflevector(__ret_285, __ret_285, 1, 0); \ __ret_285; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_lane_low_f16(__p0_286, __p1_286, __p2_286, __p3_286) __extension__ ({ \ - float32x2_t __s0_286 = __p0_286; \ - float16x4_t __s1_286 = __p1_286; \ - float16x4_t __s2_286 = __p2_286; \ - float32x2_t __ret_286; \ - __ret_286 = vfmlsl_low_f16(__s0_286, __s1_286, (float16x4_t) {vget_lane_f16(__s2_286, __p3_286), vget_lane_f16(__s2_286, __p3_286), vget_lane_f16(__s2_286, __p3_286), vget_lane_f16(__s2_286, __p3_286)}); \ +#define vfmlslq_laneq_low_f16(__p0_286, __p1_286, __p2_286, __p3_286) __extension__ ({ \ + float32x4_t __s0_286 = __p0_286; \ + float16x8_t __s1_286 = __p1_286; \ + float16x8_t __s2_286 = __p2_286; \ + float32x4_t __ret_286; \ + __ret_286 = vfmlslq_low_f16(__s0_286, __s1_286, (float16x8_t) {vgetq_lane_f16(__s2_286, __p3_286), vgetq_lane_f16(__s2_286, __p3_286), vgetq_lane_f16(__s2_286, __p3_286), vgetq_lane_f16(__s2_286, __p3_286), vgetq_lane_f16(__s2_286, __p3_286), vgetq_lane_f16(__s2_286, __p3_286), vgetq_lane_f16(__s2_286, __p3_286), vgetq_lane_f16(__s2_286, __p3_286)}); \ __ret_286; \ }) #else -#define vfmlsl_lane_low_f16(__p0_287, __p1_287, __p2_287, __p3_287) __extension__ ({ \ - float32x2_t __s0_287 = __p0_287; \ - float16x4_t __s1_287 = __p1_287; \ - float16x4_t __s2_287 = __p2_287; \ - float32x2_t __rev0_287; __rev0_287 = __builtin_shufflevector(__s0_287, __s0_287, 1, 0); \ - float16x4_t __rev1_287; __rev1_287 = __builtin_shufflevector(__s1_287, __s1_287, 3, 2, 1, 0); \ - float16x4_t __rev2_287; __rev2_287 = __builtin_shufflevector(__s2_287, __s2_287, 3, 2, 1, 0); \ - float32x2_t __ret_287; \ - __ret_287 = __noswap_vfmlsl_low_f16(__rev0_287, __rev1_287, (float16x4_t) {__noswap_vget_lane_f16(__rev2_287, __p3_287), __noswap_vget_lane_f16(__rev2_287, __p3_287), __noswap_vget_lane_f16(__rev2_287, __p3_287), __noswap_vget_lane_f16(__rev2_287, __p3_287)}); \ - __ret_287 = __builtin_shufflevector(__ret_287, __ret_287, 1, 0); \ +#define vfmlslq_laneq_low_f16(__p0_287, __p1_287, __p2_287, __p3_287) __extension__ ({ \ + float32x4_t __s0_287 = __p0_287; \ + float16x8_t __s1_287 = __p1_287; \ + float16x8_t __s2_287 = __p2_287; \ + float32x4_t __rev0_287; __rev0_287 = __builtin_shufflevector(__s0_287, __s0_287, 3, 2, 1, 0); \ + float16x8_t __rev1_287; __rev1_287 = __builtin_shufflevector(__s1_287, __s1_287, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_287; __rev2_287 = __builtin_shufflevector(__s2_287, __s2_287, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x4_t __ret_287; \ + __ret_287 = __noswap_vfmlslq_low_f16(__rev0_287, __rev1_287, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_287, __p3_287), __noswap_vgetq_lane_f16(__rev2_287, __p3_287), __noswap_vgetq_lane_f16(__rev2_287, __p3_287), __noswap_vgetq_lane_f16(__rev2_287, __p3_287), __noswap_vgetq_lane_f16(__rev2_287, __p3_287), 
__noswap_vgetq_lane_f16(__rev2_287, __p3_287), __noswap_vgetq_lane_f16(__rev2_287, __p3_287), __noswap_vgetq_lane_f16(__rev2_287, __p3_287)}); \ + __ret_287 = __builtin_shufflevector(__ret_287, __ret_287, 3, 2, 1, 0); \ __ret_287; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_laneq_high_f16(__p0_288, __p1_288, __p2_288, __p3_288) __extension__ ({ \ - float32x4_t __s0_288 = __p0_288; \ - float16x8_t __s1_288 = __p1_288; \ +#define vfmlsl_laneq_low_f16(__p0_288, __p1_288, __p2_288, __p3_288) __extension__ ({ \ + float32x2_t __s0_288 = __p0_288; \ + float16x4_t __s1_288 = __p1_288; \ float16x8_t __s2_288 = __p2_288; \ - float32x4_t __ret_288; \ - __ret_288 = vfmlslq_high_f16(__s0_288, __s1_288, (float16x8_t) {vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288)}); \ + float32x2_t __ret_288; \ + __ret_288 = vfmlsl_low_f16(__s0_288, __s1_288, (float16x4_t) {vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288)}); \ __ret_288; \ }) #else -#define vfmlslq_laneq_high_f16(__p0_289, __p1_289, __p2_289, __p3_289) __extension__ ({ \ - float32x4_t __s0_289 = __p0_289; \ - float16x8_t __s1_289 = __p1_289; \ +#define vfmlsl_laneq_low_f16(__p0_289, __p1_289, __p2_289, __p3_289) __extension__ ({ \ + float32x2_t __s0_289 = __p0_289; \ + float16x4_t __s1_289 = __p1_289; \ float16x8_t __s2_289 = __p2_289; \ - float32x4_t __rev0_289; __rev0_289 = __builtin_shufflevector(__s0_289, __s0_289, 3, 2, 1, 0); \ - float16x8_t __rev1_289; __rev1_289 = __builtin_shufflevector(__s1_289, __s1_289, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x2_t __rev0_289; __rev0_289 = __builtin_shufflevector(__s0_289, __s0_289, 1, 0); \ + float16x4_t __rev1_289; __rev1_289 = __builtin_shufflevector(__s1_289, __s1_289, 3, 2, 1, 0); \ float16x8_t __rev2_289; __rev2_289 = __builtin_shufflevector(__s2_289, __s2_289, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x4_t __ret_289; \ - __ret_289 = __noswap_vfmlslq_high_f16(__rev0_289, __rev1_289, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289)}); \ - __ret_289 = __builtin_shufflevector(__ret_289, __ret_289, 3, 2, 1, 0); \ + float32x2_t __ret_289; \ + __ret_289 = __noswap_vfmlsl_low_f16(__rev0_289, __rev1_289, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289)}); \ + __ret_289 = __builtin_shufflevector(__ret_289, __ret_289, 1, 0); \ __ret_289; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vfmlsl_laneq_high_f16(__p0_290, __p1_290, __p2_290, __p3_290) __extension__ ({ \ - float32x2_t __s0_290 = __p0_290; \ - float16x4_t __s1_290 = __p1_290; \ - float16x8_t __s2_290 = __p2_290; \ - float32x2_t __ret_290; \ - __ret_290 = vfmlsl_high_f16(__s0_290, __s1_290, (float16x4_t) {vgetq_lane_f16(__s2_290, __p3_290), vgetq_lane_f16(__s2_290, __p3_290), vgetq_lane_f16(__s2_290, __p3_290), vgetq_lane_f16(__s2_290, 
__p3_290)}); \ - __ret_290; \ -}) -#else -#define vfmlsl_laneq_high_f16(__p0_291, __p1_291, __p2_291, __p3_291) __extension__ ({ \ - float32x2_t __s0_291 = __p0_291; \ - float16x4_t __s1_291 = __p1_291; \ - float16x8_t __s2_291 = __p2_291; \ - float32x2_t __rev0_291; __rev0_291 = __builtin_shufflevector(__s0_291, __s0_291, 1, 0); \ - float16x4_t __rev1_291; __rev1_291 = __builtin_shufflevector(__s1_291, __s1_291, 3, 2, 1, 0); \ - float16x8_t __rev2_291; __rev2_291 = __builtin_shufflevector(__s2_291, __s2_291, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x2_t __ret_291; \ - __ret_291 = __noswap_vfmlsl_high_f16(__rev0_291, __rev1_291, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_291, __p3_291), __noswap_vgetq_lane_f16(__rev2_291, __p3_291), __noswap_vgetq_lane_f16(__rev2_291, __p3_291), __noswap_vgetq_lane_f16(__rev2_291, __p3_291)}); \ - __ret_291 = __builtin_shufflevector(__ret_291, __ret_291, 1, 0); \ - __ret_291; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmlslq_laneq_low_f16(__p0_292, __p1_292, __p2_292, __p3_292) __extension__ ({ \ - float32x4_t __s0_292 = __p0_292; \ - float16x8_t __s1_292 = __p1_292; \ - float16x8_t __s2_292 = __p2_292; \ - float32x4_t __ret_292; \ - __ret_292 = vfmlslq_low_f16(__s0_292, __s1_292, (float16x8_t) {vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292)}); \ - __ret_292; \ -}) -#else -#define vfmlslq_laneq_low_f16(__p0_293, __p1_293, __p2_293, __p3_293) __extension__ ({ \ - float32x4_t __s0_293 = __p0_293; \ - float16x8_t __s1_293 = __p1_293; \ - float16x8_t __s2_293 = __p2_293; \ - float32x4_t __rev0_293; __rev0_293 = __builtin_shufflevector(__s0_293, __s0_293, 3, 2, 1, 0); \ - float16x8_t __rev1_293; __rev1_293 = __builtin_shufflevector(__s1_293, __s1_293, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_293; __rev2_293 = __builtin_shufflevector(__s2_293, __s2_293, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x4_t __ret_293; \ - __ret_293 = __noswap_vfmlslq_low_f16(__rev0_293, __rev1_293, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293)}); \ - __ret_293 = __builtin_shufflevector(__ret_293, __ret_293, 3, 2, 1, 0); \ - __ret_293; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmlsl_laneq_low_f16(__p0_294, __p1_294, __p2_294, __p3_294) __extension__ ({ \ - float32x2_t __s0_294 = __p0_294; \ - float16x4_t __s1_294 = __p1_294; \ - float16x8_t __s2_294 = __p2_294; \ - float32x2_t __ret_294; \ - __ret_294 = vfmlsl_low_f16(__s0_294, __s1_294, (float16x4_t) {vgetq_lane_f16(__s2_294, __p3_294), vgetq_lane_f16(__s2_294, __p3_294), vgetq_lane_f16(__s2_294, __p3_294), vgetq_lane_f16(__s2_294, __p3_294)}); \ - __ret_294; \ -}) -#else -#define vfmlsl_laneq_low_f16(__p0_295, __p1_295, __p2_295, __p3_295) __extension__ ({ \ - float32x2_t __s0_295 = __p0_295; \ - float16x4_t __s1_295 = __p1_295; \ - float16x8_t __s2_295 = __p2_295; \ - float32x2_t __rev0_295; __rev0_295 = __builtin_shufflevector(__s0_295, __s0_295, 1, 0); \ - float16x4_t __rev1_295; __rev1_295 = __builtin_shufflevector(__s1_295, __s1_295, 3, 2, 
1, 0); \ - float16x8_t __rev2_295; __rev2_295 = __builtin_shufflevector(__s2_295, __s2_295, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x2_t __ret_295; \ - __ret_295 = __noswap_vfmlsl_low_f16(__rev0_295, __rev1_295, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_295, __p3_295), __noswap_vgetq_lane_f16(__rev2_295, __p3_295), __noswap_vgetq_lane_f16(__rev2_295, __p3_295), __noswap_vgetq_lane_f16(__rev2_295, __p3_295)}); \ - __ret_295 = __builtin_shufflevector(__ret_295, __ret_295, 1, 0); \ - __ret_295; \ -}) -#endif - #endif #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ -#define vmulh_lane_f16(__p0_296, __p1_296, __p2_296) __extension__ ({ \ - float16_t __s0_296 = __p0_296; \ - float16x4_t __s1_296 = __p1_296; \ - float16_t __ret_296; \ - __ret_296 = __s0_296 * vget_lane_f16(__s1_296, __p2_296); \ - __ret_296; \ +#define vmulh_lane_f16(__p0_290, __p1_290, __p2_290) __extension__ ({ \ + float16_t __s0_290 = __p0_290; \ + float16x4_t __s1_290 = __p1_290; \ + float16_t __ret_290; \ + __ret_290 = __s0_290 * vget_lane_f16(__s1_290, __p2_290); \ + __ret_290; \ }) #else -#define vmulh_lane_f16(__p0_297, __p1_297, __p2_297) __extension__ ({ \ - float16_t __s0_297 = __p0_297; \ - float16x4_t __s1_297 = __p1_297; \ - float16x4_t __rev1_297; __rev1_297 = __builtin_shufflevector(__s1_297, __s1_297, 3, 2, 1, 0); \ - float16_t __ret_297; \ - __ret_297 = __s0_297 * __noswap_vget_lane_f16(__rev1_297, __p2_297); \ - __ret_297; \ +#define vmulh_lane_f16(__p0_291, __p1_291, __p2_291) __extension__ ({ \ + float16_t __s0_291 = __p0_291; \ + float16x4_t __s1_291 = __p1_291; \ + float16x4_t __rev1_291; __rev1_291 = __builtin_shufflevector(__s1_291, __s1_291, 3, 2, 1, 0); \ + float16_t __ret_291; \ + __ret_291 = __s0_291 * __noswap_vget_lane_f16(__rev1_291, __p2_291); \ + __ret_291; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulh_laneq_f16(__p0_298, __p1_298, __p2_298) __extension__ ({ \ - float16_t __s0_298 = __p0_298; \ - float16x8_t __s1_298 = __p1_298; \ - float16_t __ret_298; \ - __ret_298 = __s0_298 * vgetq_lane_f16(__s1_298, __p2_298); \ - __ret_298; \ +#define vmulh_laneq_f16(__p0_292, __p1_292, __p2_292) __extension__ ({ \ + float16_t __s0_292 = __p0_292; \ + float16x8_t __s1_292 = __p1_292; \ + float16_t __ret_292; \ + __ret_292 = __s0_292 * vgetq_lane_f16(__s1_292, __p2_292); \ + __ret_292; \ }) #else -#define vmulh_laneq_f16(__p0_299, __p1_299, __p2_299) __extension__ ({ \ - float16_t __s0_299 = __p0_299; \ - float16x8_t __s1_299 = __p1_299; \ - float16x8_t __rev1_299; __rev1_299 = __builtin_shufflevector(__s1_299, __s1_299, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16_t __ret_299; \ - __ret_299 = __s0_299 * __noswap_vgetq_lane_f16(__rev1_299, __p2_299); \ - __ret_299; \ +#define vmulh_laneq_f16(__p0_293, __p1_293, __p2_293) __extension__ ({ \ + float16_t __s0_293 = __p0_293; \ + float16x8_t __s1_293 = __p1_293; \ + float16x8_t __rev1_293; __rev1_293 = __builtin_shufflevector(__s1_293, __s1_293, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret_293; \ + __ret_293 = __s0_293 * __noswap_vgetq_lane_f16(__rev1_293, __p2_293); \ + __ret_293; \ }) #endif #endif #if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__) -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqrdmlahs_s32(int32_t __p0, int32_t __p1, int32_t __p2) { int32_t __ret; __ret = vqadds_s32(__p0, vqrdmulhs_s32(__p1, __p2)); return __ret; } -#else -__ai int32_t vqrdmlahs_s32(int32_t __p0, int32_t __p1, int32_t __p2) { - int32_t __ret; - __ret = __noswap_vqadds_s32(__p0, __noswap_vqrdmulhs_s32(__p1, __p2)); - return 
__ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16_t vqrdmlahh_s16(int16_t __p0, int16_t __p1, int16_t __p2) { int16_t __ret; __ret = vqaddh_s16(__p0, vqrdmulhh_s16(__p1, __p2)); return __ret; } +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlahs_lane_s32(__p0_294, __p1_294, __p2_294, __p3_294) __extension__ ({ \ + int32_t __s0_294 = __p0_294; \ + int32_t __s1_294 = __p1_294; \ + int32x2_t __s2_294 = __p2_294; \ + int32_t __ret_294; \ + __ret_294 = vqadds_s32(__s0_294, vqrdmulhs_s32(__s1_294, vget_lane_s32(__s2_294, __p3_294))); \ + __ret_294; \ +}) #else -__ai int16_t vqrdmlahh_s16(int16_t __p0, int16_t __p1, int16_t __p2) { - int16_t __ret; - __ret = __noswap_vqaddh_s16(__p0, __noswap_vqrdmulhh_s16(__p1, __p2)); - return __ret; -} +#define vqrdmlahs_lane_s32(__p0_295, __p1_295, __p2_295, __p3_295) __extension__ ({ \ + int32_t __s0_295 = __p0_295; \ + int32_t __s1_295 = __p1_295; \ + int32x2_t __s2_295 = __p2_295; \ + int32x2_t __rev2_295; __rev2_295 = __builtin_shufflevector(__s2_295, __s2_295, 1, 0); \ + int32_t __ret_295; \ + __ret_295 = vqadds_s32(__s0_295, vqrdmulhs_s32(__s1_295, __noswap_vget_lane_s32(__rev2_295, __p3_295))); \ + __ret_295; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahs_lane_s32(__p0_300, __p1_300, __p2_300, __p3_300) __extension__ ({ \ - int32_t __s0_300 = __p0_300; \ - int32_t __s1_300 = __p1_300; \ - int32x2_t __s2_300 = __p2_300; \ - int32_t __ret_300; \ - __ret_300 = vqadds_s32(__s0_300, vqrdmulhs_s32(__s1_300, vget_lane_s32(__s2_300, __p3_300))); \ +#define vqrdmlahh_lane_s16(__p0_296, __p1_296, __p2_296, __p3_296) __extension__ ({ \ + int16_t __s0_296 = __p0_296; \ + int16_t __s1_296 = __p1_296; \ + int16x4_t __s2_296 = __p2_296; \ + int16_t __ret_296; \ + __ret_296 = vqaddh_s16(__s0_296, vqrdmulhh_s16(__s1_296, vget_lane_s16(__s2_296, __p3_296))); \ + __ret_296; \ +}) +#else +#define vqrdmlahh_lane_s16(__p0_297, __p1_297, __p2_297, __p3_297) __extension__ ({ \ + int16_t __s0_297 = __p0_297; \ + int16_t __s1_297 = __p1_297; \ + int16x4_t __s2_297 = __p2_297; \ + int16x4_t __rev2_297; __rev2_297 = __builtin_shufflevector(__s2_297, __s2_297, 3, 2, 1, 0); \ + int16_t __ret_297; \ + __ret_297 = vqaddh_s16(__s0_297, vqrdmulhh_s16(__s1_297, __noswap_vget_lane_s16(__rev2_297, __p3_297))); \ + __ret_297; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlahs_laneq_s32(__p0_298, __p1_298, __p2_298, __p3_298) __extension__ ({ \ + int32_t __s0_298 = __p0_298; \ + int32_t __s1_298 = __p1_298; \ + int32x4_t __s2_298 = __p2_298; \ + int32_t __ret_298; \ + __ret_298 = vqadds_s32(__s0_298, vqrdmulhs_s32(__s1_298, vgetq_lane_s32(__s2_298, __p3_298))); \ + __ret_298; \ +}) +#else +#define vqrdmlahs_laneq_s32(__p0_299, __p1_299, __p2_299, __p3_299) __extension__ ({ \ + int32_t __s0_299 = __p0_299; \ + int32_t __s1_299 = __p1_299; \ + int32x4_t __s2_299 = __p2_299; \ + int32x4_t __rev2_299; __rev2_299 = __builtin_shufflevector(__s2_299, __s2_299, 3, 2, 1, 0); \ + int32_t __ret_299; \ + __ret_299 = vqadds_s32(__s0_299, vqrdmulhs_s32(__s1_299, __noswap_vgetq_lane_s32(__rev2_299, __p3_299))); \ + __ret_299; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlahh_laneq_s16(__p0_300, __p1_300, __p2_300, __p3_300) __extension__ ({ \ + int16_t __s0_300 = __p0_300; \ + int16_t __s1_300 = __p1_300; \ + int16x8_t __s2_300 = __p2_300; \ + int16_t __ret_300; \ + __ret_300 = vqaddh_s16(__s0_300, vqrdmulhh_s16(__s1_300, vgetq_lane_s16(__s2_300, __p3_300))); \ __ret_300; \ }) #else -#define vqrdmlahs_lane_s32(__p0_301, __p1_301, __p2_301, __p3_301) __extension__ ({ \ - 
int32_t __s0_301 = __p0_301; \ - int32_t __s1_301 = __p1_301; \ - int32x2_t __s2_301 = __p2_301; \ - int32x2_t __rev2_301; __rev2_301 = __builtin_shufflevector(__s2_301, __s2_301, 1, 0); \ - int32_t __ret_301; \ - __ret_301 = __noswap_vqadds_s32(__s0_301, __noswap_vqrdmulhs_s32(__s1_301, __noswap_vget_lane_s32(__rev2_301, __p3_301))); \ +#define vqrdmlahh_laneq_s16(__p0_301, __p1_301, __p2_301, __p3_301) __extension__ ({ \ + int16_t __s0_301 = __p0_301; \ + int16_t __s1_301 = __p1_301; \ + int16x8_t __s2_301 = __p2_301; \ + int16x8_t __rev2_301; __rev2_301 = __builtin_shufflevector(__s2_301, __s2_301, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16_t __ret_301; \ + __ret_301 = vqaddh_s16(__s0_301, vqrdmulhh_s16(__s1_301, __noswap_vgetq_lane_s16(__rev2_301, __p3_301))); \ __ret_301; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlahh_lane_s16(__p0_302, __p1_302, __p2_302, __p3_302) __extension__ ({ \ - int16_t __s0_302 = __p0_302; \ - int16_t __s1_302 = __p1_302; \ - int16x4_t __s2_302 = __p2_302; \ - int16_t __ret_302; \ - __ret_302 = vqaddh_s16(__s0_302, vqrdmulhh_s16(__s1_302, vget_lane_s16(__s2_302, __p3_302))); \ - __ret_302; \ -}) -#else -#define vqrdmlahh_lane_s16(__p0_303, __p1_303, __p2_303, __p3_303) __extension__ ({ \ - int16_t __s0_303 = __p0_303; \ - int16_t __s1_303 = __p1_303; \ - int16x4_t __s2_303 = __p2_303; \ - int16x4_t __rev2_303; __rev2_303 = __builtin_shufflevector(__s2_303, __s2_303, 3, 2, 1, 0); \ - int16_t __ret_303; \ - __ret_303 = __noswap_vqaddh_s16(__s0_303, __noswap_vqrdmulhh_s16(__s1_303, __noswap_vget_lane_s16(__rev2_303, __p3_303))); \ - __ret_303; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlahs_laneq_s32(__p0_304, __p1_304, __p2_304, __p3_304) __extension__ ({ \ - int32_t __s0_304 = __p0_304; \ - int32_t __s1_304 = __p1_304; \ - int32x4_t __s2_304 = __p2_304; \ - int32_t __ret_304; \ - __ret_304 = vqadds_s32(__s0_304, vqrdmulhs_s32(__s1_304, vgetq_lane_s32(__s2_304, __p3_304))); \ - __ret_304; \ -}) -#else -#define vqrdmlahs_laneq_s32(__p0_305, __p1_305, __p2_305, __p3_305) __extension__ ({ \ - int32_t __s0_305 = __p0_305; \ - int32_t __s1_305 = __p1_305; \ - int32x4_t __s2_305 = __p2_305; \ - int32x4_t __rev2_305; __rev2_305 = __builtin_shufflevector(__s2_305, __s2_305, 3, 2, 1, 0); \ - int32_t __ret_305; \ - __ret_305 = __noswap_vqadds_s32(__s0_305, __noswap_vqrdmulhs_s32(__s1_305, __noswap_vgetq_lane_s32(__rev2_305, __p3_305))); \ - __ret_305; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlahh_laneq_s16(__p0_306, __p1_306, __p2_306, __p3_306) __extension__ ({ \ - int16_t __s0_306 = __p0_306; \ - int16_t __s1_306 = __p1_306; \ - int16x8_t __s2_306 = __p2_306; \ - int16_t __ret_306; \ - __ret_306 = vqaddh_s16(__s0_306, vqrdmulhh_s16(__s1_306, vgetq_lane_s16(__s2_306, __p3_306))); \ - __ret_306; \ -}) -#else -#define vqrdmlahh_laneq_s16(__p0_307, __p1_307, __p2_307, __p3_307) __extension__ ({ \ - int16_t __s0_307 = __p0_307; \ - int16_t __s1_307 = __p1_307; \ - int16x8_t __s2_307 = __p2_307; \ - int16x8_t __rev2_307; __rev2_307 = __builtin_shufflevector(__s2_307, __s2_307, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16_t __ret_307; \ - __ret_307 = __noswap_vqaddh_s16(__s0_307, __noswap_vqrdmulhh_s16(__s1_307, __noswap_vgetq_lane_s16(__rev2_307, __p3_307))); \ - __ret_307; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int32_t vqrdmlshs_s32(int32_t __p0, int32_t __p1, int32_t __p2) { int32_t __ret; __ret = vqsubs_s32(__p0, vqrdmulhs_s32(__p1, __p2)); return __ret; } -#else -__ai int32_t vqrdmlshs_s32(int32_t __p0, int32_t __p1, int32_t __p2) { - int32_t 
__ret; - __ret = __noswap_vqsubs_s32(__p0, __noswap_vqrdmulhs_s32(__p1, __p2)); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ __ai int16_t vqrdmlshh_s16(int16_t __p0, int16_t __p1, int16_t __p2) { int16_t __ret; __ret = vqsubh_s16(__p0, vqrdmulhh_s16(__p1, __p2)); return __ret; } +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlshs_lane_s32(__p0_302, __p1_302, __p2_302, __p3_302) __extension__ ({ \ + int32_t __s0_302 = __p0_302; \ + int32_t __s1_302 = __p1_302; \ + int32x2_t __s2_302 = __p2_302; \ + int32_t __ret_302; \ + __ret_302 = vqsubs_s32(__s0_302, vqrdmulhs_s32(__s1_302, vget_lane_s32(__s2_302, __p3_302))); \ + __ret_302; \ +}) #else -__ai int16_t vqrdmlshh_s16(int16_t __p0, int16_t __p1, int16_t __p2) { - int16_t __ret; - __ret = __noswap_vqsubh_s16(__p0, __noswap_vqrdmulhh_s16(__p1, __p2)); - return __ret; -} +#define vqrdmlshs_lane_s32(__p0_303, __p1_303, __p2_303, __p3_303) __extension__ ({ \ + int32_t __s0_303 = __p0_303; \ + int32_t __s1_303 = __p1_303; \ + int32x2_t __s2_303 = __p2_303; \ + int32x2_t __rev2_303; __rev2_303 = __builtin_shufflevector(__s2_303, __s2_303, 1, 0); \ + int32_t __ret_303; \ + __ret_303 = vqsubs_s32(__s0_303, vqrdmulhs_s32(__s1_303, __noswap_vget_lane_s32(__rev2_303, __p3_303))); \ + __ret_303; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshs_lane_s32(__p0_308, __p1_308, __p2_308, __p3_308) __extension__ ({ \ - int32_t __s0_308 = __p0_308; \ - int32_t __s1_308 = __p1_308; \ - int32x2_t __s2_308 = __p2_308; \ - int32_t __ret_308; \ - __ret_308 = vqsubs_s32(__s0_308, vqrdmulhs_s32(__s1_308, vget_lane_s32(__s2_308, __p3_308))); \ +#define vqrdmlshh_lane_s16(__p0_304, __p1_304, __p2_304, __p3_304) __extension__ ({ \ + int16_t __s0_304 = __p0_304; \ + int16_t __s1_304 = __p1_304; \ + int16x4_t __s2_304 = __p2_304; \ + int16_t __ret_304; \ + __ret_304 = vqsubh_s16(__s0_304, vqrdmulhh_s16(__s1_304, vget_lane_s16(__s2_304, __p3_304))); \ + __ret_304; \ +}) +#else +#define vqrdmlshh_lane_s16(__p0_305, __p1_305, __p2_305, __p3_305) __extension__ ({ \ + int16_t __s0_305 = __p0_305; \ + int16_t __s1_305 = __p1_305; \ + int16x4_t __s2_305 = __p2_305; \ + int16x4_t __rev2_305; __rev2_305 = __builtin_shufflevector(__s2_305, __s2_305, 3, 2, 1, 0); \ + int16_t __ret_305; \ + __ret_305 = vqsubh_s16(__s0_305, vqrdmulhh_s16(__s1_305, __noswap_vget_lane_s16(__rev2_305, __p3_305))); \ + __ret_305; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlshs_laneq_s32(__p0_306, __p1_306, __p2_306, __p3_306) __extension__ ({ \ + int32_t __s0_306 = __p0_306; \ + int32_t __s1_306 = __p1_306; \ + int32x4_t __s2_306 = __p2_306; \ + int32_t __ret_306; \ + __ret_306 = vqsubs_s32(__s0_306, vqrdmulhs_s32(__s1_306, vgetq_lane_s32(__s2_306, __p3_306))); \ + __ret_306; \ +}) +#else +#define vqrdmlshs_laneq_s32(__p0_307, __p1_307, __p2_307, __p3_307) __extension__ ({ \ + int32_t __s0_307 = __p0_307; \ + int32_t __s1_307 = __p1_307; \ + int32x4_t __s2_307 = __p2_307; \ + int32x4_t __rev2_307; __rev2_307 = __builtin_shufflevector(__s2_307, __s2_307, 3, 2, 1, 0); \ + int32_t __ret_307; \ + __ret_307 = vqsubs_s32(__s0_307, vqrdmulhs_s32(__s1_307, __noswap_vgetq_lane_s32(__rev2_307, __p3_307))); \ + __ret_307; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlshh_laneq_s16(__p0_308, __p1_308, __p2_308, __p3_308) __extension__ ({ \ + int16_t __s0_308 = __p0_308; \ + int16_t __s1_308 = __p1_308; \ + int16x8_t __s2_308 = __p2_308; \ + int16_t __ret_308; \ + __ret_308 = vqsubh_s16(__s0_308, vqrdmulhh_s16(__s1_308, vgetq_lane_s16(__s2_308, __p3_308))); \ __ret_308; \ }) #else 
-#define vqrdmlshs_lane_s32(__p0_309, __p1_309, __p2_309, __p3_309) __extension__ ({ \ - int32_t __s0_309 = __p0_309; \ - int32_t __s1_309 = __p1_309; \ - int32x2_t __s2_309 = __p2_309; \ - int32x2_t __rev2_309; __rev2_309 = __builtin_shufflevector(__s2_309, __s2_309, 1, 0); \ - int32_t __ret_309; \ - __ret_309 = __noswap_vqsubs_s32(__s0_309, __noswap_vqrdmulhs_s32(__s1_309, __noswap_vget_lane_s32(__rev2_309, __p3_309))); \ +#define vqrdmlshh_laneq_s16(__p0_309, __p1_309, __p2_309, __p3_309) __extension__ ({ \ + int16_t __s0_309 = __p0_309; \ + int16_t __s1_309 = __p1_309; \ + int16x8_t __s2_309 = __p2_309; \ + int16x8_t __rev2_309; __rev2_309 = __builtin_shufflevector(__s2_309, __s2_309, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16_t __ret_309; \ + __ret_309 = vqsubh_s16(__s0_309, vqrdmulhh_s16(__s1_309, __noswap_vgetq_lane_s16(__rev2_309, __p3_309))); \ __ret_309; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlshh_lane_s16(__p0_310, __p1_310, __p2_310, __p3_310) __extension__ ({ \ - int16_t __s0_310 = __p0_310; \ - int16_t __s1_310 = __p1_310; \ - int16x4_t __s2_310 = __p2_310; \ - int16_t __ret_310; \ - __ret_310 = vqsubh_s16(__s0_310, vqrdmulhh_s16(__s1_310, vget_lane_s16(__s2_310, __p3_310))); \ - __ret_310; \ -}) -#else -#define vqrdmlshh_lane_s16(__p0_311, __p1_311, __p2_311, __p3_311) __extension__ ({ \ - int16_t __s0_311 = __p0_311; \ - int16_t __s1_311 = __p1_311; \ - int16x4_t __s2_311 = __p2_311; \ - int16x4_t __rev2_311; __rev2_311 = __builtin_shufflevector(__s2_311, __s2_311, 3, 2, 1, 0); \ - int16_t __ret_311; \ - __ret_311 = __noswap_vqsubh_s16(__s0_311, __noswap_vqrdmulhh_s16(__s1_311, __noswap_vget_lane_s16(__rev2_311, __p3_311))); \ - __ret_311; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlshs_laneq_s32(__p0_312, __p1_312, __p2_312, __p3_312) __extension__ ({ \ - int32_t __s0_312 = __p0_312; \ - int32_t __s1_312 = __p1_312; \ - int32x4_t __s2_312 = __p2_312; \ - int32_t __ret_312; \ - __ret_312 = vqsubs_s32(__s0_312, vqrdmulhs_s32(__s1_312, vgetq_lane_s32(__s2_312, __p3_312))); \ - __ret_312; \ -}) -#else -#define vqrdmlshs_laneq_s32(__p0_313, __p1_313, __p2_313, __p3_313) __extension__ ({ \ - int32_t __s0_313 = __p0_313; \ - int32_t __s1_313 = __p1_313; \ - int32x4_t __s2_313 = __p2_313; \ - int32x4_t __rev2_313; __rev2_313 = __builtin_shufflevector(__s2_313, __s2_313, 3, 2, 1, 0); \ - int32_t __ret_313; \ - __ret_313 = __noswap_vqsubs_s32(__s0_313, __noswap_vqrdmulhs_s32(__s1_313, __noswap_vgetq_lane_s32(__rev2_313, __p3_313))); \ - __ret_313; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlshh_laneq_s16(__p0_314, __p1_314, __p2_314, __p3_314) __extension__ ({ \ - int16_t __s0_314 = __p0_314; \ - int16_t __s1_314 = __p1_314; \ - int16x8_t __s2_314 = __p2_314; \ - int16_t __ret_314; \ - __ret_314 = vqsubh_s16(__s0_314, vqrdmulhh_s16(__s1_314, vgetq_lane_s16(__s2_314, __p3_314))); \ - __ret_314; \ -}) -#else -#define vqrdmlshh_laneq_s16(__p0_315, __p1_315, __p2_315, __p3_315) __extension__ ({ \ - int16_t __s0_315 = __p0_315; \ - int16_t __s1_315 = __p1_315; \ - int16x8_t __s2_315 = __p2_315; \ - int16x8_t __rev2_315; __rev2_315 = __builtin_shufflevector(__s2_315, __s2_315, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16_t __ret_315; \ - __ret_315 = __noswap_vqsubh_s16(__s0_315, __noswap_vqrdmulhh_s16(__s1_315, __noswap_vgetq_lane_s16(__rev2_315, __p3_315))); \ - __ret_315; \ -}) -#endif - #endif #if defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ @@ -72870,161 +62279,139 @@ __ai int32x4_t vaddw_high_s16(int32x4_t __p0, int16x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ 
-#define vcopyq_lane_p64(__p0_316, __p1_316, __p2_316, __p3_316) __extension__ ({ \ +#define vcopyq_lane_p64(__p0_310, __p1_310, __p2_310, __p3_310) __extension__ ({ \ + poly64x2_t __s0_310 = __p0_310; \ + poly64x1_t __s2_310 = __p2_310; \ + poly64x2_t __ret_310; \ + __ret_310 = vsetq_lane_p64(vget_lane_p64(__s2_310, __p3_310), __s0_310, __p1_310); \ + __ret_310; \ +}) +#else +#define vcopyq_lane_p64(__p0_311, __p1_311, __p2_311, __p3_311) __extension__ ({ \ + poly64x2_t __s0_311 = __p0_311; \ + poly64x1_t __s2_311 = __p2_311; \ + poly64x2_t __rev0_311; __rev0_311 = __builtin_shufflevector(__s0_311, __s0_311, 1, 0); \ + poly64x2_t __ret_311; \ + __ret_311 = __noswap_vsetq_lane_p64(vget_lane_p64(__s2_311, __p3_311), __rev0_311, __p1_311); \ + __ret_311 = __builtin_shufflevector(__ret_311, __ret_311, 1, 0); \ + __ret_311; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopyq_lane_f64(__p0_312, __p1_312, __p2_312, __p3_312) __extension__ ({ \ + float64x2_t __s0_312 = __p0_312; \ + float64x1_t __s2_312 = __p2_312; \ + float64x2_t __ret_312; \ + __ret_312 = vsetq_lane_f64(vget_lane_f64(__s2_312, __p3_312), __s0_312, __p1_312); \ + __ret_312; \ +}) +#else +#define vcopyq_lane_f64(__p0_313, __p1_313, __p2_313, __p3_313) __extension__ ({ \ + float64x2_t __s0_313 = __p0_313; \ + float64x1_t __s2_313 = __p2_313; \ + float64x2_t __rev0_313; __rev0_313 = __builtin_shufflevector(__s0_313, __s0_313, 1, 0); \ + float64x2_t __ret_313; \ + __ret_313 = __noswap_vsetq_lane_f64(vget_lane_f64(__s2_313, __p3_313), __rev0_313, __p1_313); \ + __ret_313 = __builtin_shufflevector(__ret_313, __ret_313, 1, 0); \ + __ret_313; \ +}) +#endif + +#define vcopy_lane_p64(__p0_314, __p1_314, __p2_314, __p3_314) __extension__ ({ \ + poly64x1_t __s0_314 = __p0_314; \ + poly64x1_t __s2_314 = __p2_314; \ + poly64x1_t __ret_314; \ + __ret_314 = vset_lane_p64(vget_lane_p64(__s2_314, __p3_314), __s0_314, __p1_314); \ + __ret_314; \ +}) +#define vcopy_lane_f64(__p0_315, __p1_315, __p2_315, __p3_315) __extension__ ({ \ + float64x1_t __s0_315 = __p0_315; \ + float64x1_t __s2_315 = __p2_315; \ + float64x1_t __ret_315; \ + __ret_315 = vset_lane_f64(vget_lane_f64(__s2_315, __p3_315), __s0_315, __p1_315); \ + __ret_315; \ +}) +#ifdef __LITTLE_ENDIAN__ +#define vcopyq_laneq_p64(__p0_316, __p1_316, __p2_316, __p3_316) __extension__ ({ \ poly64x2_t __s0_316 = __p0_316; \ - poly64x1_t __s2_316 = __p2_316; \ + poly64x2_t __s2_316 = __p2_316; \ poly64x2_t __ret_316; \ - __ret_316 = vsetq_lane_p64(vget_lane_p64(__s2_316, __p3_316), __s0_316, __p1_316); \ + __ret_316 = vsetq_lane_p64(vgetq_lane_p64(__s2_316, __p3_316), __s0_316, __p1_316); \ __ret_316; \ }) #else -#define vcopyq_lane_p64(__p0_317, __p1_317, __p2_317, __p3_317) __extension__ ({ \ +#define vcopyq_laneq_p64(__p0_317, __p1_317, __p2_317, __p3_317) __extension__ ({ \ poly64x2_t __s0_317 = __p0_317; \ - poly64x1_t __s2_317 = __p2_317; \ + poly64x2_t __s2_317 = __p2_317; \ poly64x2_t __rev0_317; __rev0_317 = __builtin_shufflevector(__s0_317, __s0_317, 1, 0); \ + poly64x2_t __rev2_317; __rev2_317 = __builtin_shufflevector(__s2_317, __s2_317, 1, 0); \ poly64x2_t __ret_317; \ - __ret_317 = __noswap_vsetq_lane_p64(__noswap_vget_lane_p64(__s2_317, __p3_317), __rev0_317, __p1_317); \ + __ret_317 = __noswap_vsetq_lane_p64(__noswap_vgetq_lane_p64(__rev2_317, __p3_317), __rev0_317, __p1_317); \ __ret_317 = __builtin_shufflevector(__ret_317, __ret_317, 1, 0); \ __ret_317; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_f64(__p0_318, __p1_318, __p2_318, __p3_318) __extension__ ({ \ 
+#define vcopyq_laneq_f64(__p0_318, __p1_318, __p2_318, __p3_318) __extension__ ({ \ float64x2_t __s0_318 = __p0_318; \ - float64x1_t __s2_318 = __p2_318; \ + float64x2_t __s2_318 = __p2_318; \ float64x2_t __ret_318; \ - __ret_318 = vsetq_lane_f64(vget_lane_f64(__s2_318, __p3_318), __s0_318, __p1_318); \ + __ret_318 = vsetq_lane_f64(vgetq_lane_f64(__s2_318, __p3_318), __s0_318, __p1_318); \ __ret_318; \ }) #else -#define vcopyq_lane_f64(__p0_319, __p1_319, __p2_319, __p3_319) __extension__ ({ \ +#define vcopyq_laneq_f64(__p0_319, __p1_319, __p2_319, __p3_319) __extension__ ({ \ float64x2_t __s0_319 = __p0_319; \ - float64x1_t __s2_319 = __p2_319; \ + float64x2_t __s2_319 = __p2_319; \ float64x2_t __rev0_319; __rev0_319 = __builtin_shufflevector(__s0_319, __s0_319, 1, 0); \ + float64x2_t __rev2_319; __rev2_319 = __builtin_shufflevector(__s2_319, __s2_319, 1, 0); \ float64x2_t __ret_319; \ - __ret_319 = __noswap_vsetq_lane_f64(__noswap_vget_lane_f64(__s2_319, __p3_319), __rev0_319, __p1_319); \ + __ret_319 = __noswap_vsetq_lane_f64(__noswap_vgetq_lane_f64(__rev2_319, __p3_319), __rev0_319, __p1_319); \ __ret_319 = __builtin_shufflevector(__ret_319, __ret_319, 1, 0); \ __ret_319; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_p64(__p0_320, __p1_320, __p2_320, __p3_320) __extension__ ({ \ +#define vcopy_laneq_p64(__p0_320, __p1_320, __p2_320, __p3_320) __extension__ ({ \ poly64x1_t __s0_320 = __p0_320; \ - poly64x1_t __s2_320 = __p2_320; \ + poly64x2_t __s2_320 = __p2_320; \ poly64x1_t __ret_320; \ - __ret_320 = vset_lane_p64(vget_lane_p64(__s2_320, __p3_320), __s0_320, __p1_320); \ + __ret_320 = vset_lane_p64(vgetq_lane_p64(__s2_320, __p3_320), __s0_320, __p1_320); \ __ret_320; \ }) #else -#define vcopy_lane_p64(__p0_321, __p1_321, __p2_321, __p3_321) __extension__ ({ \ +#define vcopy_laneq_p64(__p0_321, __p1_321, __p2_321, __p3_321) __extension__ ({ \ poly64x1_t __s0_321 = __p0_321; \ - poly64x1_t __s2_321 = __p2_321; \ + poly64x2_t __s2_321 = __p2_321; \ + poly64x2_t __rev2_321; __rev2_321 = __builtin_shufflevector(__s2_321, __s2_321, 1, 0); \ poly64x1_t __ret_321; \ - __ret_321 = __noswap_vset_lane_p64(__noswap_vget_lane_p64(__s2_321, __p3_321), __s0_321, __p1_321); \ + __ret_321 = vset_lane_p64(__noswap_vgetq_lane_p64(__rev2_321, __p3_321), __s0_321, __p1_321); \ __ret_321; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_f64(__p0_322, __p1_322, __p2_322, __p3_322) __extension__ ({ \ +#define vcopy_laneq_f64(__p0_322, __p1_322, __p2_322, __p3_322) __extension__ ({ \ float64x1_t __s0_322 = __p0_322; \ - float64x1_t __s2_322 = __p2_322; \ + float64x2_t __s2_322 = __p2_322; \ float64x1_t __ret_322; \ - __ret_322 = vset_lane_f64(vget_lane_f64(__s2_322, __p3_322), __s0_322, __p1_322); \ + __ret_322 = vset_lane_f64(vgetq_lane_f64(__s2_322, __p3_322), __s0_322, __p1_322); \ __ret_322; \ }) #else -#define vcopy_lane_f64(__p0_323, __p1_323, __p2_323, __p3_323) __extension__ ({ \ +#define vcopy_laneq_f64(__p0_323, __p1_323, __p2_323, __p3_323) __extension__ ({ \ float64x1_t __s0_323 = __p0_323; \ - float64x1_t __s2_323 = __p2_323; \ + float64x2_t __s2_323 = __p2_323; \ + float64x2_t __rev2_323; __rev2_323 = __builtin_shufflevector(__s2_323, __s2_323, 1, 0); \ float64x1_t __ret_323; \ - __ret_323 = __noswap_vset_lane_f64(__noswap_vget_lane_f64(__s2_323, __p3_323), __s0_323, __p1_323); \ + __ret_323 = vset_lane_f64(__noswap_vgetq_lane_f64(__rev2_323, __p3_323), __s0_323, __p1_323); \ __ret_323; \ }) #endif -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_p64(__p0_324, __p1_324, 
__p2_324, __p3_324) __extension__ ({ \ - poly64x2_t __s0_324 = __p0_324; \ - poly64x2_t __s2_324 = __p2_324; \ - poly64x2_t __ret_324; \ - __ret_324 = vsetq_lane_p64(vgetq_lane_p64(__s2_324, __p3_324), __s0_324, __p1_324); \ - __ret_324; \ -}) -#else -#define vcopyq_laneq_p64(__p0_325, __p1_325, __p2_325, __p3_325) __extension__ ({ \ - poly64x2_t __s0_325 = __p0_325; \ - poly64x2_t __s2_325 = __p2_325; \ - poly64x2_t __rev0_325; __rev0_325 = __builtin_shufflevector(__s0_325, __s0_325, 1, 0); \ - poly64x2_t __rev2_325; __rev2_325 = __builtin_shufflevector(__s2_325, __s2_325, 1, 0); \ - poly64x2_t __ret_325; \ - __ret_325 = __noswap_vsetq_lane_p64(__noswap_vgetq_lane_p64(__rev2_325, __p3_325), __rev0_325, __p1_325); \ - __ret_325 = __builtin_shufflevector(__ret_325, __ret_325, 1, 0); \ - __ret_325; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_f64(__p0_326, __p1_326, __p2_326, __p3_326) __extension__ ({ \ - float64x2_t __s0_326 = __p0_326; \ - float64x2_t __s2_326 = __p2_326; \ - float64x2_t __ret_326; \ - __ret_326 = vsetq_lane_f64(vgetq_lane_f64(__s2_326, __p3_326), __s0_326, __p1_326); \ - __ret_326; \ -}) -#else -#define vcopyq_laneq_f64(__p0_327, __p1_327, __p2_327, __p3_327) __extension__ ({ \ - float64x2_t __s0_327 = __p0_327; \ - float64x2_t __s2_327 = __p2_327; \ - float64x2_t __rev0_327; __rev0_327 = __builtin_shufflevector(__s0_327, __s0_327, 1, 0); \ - float64x2_t __rev2_327; __rev2_327 = __builtin_shufflevector(__s2_327, __s2_327, 1, 0); \ - float64x2_t __ret_327; \ - __ret_327 = __noswap_vsetq_lane_f64(__noswap_vgetq_lane_f64(__rev2_327, __p3_327), __rev0_327, __p1_327); \ - __ret_327 = __builtin_shufflevector(__ret_327, __ret_327, 1, 0); \ - __ret_327; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p64(__p0_328, __p1_328, __p2_328, __p3_328) __extension__ ({ \ - poly64x1_t __s0_328 = __p0_328; \ - poly64x2_t __s2_328 = __p2_328; \ - poly64x1_t __ret_328; \ - __ret_328 = vset_lane_p64(vgetq_lane_p64(__s2_328, __p3_328), __s0_328, __p1_328); \ - __ret_328; \ -}) -#else -#define vcopy_laneq_p64(__p0_329, __p1_329, __p2_329, __p3_329) __extension__ ({ \ - poly64x1_t __s0_329 = __p0_329; \ - poly64x2_t __s2_329 = __p2_329; \ - poly64x2_t __rev2_329; __rev2_329 = __builtin_shufflevector(__s2_329, __s2_329, 1, 0); \ - poly64x1_t __ret_329; \ - __ret_329 = __noswap_vset_lane_p64(__noswap_vgetq_lane_p64(__rev2_329, __p3_329), __s0_329, __p1_329); \ - __ret_329; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_f64(__p0_330, __p1_330, __p2_330, __p3_330) __extension__ ({ \ - float64x1_t __s0_330 = __p0_330; \ - float64x2_t __s2_330 = __p2_330; \ - float64x1_t __ret_330; \ - __ret_330 = vset_lane_f64(vgetq_lane_f64(__s2_330, __p3_330), __s0_330, __p1_330); \ - __ret_330; \ -}) -#else -#define vcopy_laneq_f64(__p0_331, __p1_331, __p2_331, __p3_331) __extension__ ({ \ - float64x1_t __s0_331 = __p0_331; \ - float64x2_t __s2_331 = __p2_331; \ - float64x2_t __rev2_331; __rev2_331 = __builtin_shufflevector(__s2_331, __s2_331, 1, 0); \ - float64x1_t __ret_331; \ - __ret_331 = __noswap_vset_lane_f64(__noswap_vgetq_lane_f64(__rev2_331, __p3_331), __s0_331, __p1_331); \ - __ret_331; \ -}) -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint16x8_t vmlal_high_u8(uint16x8_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint16x8_t __ret; @@ -73377,52 +62764,38 @@ __ai int32x4_t vmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { } #endif +#define vmulx_lane_f64(__p0_324, __p1_324, __p2_324) __extension__ ({ \ + float64x1_t __s0_324 = __p0_324; \ + 
float64x1_t __s1_324 = __p1_324; \ + float64x1_t __ret_324; \ + float64_t __x_324 = vget_lane_f64(__s0_324, 0); \ + float64_t __y_324 = vget_lane_f64(__s1_324, __p2_324); \ + float64_t __z_324 = vmulxd_f64(__x_324, __y_324); \ + __ret_324 = vset_lane_f64(__z_324, __s0_324, __p2_324); \ + __ret_324; \ +}) #ifdef __LITTLE_ENDIAN__ -#define vmulx_lane_f64(__p0_332, __p1_332, __p2_332) __extension__ ({ \ - float64x1_t __s0_332 = __p0_332; \ - float64x1_t __s1_332 = __p1_332; \ - float64x1_t __ret_332; \ - float64_t __x_332 = vget_lane_f64(__s0_332, 0); \ - float64_t __y_332 = vget_lane_f64(__s1_332, __p2_332); \ - float64_t __z_332 = vmulxd_f64(__x_332, __y_332); \ - __ret_332 = vset_lane_f64(__z_332, __s0_332, __p2_332); \ - __ret_332; \ +#define vmulx_laneq_f64(__p0_325, __p1_325, __p2_325) __extension__ ({ \ + float64x1_t __s0_325 = __p0_325; \ + float64x2_t __s1_325 = __p1_325; \ + float64x1_t __ret_325; \ + float64_t __x_325 = vget_lane_f64(__s0_325, 0); \ + float64_t __y_325 = vgetq_lane_f64(__s1_325, __p2_325); \ + float64_t __z_325 = vmulxd_f64(__x_325, __y_325); \ + __ret_325 = vset_lane_f64(__z_325, __s0_325, 0); \ + __ret_325; \ }) #else -#define vmulx_lane_f64(__p0_333, __p1_333, __p2_333) __extension__ ({ \ - float64x1_t __s0_333 = __p0_333; \ - float64x1_t __s1_333 = __p1_333; \ - float64x1_t __ret_333; \ - float64_t __x_333 = __noswap_vget_lane_f64(__s0_333, 0); \ - float64_t __y_333 = __noswap_vget_lane_f64(__s1_333, __p2_333); \ - float64_t __z_333 = __noswap_vmulxd_f64(__x_333, __y_333); \ - __ret_333 = __noswap_vset_lane_f64(__z_333, __s0_333, __p2_333); \ - __ret_333; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulx_laneq_f64(__p0_334, __p1_334, __p2_334) __extension__ ({ \ - float64x1_t __s0_334 = __p0_334; \ - float64x2_t __s1_334 = __p1_334; \ - float64x1_t __ret_334; \ - float64_t __x_334 = vget_lane_f64(__s0_334, 0); \ - float64_t __y_334 = vgetq_lane_f64(__s1_334, __p2_334); \ - float64_t __z_334 = vmulxd_f64(__x_334, __y_334); \ - __ret_334 = vset_lane_f64(__z_334, __s0_334, 0); \ - __ret_334; \ -}) -#else -#define vmulx_laneq_f64(__p0_335, __p1_335, __p2_335) __extension__ ({ \ - float64x1_t __s0_335 = __p0_335; \ - float64x2_t __s1_335 = __p1_335; \ - float64x2_t __rev1_335; __rev1_335 = __builtin_shufflevector(__s1_335, __s1_335, 1, 0); \ - float64x1_t __ret_335; \ - float64_t __x_335 = __noswap_vget_lane_f64(__s0_335, 0); \ - float64_t __y_335 = __noswap_vgetq_lane_f64(__rev1_335, __p2_335); \ - float64_t __z_335 = __noswap_vmulxd_f64(__x_335, __y_335); \ - __ret_335 = __noswap_vset_lane_f64(__z_335, __s0_335, 0); \ - __ret_335; \ +#define vmulx_laneq_f64(__p0_326, __p1_326, __p2_326) __extension__ ({ \ + float64x1_t __s0_326 = __p0_326; \ + float64x2_t __s1_326 = __p1_326; \ + float64x2_t __rev1_326; __rev1_326 = __builtin_shufflevector(__s1_326, __s1_326, 1, 0); \ + float64x1_t __ret_326; \ + float64_t __x_326 = vget_lane_f64(__s0_326, 0); \ + float64_t __y_326 = __noswap_vgetq_lane_f64(__rev1_326, __p2_326); \ + float64_t __z_326 = vmulxd_f64(__x_326, __y_326); \ + __ret_326 = vset_lane_f64(__z_326, __s0_326, 0); \ + __ret_326; \ }) #endif diff --git a/lib/include/avx512bwintrin.h b/lib/include/avx512bwintrin.h index cb2e07619c..3765584076 100644 --- a/lib/include/avx512bwintrin.h +++ b/lib/include/avx512bwintrin.h @@ -1731,13 +1731,13 @@ _mm512_loadu_epi16 (void const *__P) struct __loadu_epi16 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_epi16*)__P)->__v; + return ((const struct __loadu_epi16*)__P)->__v; } 
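The recurring change in these load hunks only adds const to the pointer each load reads through; the idiom itself is unchanged: a packed, __may_alias__ wrapper struct lets the compiler emit an unaligned load without violating strict aliasing. A self-contained sketch of that idiom under generic, illustrative names (wrap and load_u64_unaligned are not from the headers):

#include <stdint.h>

/* Unaligned, aliasing-safe 64-bit load, mirroring the __loadu_* pattern. */
static inline uint64_t load_u64_unaligned(const void *p) {
  struct wrap { uint64_t v; } __attribute__((__packed__, __may_alias__));
  return ((const struct wrap *)p)->v;  /* const-qualified, as in the patched code */
}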
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, + return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, (__v32hi) __W, (__mmask32) __U); } @@ -1745,7 +1745,7 @@ _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, + return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, (__v32hi) _mm512_setzero_si512 (), (__mmask32) __U); @@ -1757,13 +1757,13 @@ _mm512_loadu_epi8 (void const *__P) struct __loadu_epi8 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_epi8*)__P)->__v; + return ((const struct __loadu_epi8*)__P)->__v; } static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, + return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, (__v64qi) __W, (__mmask64) __U); } @@ -1771,7 +1771,7 @@ _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) { - return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, + return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, (__v64qi) _mm512_setzero_si512 (), (__mmask64) __U); diff --git a/lib/include/avx512fintrin.h b/lib/include/avx512fintrin.h index 132761f9ef..7465da379b 100644 --- a/lib/include/avx512fintrin.h +++ b/lib/include/avx512fintrin.h @@ -4305,7 +4305,7 @@ _mm512_loadu_si512 (void const *__P) struct __loadu_si512 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_si512*)__P)->__v; + return ((const struct __loadu_si512*)__P)->__v; } static __inline __m512i __DEFAULT_FN_ATTRS512 @@ -4314,7 +4314,7 @@ _mm512_loadu_epi32 (void const *__P) struct __loadu_epi32 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_epi32*)__P)->__v; + return ((const struct __loadu_epi32*)__P)->__v; } static __inline __m512i __DEFAULT_FN_ATTRS512 @@ -4341,7 +4341,7 @@ _mm512_loadu_epi64 (void const *__P) struct __loadu_epi64 { __m512i_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_epi64*)__P)->__v; + return ((const struct __loadu_epi64*)__P)->__v; } static __inline __m512i __DEFAULT_FN_ATTRS512 @@ -4401,7 +4401,7 @@ _mm512_loadu_pd(void const *__p) struct __loadu_pd { __m512d_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_pd*)__p)->__v; + return ((const struct __loadu_pd*)__p)->__v; } static __inline __m512 __DEFAULT_FN_ATTRS512 @@ -4410,13 +4410,13 @@ _mm512_loadu_ps(void const *__p) struct __loadu_ps { __m512_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_ps*)__p)->__v; + return ((const struct __loadu_ps*)__p)->__v; } static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p) { - return *(__m512*)__p; + return *(const __m512*)__p; } static __inline __m512 __DEFAULT_FN_ATTRS512 @@ -4439,7 +4439,7 @@ _mm512_maskz_load_ps(__mmask16 __U, void const *__P) static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p) { - return *(__m512d*)__p; + return *(const __m512d*)__p; } 
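Of the masked variants above, the _mask_ forms merge loaded elements into __W wherever the corresponding bit of __U is set, while the _maskz_ forms zero the disabled lanes. A small usage sketch, assuming AVX-512BW is enabled at compile time (load_even_epi16 is an illustrative name):

#include <immintrin.h>

/* Keep only even-numbered 16-bit lanes of the 32 loaded; odd lanes are zeroed. */
static inline __m512i load_even_epi16(const void *p) {
  __mmask32 even = 0x55555555;               /* bit i enables lane i */
  return _mm512_maskz_loadu_epi16(even, p);  /* zero-masking form */
}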
static __inline __m512d __DEFAULT_FN_ATTRS512 @@ -4462,19 +4462,19 @@ _mm512_maskz_load_pd(__mmask8 __U, void const *__P) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512 (void const *__P) { - return *(__m512i *) __P; + return *(const __m512i *) __P; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32 (void const *__P) { - return *(__m512i *) __P; + return *(const __m512i *) __P; } static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64 (void const *__P) { - return *(__m512i *) __P; + return *(const __m512i *) __P; } /* SIMD store ops */ @@ -7658,13 +7658,13 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) #define _mm512_i32gather_ps(index, addr, scale) \ (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ (void const *)(addr), \ - (__v16sf)(__m512)(index), \ + (__v16si)(__m512)(index), \ (__mmask16)-1, (int)(scale)) #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \ (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ (void const *)(addr), \ - (__v16sf)(__m512)(index), \ + (__v16si)(__m512)(index), \ (__mmask16)(mask), (int)(scale)) #define _mm512_i32gather_epi32(index, addr, scale) \ @@ -8436,7 +8436,7 @@ _store_mask16(__mmask16 *__A, __mmask16 __B) { } static __inline__ void __DEFAULT_FN_ATTRS512 -_mm512_stream_si512 (__m512i * __P, __m512i __A) +_mm512_stream_si512 (void * __P, __m512i __A) { typedef __v8di __v8di_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P); @@ -8450,14 +8450,14 @@ _mm512_stream_load_si512 (void const *__P) } static __inline__ void __DEFAULT_FN_ATTRS512 -_mm512_stream_pd (double *__P, __m512d __A) +_mm512_stream_pd (void *__P, __m512d __A) { typedef __v8df __v8df_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P); } static __inline__ void __DEFAULT_FN_ATTRS512 -_mm512_stream_ps (float *__P, __m512 __A) +_mm512_stream_ps (void *__P, __m512 __A) { typedef __v16sf __v16sf_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P); @@ -8724,13 +8724,13 @@ _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) (__v4sf)_mm_setzero_ps(), 0, 4, 4, 4); - return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1); + return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss (__mmask8 __U, const float* __A) { - return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A, + return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A, (__v4sf) _mm_setzero_ps(), __U & 1); } @@ -8742,13 +8742,13 @@ _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) (__v2df)_mm_setzero_pd(), 0, 2); - return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1); + return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd (__mmask8 __U, const double* __A) { - return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, + return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, (__v2df) _mm_setzero_pd(), __U & 1); } @@ -9659,6 +9659,23 @@ _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) { } #undef _mm512_mask_reduce_operator +/// Moves the least significant 32 bits of a vector of [16 x i32] to a +/// 32-bit signed integer value. 
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the VMOVD / MOVD instruction.
+///
+/// \param __A
+/// A vector of [16 x i32]. The least significant 32 bits are moved to the
+/// destination.
+/// \returns A 32-bit signed integer containing the moved value.
+static __inline__ int __DEFAULT_FN_ATTRS512
+_mm512_cvtsi512_si32(__m512i __A) {
+ __v16si __b = (__v16si)__A;
+ return __b[0];
+}
+
 #undef __DEFAULT_FN_ATTRS512
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS
diff --git a/lib/include/avx512vlbwintrin.h b/lib/include/avx512vlbwintrin.h
index ead09466bc..cd9f2400da 100644
--- a/lib/include/avx512vlbwintrin.h
+++ b/lib/include/avx512vlbwintrin.h
@@ -2289,13 +2289,13 @@ _mm_loadu_epi16 (void const *__P)
 struct __loadu_epi16 { __m128i_u __v; } __attribute__((__packed__, __may_alias__));
- return ((struct __loadu_epi16*)__P)->__v;
+ return ((const struct __loadu_epi16*)__P)->__v;
 }
 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P) {
- return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+ return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
 (__v8hi) __W, (__mmask8) __U);
 }
@@ -2303,7 +2303,7 @@ _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P) {
- return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+ return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
 (__v8hi) _mm_setzero_si128 (), (__mmask8) __U);
@@ -2315,13 +2315,13 @@ _mm256_loadu_epi16 (void const *__P)
 struct __loadu_epi16 { __m256i_u __v; } __attribute__((__packed__, __may_alias__));
- return ((struct __loadu_epi16*)__P)->__v;
+ return ((const struct __loadu_epi16*)__P)->__v;
 }
 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P) {
- return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+ return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
 (__v16hi) __W, (__mmask16) __U);
 }
@@ -2329,7 +2329,7 @@ _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P) {
- return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+ return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
 (__v16hi) _mm256_setzero_si256 (), (__mmask16) __U);
@@ -2341,13 +2341,13 @@ _mm_loadu_epi8 (void const *__P)
 struct __loadu_epi8 { __m128i_u __v; } __attribute__((__packed__, __may_alias__));
- return ((struct __loadu_epi8*)__P)->__v;
+ return ((const struct __loadu_epi8*)__P)->__v;
 }
 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P) {
- return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+ return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
 (__v16qi) __W, (__mmask16) __U);
 }
@@ -2355,7 +2355,7 @@ _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) {
- return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+ return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
 (__v16qi) _mm_setzero_si128 (), (__mmask16) __U);
@@ -2367,13 +2367,13 @@ _mm256_loadu_epi8 (void const *__P)
 struct __loadu_epi8 { __m256i_u __v; }
__attribute__((__packed__, __may_alias__)); - return ((struct __loadu_epi8*)__P)->__v; + return ((const struct __loadu_epi8*)__P)->__v; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, + return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P, (__v32qi) __W, (__mmask32) __U); } @@ -2381,7 +2381,7 @@ _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, + return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P, (__v32qi) _mm256_setzero_si256 (), (__mmask32) __U); diff --git a/lib/include/avx512vlintrin.h b/lib/include/avx512vlintrin.h index 9494fc8a6e..9d1d791bb2 100644 --- a/lib/include/avx512vlintrin.h +++ b/lib/include/avx512vlintrin.h @@ -2505,7 +2505,7 @@ _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { - return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, + return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, (__v2df) __W, (__mmask8) __U); @@ -2513,7 +2513,7 @@ _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { - return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, + return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, (__v2df) _mm_setzero_pd (), (__mmask8) @@ -2522,7 +2522,7 @@ _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { - return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, + return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, (__v4df) __W, (__mmask8) __U); @@ -2530,7 +2530,7 @@ _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { - return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, + return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, (__v4df) _mm256_setzero_pd (), (__mmask8) @@ -2539,7 +2539,7 @@ _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, + return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, (__v2di) __W, (__mmask8) __U); @@ -2547,7 +2547,7 @@ _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, + return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, (__v2di) _mm_setzero_si128 (), (__mmask8) @@ -2557,7 +2557,7 @@ _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 
{ - return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, + return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, (__v4di) __W, (__mmask8) __U); @@ -2565,7 +2565,7 @@ _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, + return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, (__v4di) _mm256_setzero_si256 (), (__mmask8) @@ -2574,14 +2574,14 @@ _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { - return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, + return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { - return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, + return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, (__v4sf) _mm_setzero_ps (), (__mmask8) @@ -2590,14 +2590,14 @@ _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { - return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, + return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, (__v8sf) __W, (__mmask8) __U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { - return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, + return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, (__v8sf) _mm256_setzero_ps (), (__mmask8) @@ -2606,7 +2606,7 @@ _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, + return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, (__v4si) __W, (__mmask8) __U); @@ -2614,7 +2614,7 @@ _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, + return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); @@ -2623,7 +2623,7 @@ _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, + return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, (__v8si) __W, (__mmask8) __U); @@ -2631,7 +2631,7 @@ _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, + return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, (__v8si) _mm256_setzero_si256 (), (__mmask8) @@ -5073,13 +5073,13 @@ _mm256_maskz_mov_epi32 
(__mmask8 __U, __m256i __A) static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi32 (void const *__P) { - return *(__m128i *) __P; + return *(const __m128i *) __P; } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, + return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, (__v4si) __W, (__mmask8) __U); @@ -5088,7 +5088,7 @@ _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, + return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, (__v4si) _mm_setzero_si128 (), (__mmask8) @@ -5098,13 +5098,13 @@ _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi32 (void const *__P) { - return *(__m256i *) __P; + return *(const __m256i *) __P; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, + return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, (__v8si) __W, (__mmask8) __U); @@ -5113,7 +5113,7 @@ _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, + return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, (__v8si) _mm256_setzero_si256 (), (__mmask8) @@ -5183,13 +5183,13 @@ _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi64 (void const *__P) { - return *(__m128i *) __P; + return *(const __m128i *) __P; } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, + return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, (__v2di) __W, (__mmask8) __U); @@ -5198,7 +5198,7 @@ _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, + return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, (__v2di) _mm_setzero_si128 (), (__mmask8) @@ -5208,13 +5208,13 @@ _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi64 (void const *__P) { - return *(__m256i *) __P; + return *(const __m256i *) __P; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, + return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, (__v4di) __W, (__mmask8) __U); @@ -5223,7 +5223,7 @@ _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, + return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, (__v4di) 
_mm256_setzero_si256 (), (__mmask8) @@ -5430,7 +5430,7 @@ _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) { - return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, + return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, (__v2df) __W, (__mmask8) __U); } @@ -5438,7 +5438,7 @@ _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd (__mmask8 __U, void const *__P) { - return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, + return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, (__v2df) _mm_setzero_pd (), (__mmask8) __U); @@ -5447,7 +5447,7 @@ _mm_maskz_load_pd (__mmask8 __U, void const *__P) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) { - return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, + return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, (__v4df) __W, (__mmask8) __U); } @@ -5455,7 +5455,7 @@ _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) { - return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, + return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); @@ -5464,7 +5464,7 @@ _mm256_maskz_load_pd (__mmask8 __U, void const *__P) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) { - return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, + return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, (__v4sf) __W, (__mmask8) __U); } @@ -5472,7 +5472,7 @@ _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps (__mmask8 __U, void const *__P) { - return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, + return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); @@ -5481,7 +5481,7 @@ _mm_maskz_load_ps (__mmask8 __U, void const *__P) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) { - return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, + return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, (__v8sf) __W, (__mmask8) __U); } @@ -5489,7 +5489,7 @@ _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) { - return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, + return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); @@ -5501,13 +5501,13 @@ _mm_loadu_epi64 (void const *__P) struct __loadu_epi64 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_epi64*)__P)->__v; + return ((const struct __loadu_epi64*)__P)->__v; } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, + return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, (__v2di) __W, (__mmask8) __U); } @@ -5515,7 +5515,7 @@ _mm_mask_loadu_epi64 (__m128i __W, __mmask8 
__U, void const *__P) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, + return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, (__v2di) _mm_setzero_si128 (), (__mmask8) __U); @@ -5527,13 +5527,13 @@ _mm256_loadu_epi64 (void const *__P) struct __loadu_epi64 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_epi64*)__P)->__v; + return ((const struct __loadu_epi64*)__P)->__v; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, + return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, (__v4di) __W, (__mmask8) __U); } @@ -5541,7 +5541,7 @@ _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, + return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U); @@ -5553,13 +5553,13 @@ _mm_loadu_epi32 (void const *__P) struct __loadu_epi32 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_epi32*)__P)->__v; + return ((const struct __loadu_epi32*)__P)->__v; } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, + return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, (__v4si) __W, (__mmask8) __U); } @@ -5567,7 +5567,7 @@ _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { - return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, + return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); @@ -5579,13 +5579,13 @@ _mm256_loadu_epi32 (void const *__P) struct __loadu_epi32 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_epi32*)__P)->__v; + return ((const struct __loadu_epi32*)__P)->__v; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, + return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, (__v8si) __W, (__mmask8) __U); } @@ -5593,7 +5593,7 @@ _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { - return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, + return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U); @@ -5602,7 +5602,7 @@ _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) { - return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, + return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, (__v2df) __W, (__mmask8) __U); } @@ -5610,7 +5610,7 @@ _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) static 
__inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) { - return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, + return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, (__v2df) _mm_setzero_pd (), (__mmask8) __U); @@ -5619,7 +5619,7 @@ _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) { - return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, + return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, (__v4df) __W, (__mmask8) __U); } @@ -5627,7 +5627,7 @@ _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) { - return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, + return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); @@ -5636,7 +5636,7 @@ _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) { - return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, + return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, (__v4sf) __W, (__mmask8) __U); } @@ -5644,7 +5644,7 @@ _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) { - return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, + return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); @@ -5653,7 +5653,7 @@ _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) { - return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, + return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, (__v8sf) __W, (__mmask8) __U); } @@ -5661,7 +5661,7 @@ _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) { - return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, + return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); diff --git a/lib/include/avxintrin.h b/lib/include/avxintrin.h index a01240b9d1..84421bf1b9 100644 --- a/lib/include/avxintrin.h +++ b/lib/include/avxintrin.h @@ -3069,7 +3069,7 @@ _mm256_broadcast_ps(__m128 const *__a) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_load_pd(double const *__p) { - return *(__m256d *)__p; + return *(const __m256d *)__p; } /// Loads 8 single-precision floating point values from a 32-byte aligned @@ -3085,7 +3085,7 @@ _mm256_load_pd(double const *__p) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_load_ps(float const *__p) { - return *(__m256 *)__p; + return *(const __m256 *)__p; } /// Loads 4 double-precision floating point values from an unaligned @@ -3105,7 +3105,7 @@ _mm256_loadu_pd(double const *__p) struct __loadu_pd { __m256d_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_pd*)__p)->__v; + return ((const struct __loadu_pd*)__p)->__v; } /// Loads 8 single-precision floating point values from an unaligned @@ -3125,7 +3125,7 @@ _mm256_loadu_ps(float const *__p) struct __loadu_ps { __m256_u 
__v; } __attribute__((__packed__, __may_alias__));
- return ((struct __loadu_ps*)__p)->__v;
+ return ((const struct __loadu_ps*)__p)->__v;
 }
 /// Loads 256 bits of integer data from a 32-byte aligned memory
@@ -3161,7 +3161,7 @@ _mm256_loadu_si256(__m256i_u const *__p)
 struct __loadu_si256 { __m256i_u __v; } __attribute__((__packed__, __may_alias__));
- return ((struct __loadu_si256*)__p)->__v;
+ return ((const struct __loadu_si256*)__p)->__v;
 }
 /// Loads 256 bits of integer data from an unaligned memory location
diff --git a/lib/include/bmiintrin.h b/lib/include/bmiintrin.h
index b7af62f609..841bd84070 100644
--- a/lib/include/bmiintrin.h
+++ b/lib/include/bmiintrin.h
@@ -14,27 +14,13 @@
 #ifndef __BMIINTRIN_H
 #define __BMIINTRIN_H
-#define _tzcnt_u16(a) (__tzcnt_u16((a)))
-
-#define _andn_u32(a, b) (__andn_u32((a), (b)))
-
-/* _bextr_u32 != __bextr_u32 */
-#define _blsi_u32(a) (__blsi_u32((a)))
-
-#define _blsmsk_u32(a) (__blsmsk_u32((a)))
-
-#define _blsr_u32(a) (__blsr_u32((a)))
-
-#define _tzcnt_u32(a) (__tzcnt_u32((a)))
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
-
 /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT instruction behaves as BSF on non-BMI targets, there is code that expects to use it as a potentially faster version of BSF. */
 #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+#define _tzcnt_u16(a) (__tzcnt_u16((a)))
+
 /// Counts the number of trailing zero bits in the operand.
 ///
 /// \headerfile <x86intrin.h>
@@ -51,6 +37,94 @@ __tzcnt_u16(unsigned short __X)
 return __builtin_ia32_tzcnt_u16(__X);
 }
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 32-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 32-bit integer containing the number of trailing zero
+/// bits in the operand.
+static __inline__ unsigned int __RELAXED_FN_ATTRS
+__tzcnt_u32(unsigned int __X)
+{
+ return __builtin_ia32_tzcnt_u32(__X);
+}
+
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 32-bit integer whose trailing zeros are to be counted.
+/// \returns An 32-bit integer containing the number of trailing zero bits in
+/// the operand.
+static __inline__ int __RELAXED_FN_ATTRS
+_mm_tzcnt_32(unsigned int __X)
+{
+ return __builtin_ia32_tzcnt_u32(__X);
+}
+
+#define _tzcnt_u32(a) (__tzcnt_u32((a)))
+
+#ifdef __x86_64__
+
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 64-bit integer containing the number of trailing zero
+/// bits in the operand.
+static __inline__ unsigned long long __RELAXED_FN_ATTRS
+__tzcnt_u64(unsigned long long __X)
+{
+ return __builtin_ia32_tzcnt_u64(__X);
+}
+
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer whose trailing zeros are to be counted.
+/// \returns An 64-bit integer containing the number of trailing zero bits in
+/// the operand.
+static __inline__ long long __RELAXED_FN_ATTRS
+_mm_tzcnt_64(unsigned long long __X)
+{
+ return __builtin_ia32_tzcnt_u64(__X);
+}
+
+#define _tzcnt_u64(a) (__tzcnt_u64((a)))
+
+#endif /* __x86_64__ */
+
+#undef __RELAXED_FN_ATTRS
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
+
+#define _andn_u32(a, b) (__andn_u32((a), (b)))
+
+/* _bextr_u32 != __bextr_u32 */
+#define _blsi_u32(a) (__blsi_u32((a)))
+
+#define _blsmsk_u32(a) (__blsmsk_u32((a)))
+
+#define _blsr_u32(a) (__blsr_u32((a)))
+
 /// Performs a bitwise AND of the second operand with the one's
 /// complement of the first operand.
 ///
@@ -169,38 +243,6 @@ __blsr_u32(unsigned int __X)
 return __X & (__X - 1);
 }
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the TZCNT instruction.
-///
-/// \param __X
-/// An unsigned 32-bit integer whose trailing zeros are to be counted.
-/// \returns An unsigned 32-bit integer containing the number of trailing zero
-/// bits in the operand.
-static __inline__ unsigned int __RELAXED_FN_ATTRS
-__tzcnt_u32(unsigned int __X)
-{
- return __builtin_ia32_tzcnt_u32(__X);
-}
-
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the TZCNT instruction.
-///
-/// \param __X
-/// An unsigned 32-bit integer whose trailing zeros are to be counted.
-/// \returns An 32-bit integer containing the number of trailing zero bits in
-/// the operand.
-static __inline__ int __RELAXED_FN_ATTRS
-_mm_tzcnt_32(unsigned int __X)
-{
- return __builtin_ia32_tzcnt_u32(__X);
-}
-
 #ifdef __x86_64__
 #define _andn_u64(a, b) (__andn_u64((a), (b)))
@@ -212,8 +254,6 @@ _mm_tzcnt_32(unsigned int __X)
 #define _blsr_u64(a) (__blsr_u64((a)))
-#define _tzcnt_u64(a) (__tzcnt_u64((a)))
-
 /// Performs a bitwise AND of the second operand with the one's
 /// complement of the first operand.
 ///
@@ -332,41 +372,10 @@ __blsr_u64(unsigned long long __X)
 return __X & (__X - 1);
 }
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the TZCNT instruction.
-///
-/// \param __X
-/// An unsigned 64-bit integer whose trailing zeros are to be counted.
-/// \returns An unsigned 64-bit integer containing the number of trailing zero
-/// bits in the operand.
-static __inline__ unsigned long long __RELAXED_FN_ATTRS
-__tzcnt_u64(unsigned long long __X)
-{
- return __builtin_ia32_tzcnt_u64(__X);
-}
-
-/// Counts the number of trailing zero bits in the operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the TZCNT instruction.
-///
-/// \param __X
-/// An unsigned 64-bit integer whose trailing zeros are to be counted.
-/// \returns An 64-bit integer containing the number of trailing zero bits in
-/// the operand.
-static __inline__ long long __RELAXED_FN_ATTRS -_mm_tzcnt_64(unsigned long long __X) -{ - return __builtin_ia32_tzcnt_u64(__X); -} - #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS -#undef __RELAXED_FN_ATTRS + +#endif /* !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) */ #endif /* __BMIINTRIN_H */ diff --git a/lib/include/cpuid.h b/lib/include/cpuid.h index 02ffac26c0..4ddd64847c 100644 --- a/lib/include/cpuid.h +++ b/lib/include/cpuid.h @@ -38,8 +38,8 @@ #define signature_TM2_ecx 0x3638784d /* NSC: "Geode by NSC" */ #define signature_NSC_ebx 0x646f6547 -#define signature_NSC_edx 0x43534e20 -#define signature_NSC_ecx 0x79622065 +#define signature_NSC_edx 0x79622065 +#define signature_NSC_ecx 0x43534e20 /* NEXGEN: "NexGenDriven" */ #define signature_NEXGEN_ebx 0x4778654e #define signature_NEXGEN_edx 0x72446e65 diff --git a/lib/include/emmintrin.h b/lib/include/emmintrin.h index c8fefdfc79..993c688ce8 100644 --- a/lib/include/emmintrin.h +++ b/lib/include/emmintrin.h @@ -1578,7 +1578,7 @@ _mm_cvtsd_f64(__m128d __a) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp) { - return *(__m128d*)__dp; + return *(const __m128d*)__dp; } /// Loads a double-precision floating-point value from a specified memory @@ -1599,7 +1599,7 @@ _mm_load1_pd(double const *__dp) struct __mm_load1_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); - double __u = ((struct __mm_load1_pd_struct*)__dp)->__u; + double __u = ((const struct __mm_load1_pd_struct*)__dp)->__u; return __extension__ (__m128d){ __u, __u }; } @@ -1622,7 +1622,7 @@ _mm_load1_pd(double const *__dp) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp) { - __m128d __u = *(__m128d*)__dp; + __m128d __u = *(const __m128d*)__dp; return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0); } @@ -1643,7 +1643,7 @@ _mm_loadu_pd(double const *__dp) struct __loadu_pd { __m128d_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_pd*)__dp)->__v; + return ((const struct __loadu_pd*)__dp)->__v; } /// Loads a 64-bit integer value to the low element of a 128-bit integer @@ -1663,7 +1663,7 @@ _mm_loadu_si64(void const *__a) struct __loadu_si64 { long long __v; } __attribute__((__packed__, __may_alias__)); - long long __u = ((struct __loadu_si64*)__a)->__v; + long long __u = ((const struct __loadu_si64*)__a)->__v; return __extension__ (__m128i)(__v2di){__u, 0LL}; } @@ -1684,7 +1684,7 @@ _mm_loadu_si32(void const *__a) struct __loadu_si32 { int __v; } __attribute__((__packed__, __may_alias__)); - int __u = ((struct __loadu_si32*)__a)->__v; + int __u = ((const struct __loadu_si32*)__a)->__v; return __extension__ (__m128i)(__v4si){__u, 0, 0, 0}; } @@ -1705,7 +1705,7 @@ _mm_loadu_si16(void const *__a) struct __loadu_si16 { short __v; } __attribute__((__packed__, __may_alias__)); - short __u = ((struct __loadu_si16*)__a)->__v; + short __u = ((const struct __loadu_si16*)__a)->__v; return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0}; } @@ -1726,7 +1726,7 @@ _mm_load_sd(double const *__dp) struct __mm_load_sd_struct { double __u; } __attribute__((__packed__, __may_alias__)); - double __u = ((struct __mm_load_sd_struct*)__dp)->__u; + double __u = ((const struct __mm_load_sd_struct*)__dp)->__u; return __extension__ (__m128d){ __u, 0 }; } @@ -1753,7 +1753,7 @@ _mm_loadh_pd(__m128d __a, double const *__dp) struct __mm_loadh_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); - double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u; + 
double __u = ((const struct __mm_loadh_pd_struct*)__dp)->__u;
 return __extension__ (__m128d){ __a[0], __u };
 }
@@ -1780,7 +1780,7 @@ _mm_loadl_pd(__m128d __a, double const *__dp)
 struct __mm_loadl_pd_struct { double __u; } __attribute__((__packed__, __may_alias__));
- double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
+ double __u = ((const struct __mm_loadl_pd_struct*)__dp)->__u;
 return __extension__ (__m128d){ __u, __a[1] };
 }
@@ -2288,7 +2288,7 @@ _mm_adds_epu16(__m128i __a, __m128i __b)
 return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
 }
-/// Computes the rounded avarages of corresponding elements of two
+/// Computes the rounded averages of corresponding elements of two
 /// 128-bit unsigned [16 x i8] vectors, saving each result in the
 /// corresponding element of a 128-bit result vector of [16 x i8].
 ///
@@ -2308,7 +2308,7 @@ _mm_avg_epu8(__m128i __a, __m128i __b)
 return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
 }
-/// Computes the rounded avarages of corresponding elements of two
+/// Computes the rounded averages of corresponding elements of two
 /// 128-bit unsigned [8 x i16] vectors, saving each result in the
 /// corresponding element of a 128-bit result vector of [8 x i16].
 ///
@@ -3550,7 +3550,7 @@ _mm_loadu_si128(__m128i_u const *__p)
 struct __loadu_si128 { __m128i_u __v; } __attribute__((__packed__, __may_alias__));
- return ((struct __loadu_si128*)__p)->__v;
+ return ((const struct __loadu_si128*)__p)->__v;
 }
 /// Returns a vector of [2 x i64] where the lower element is taken from
@@ -3571,7 +3571,7 @@ _mm_loadl_epi64(__m128i_u const *__p)
 struct __mm_loadl_epi64_struct { long long __u; } __attribute__((__packed__, __may_alias__));
- return __extension__ (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
+ return __extension__ (__m128i) { ((const struct __mm_loadl_epi64_struct*)__p)->__u, 0};
 }
 /// Generates a 128-bit vector of [4 x i32] with unspecified content.
diff --git a/lib/include/ia32intrin.h b/lib/include/ia32intrin.h
index 8e38df7318..79b7f0655c 100644
--- a/lib/include/ia32intrin.h
+++ b/lib/include/ia32intrin.h
@@ -195,6 +195,74 @@ __writeeflags(unsigned int __f)
 }
 #endif /* !__x86_64__ */
+/** Cast a 32-bit float value to a 32-bit unsigned integer value
+ *
+ * \headerfile <x86intrin.h>
+ * This intrinsic corresponds to the VMOVD / MOVD instruction in x86_64,
+ * and corresponds to the VMOVL / MOVL instruction in ia32.
+ *
+ * \param __A
+ * A 32-bit float value.
+ * \returns a 32-bit unsigned integer containing the converted value.
+ */
+static __inline__ unsigned int __attribute__((__always_inline__))
+_castf32_u32(float __A) {
+ unsigned int D;
+ __builtin_memcpy(&D, &__A, sizeof(__A));
+ return D;
+}
+
+/** Cast a 64-bit float value to a 64-bit unsigned integer value
+ *
+ * \headerfile <x86intrin.h>
+ * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64,
+ * and corresponds to the VMOVL / MOVL instruction in ia32.
+ *
+ * \param __A
+ * A 64-bit float value.
+ * \returns a 64-bit unsigned integer containing the converted value.
+ */
+static __inline__ unsigned long long __attribute__((__always_inline__))
+_castf64_u64(double __A) {
+ unsigned long long D;
+ __builtin_memcpy(&D, &__A, sizeof(__A));
+ return D;
+}
+
+/** Cast a 32-bit unsigned integer value to a 32-bit float value
+ *
+ * \headerfile <x86intrin.h>
+ * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64,
+ * and corresponds to the FLDS instruction in ia32.
+ *
+ * \param __A
+ * A 32-bit unsigned integer value.
+/** Cast a 32-bit unsigned integer value to a 32-bit float value + * + * \headerfile <x86intrin.h> + * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64, + * and corresponds to the FLDS instruction in ia32. + * + * \param __A + * A 32-bit unsigned integer value. + * \returns a 32-bit float value containing the converted value. + */ +static __inline__ float __attribute__((__always_inline__)) +_castu32_f32(unsigned int __A) { + float D; + __builtin_memcpy(&D, &__A, sizeof(__A)); + return D; +} + +/** Cast a 64-bit unsigned integer value to a 64-bit float value + * + * \headerfile <x86intrin.h> + * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64, + * and corresponds to the FLDL instruction in ia32. + * + * \param __A + * A 64-bit unsigned integer value. + * \returns a 64-bit float value containing the converted value. + */ +static __inline__ double __attribute__((__always_inline__)) +_castu64_f64(unsigned long long __A) { + double D; + __builtin_memcpy(&D, &__A, sizeof(__A)); + return D; +} + /** Adds the unsigned integer operand to the CRC-32C checksum of the * unsigned char operand. * diff --git a/lib/include/immintrin.h index 7555ad82fa..edf8c42ec4 100644 --- a/lib/include/immintrin.h +++ b/lib/include/immintrin.h @@ -64,9 +64,8 @@ #include <rdseedintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) +/* No feature check desired due to internal checks */ #include <bmiintrin.h> -#endif #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__) #include <bmi2intrin.h> @@ -302,7 +301,7 @@ _loadbe_i16(void const * __P) { struct __loadu_i16 { short __v; } __attribute__((__packed__, __may_alias__)); - return __builtin_bswap16(((struct __loadu_i16*)__P)->__v); + return __builtin_bswap16(((const struct __loadu_i16*)__P)->__v); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) @@ -318,7 +317,7 @@ _loadbe_i32(void const * __P) { struct __loadu_i32 { int __v; } __attribute__((__packed__, __may_alias__)); - return __builtin_bswap32(((struct __loadu_i32*)__P)->__v); + return __builtin_bswap32(((const struct __loadu_i32*)__P)->__v); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) @@ -335,7 +334,7 @@ _loadbe_i64(void const * __P) { struct __loadu_i64 { long long __v; } __attribute__((__packed__, __may_alias__)); - return __builtin_bswap64(((struct __loadu_i64*)__P)->__v); + return __builtin_bswap64(((const struct __loadu_i64*)__P)->__v); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) diff --git a/lib/include/intrin.h index 9786ba147f..f85f7a2beb 100644 --- a/lib/include/intrin.h +++ b/lib/include/intrin.h @@ -36,6 +36,12 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#if __x86_64__ +#define __LPTRINT_TYPE__ __int64 +#else +#define __LPTRINT_TYPE__ long +#endif + #ifdef __cplusplus extern "C" { #endif @@ -94,8 +100,7 @@ void __outword(unsigned short, unsigned short); void __outwordstring(unsigned short, unsigned short *, unsigned long); unsigned long __readcr0(void); unsigned long __readcr2(void); -static __inline__ -unsigned long __readcr3(void); +unsigned __LPTRINT_TYPE__ __readcr3(void); unsigned long __readcr4(void); unsigned long __readcr8(void); unsigned int __readdr(unsigned int); @@ -132,7 +137,7 @@ void __vmx_vmptrst(unsigned __int64 *); void __wbinvd(void); void __writecr0(unsigned int); static __inline__ -void __writecr3(unsigned int); +void __writecr3(unsigned __INTPTR_TYPE__); void __writecr4(unsigned int); void __writecr8(unsigned int); void __writedr(unsigned int, unsigned int); @@ -164,7 +169,6 @@ long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long); long _InterlockedExchangeAdd_HLERelease(long volatile *, long); __int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64); __int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64); -void __cdecl _invpcid(unsigned int, void *); static __inline__ void __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) _ReadBarrier(void); @@ -565,24 +569,26 @@ __readmsr(unsigned long __register) { __asm__ ("rdmsr" : "=d"(__edx), "=a"(__eax) : "c"(__register)); return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax; } +#endif -static __inline__ unsigned long __DEFAULT_FN_ATTRS +static __inline__ unsigned __LPTRINT_TYPE__ __DEFAULT_FN_ATTRS __readcr3(void) { - unsigned long __cr3_val; - __asm__ __volatile__ ("mov %%cr3, %0" : "=q"(__cr3_val) : : "memory"); + unsigned __LPTRINT_TYPE__ __cr3_val; + __asm__ __volatile__ ("mov %%cr3, %0" : "=r"(__cr3_val) : : "memory"); return __cr3_val; } static __inline__ void __DEFAULT_FN_ATTRS -__writecr3(unsigned int __cr3_val) { - __asm__ ("mov %0, %%cr3" : : "q"(__cr3_val) : "memory"); +__writecr3(unsigned __INTPTR_TYPE__ __cr3_val) { + __asm__ ("mov %0, %%cr3" : : "r"(__cr3_val) : "memory"); } -#endif #ifdef __cplusplus } #endif +#undef __LPTRINT_TYPE__ + #undef __DEFAULT_FN_ATTRS #endif /* __INTRIN_H */ diff --git a/lib/include/mwaitxintrin.h b/lib/include/mwaitxintrin.h index bca395b0e0..ed485380af 100644 --- a/lib/include/mwaitxintrin.h +++ b/lib/include/mwaitxintrin.h @@ -17,9 +17,9 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mwaitx"))) static __inline__ void __DEFAULT_FN_ATTRS -_mm_monitorx(void const * __p, unsigned __extensions, unsigned __hints) +_mm_monitorx(void * __p, unsigned __extensions, unsigned __hints) { - __builtin_ia32_monitorx((void *)__p, __extensions, __hints); + __builtin_ia32_monitorx(__p, __extensions, __hints); } static __inline__ void __DEFAULT_FN_ATTRS diff --git a/lib/include/opencl-c-base.h b/lib/include/opencl-c-base.h index 9a23333a33..430e07d36f 100644 --- a/lib/include/opencl-c-base.h +++ b/lib/include/opencl-c-base.h @@ -406,7 +406,7 @@ typedef enum memory_order #define CLK_OUT_OF_RESOURCES -5 #define CLK_NULL_QUEUE 0 -#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t)) +#define CLK_NULL_EVENT (__builtin_astype(((__SIZE_MAX__)), clk_event_t)) // execution model related definitions #define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0 diff --git a/lib/include/pmmintrin.h b/lib/include/pmmintrin.h index c376f298cc..a83b2eb6d8 100644 --- a/lib/include/pmmintrin.h +++ b/lib/include/pmmintrin.h @@ -263,7 +263,7 @@ _mm_movedup_pd(__m128d __a) static __inline__ void __DEFAULT_FN_ATTRS _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints) { - __builtin_ia32_monitor((void *)__p, __extensions, __hints); + __builtin_ia32_monitor(__p, __extensions, __hints); } /// Used with the MONITOR instruction to wait while the processor is in diff --git a/lib/include/ppc_wrappers/emmintrin.h b/lib/include/ppc_wrappers/emmintrin.h index 617ce24acd..4dcb8485e2 100644 --- a/lib/include/ppc_wrappers/emmintrin.h +++ b/lib/include/ppc_wrappers/emmintrin.h @@ -35,6 +35,8 @@ #ifndef EMMINTRIN_H_ #define EMMINTRIN_H_ +#if defined(__linux__) && defined(__ppc64__) + #include /* We need definitions from the SSE header files. 
diff --git a/lib/include/ppc_wrappers/emmintrin.h index 617ce24acd..4dcb8485e2 100644 --- a/lib/include/ppc_wrappers/emmintrin.h +++ b/lib/include/ppc_wrappers/emmintrin.h @@ -35,6 +35,8 @@ #ifndef EMMINTRIN_H_ #define EMMINTRIN_H_ +#if defined(__linux__) && defined(__ppc64__) + #include <altivec.h> /* We need definitions from the SSE header files. */ @@ -1747,7 +1749,7 @@ _mm_sll_epi64 (__m128i __A, __m128i __B) lshift = vec_splat ((__v2du) __B, 0); shmask = vec_cmplt (lshift, shmax); result = vec_sl ((__v2du) __A, lshift); - result = vec_sel ((__v2du) shmask, result, shmask); + result = (__v2du)vec_sel ((__v2df) shmask, (__v2df)result, shmask); return (__m128i) result; } @@ -1841,7 +1843,7 @@ _mm_srl_epi64 (__m128i __A, __m128i __B) rshift = vec_splat ((__v2du) __B, 0); shmask = vec_cmplt (rshift, shmax); result = vec_sr ((__v2du) __A, rshift); - result = vec_sel ((__v2du) shmask, result, shmask); + result = (__v2du)vec_sel ((__v2df) shmask, (__v2df)result, shmask); return (__m128i) result; } @@ -2315,4 +2317,8 @@ _mm_castsi128_pd(__m128i __A) return (__m128d) __A; } +#else +#include_next <emmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + #endif /* EMMINTRIN_H_ */ diff --git a/lib/include/ppc_wrappers/mm_malloc.h index d91d7865c8..24b14c8e07 100644 --- a/lib/include/ppc_wrappers/mm_malloc.h +++ b/lib/include/ppc_wrappers/mm_malloc.h @@ -10,6 +10,8 @@ #ifndef _MM_MALLOC_H_INCLUDED #define _MM_MALLOC_H_INCLUDED +#if defined(__linux__) && defined(__ppc64__) + #include <stdlib.h> /* We can't depend on <stdlib.h> since the prototype of posix_memalign @@ -41,4 +43,8 @@ _mm_free (void * ptr) free (ptr); } +#else +#include_next <mm_malloc.h> +#endif + #endif /* _MM_MALLOC_H_INCLUDED */ diff --git a/lib/include/ppc_wrappers/mmintrin.h index b949653adf..c55c44726f 100644 --- a/lib/include/ppc_wrappers/mmintrin.h +++ b/lib/include/ppc_wrappers/mmintrin.h @@ -35,6 +35,8 @@ #ifndef _MMINTRIN_H_INCLUDED #define _MMINTRIN_H_INCLUDED +#if defined(__linux__) && defined(__ppc64__) + #include <altivec.h> /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ @@ -1440,4 +1442,9 @@ extern __inline __m64 return (res.as_m64); #endif } + +#else +#include_next <mmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + #endif /* _MMINTRIN_H_INCLUDED */
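All of these ppc_wrappers headers follow the same shape: compile the PowerPC implementation only on Linux/ppc64, and otherwise defer to the next header of the same name on the include path. A minimal sketch of the pattern with a hypothetical foointrin.h:

#ifndef FOOINTRIN_H_
#define FOOINTRIN_H_

#if defined(__linux__) && defined(__ppc64__)
/* PowerPC (VMX/VSX) implementations of the intrinsics go here. */
#else
/* GNU extension: keep searching the include path for the real foointrin.h. */
#include_next <foointrin.h>
#endif

#endif /* FOOINTRIN_H_ */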
diff --git a/lib/include/ppc_wrappers/pmmintrin.h b/lib/include/ppc_wrappers/pmmintrin.h new file mode 100644 index 0000000000..6d93383d54 --- /dev/null +++ b/lib/include/ppc_wrappers/pmmintrin.h @@ -0,0 +1,150 @@ +/*===---- pmmintrin.h - Implementation of SSE3 intrinsics on PowerPC -------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. + + In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA + is a good match for most SIMD operations. However the Horizontal + add/sub requires the data pairs be permuted into separate + registers with vertical even/odd alignment for the operation. + And the addsub operation requires the sign of only the even numbered + elements be flipped (xored with -0.0). + For larger blocks of code using these intrinsic implementations, + the compiler should be able to schedule instructions to avoid + additional latency. + + In the specific case of the monitor and mwait instructions there is + no direct equivalent in the PowerISA at this time. So those + intrinsics are not implemented. */ +#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning." +#endif + +#ifndef PMMINTRIN_H_ +#define PMMINTRIN_H_ + +#if defined(__linux__) && defined(__ppc64__) + +/* We need definitions from the SSE2 and SSE header files. */ +#include <emmintrin.h> + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_addsub_ps (__m128 __X, __m128 __Y) +{ + const __v4sf even_n0 = {-0.0, 0.0, -0.0, 0.0}; + __v4sf even_neg_Y = vec_xor(__Y, even_n0); + return (__m128) vec_add (__X, even_neg_Y); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_addsub_pd (__m128d __X, __m128d __Y) +{ + const __v2df even_n0 = {-0.0, 0.0}; + __v2df even_neg_Y = vec_xor(__Y, even_n0); + return (__m128d) vec_add (__X, even_neg_Y); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_ps (__m128 __X, __m128 __Y) +{ + __vector unsigned char xform2 = { + 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, + 0x18, 0x19, 0x1A, 0x1B + }; + __vector unsigned char xform1 = { + 0x04, 0x05, 0x06, 0x07, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, + 0x1C, 0x1D, 0x1E, 0x1F + }; + return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2), + vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsub_ps (__m128 __X, __m128 __Y) +{ + __vector unsigned char xform2 = { + 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, + 0x18, 0x19, 0x1A, 0x1B + }; + __vector unsigned char xform1 = { + 0x04, 0x05, 0x06, 0x07, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, + 0x1C, 0x1D, 0x1E, 0x1F + }; + return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2), + vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) vec_add (vec_mergeh ((__v2df) __X, (__v2df)__Y), + vec_mergel ((__v2df) __X, (__v2df)__Y)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsub_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) vec_sub (vec_mergeh ((__v2df) __X, (__v2df)__Y), + vec_mergel ((__v2df) __X, (__v2df)__Y)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movehdup_ps (__m128 __X) +{ + return (__m128)vec_mergeo ((__v4su)__X, (__v4su)__X); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_moveldup_ps (__m128 __X) +{ + return (__m128)vec_mergee ((__v4su)__X, (__v4su)__X); +}
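The xor with {-0.0, 0.0, ...} above flips the sign of the even-numbered lanes of __Y, turning the single vec_add into the required add/subtract interleave. A scalar reference for _mm_addsub_ps (illustrative, not part of the header):

static void addsub_ps_ref(const float x[4], const float y[4], float out[4]) {
  for (int i = 0; i < 4; ++i)
    out[i] = (i % 2 == 0) ? x[i] - y[i]   /* even lanes: x + (-y) */
                          : x[i] + y[i];  /* odd lanes: y unchanged */
}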
+ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loaddup_pd (double const *__P) +{ + return (__m128d) vec_splats (*__P); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movedup_pd (__m128d __X) +{ + return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0)); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_lddqu_si128 (__m128i const *__P) +{ + return (__m128i) (vec_vsx_ld(0, (signed int const *)__P)); +} + +/* POWER8 / POWER9 have no equivalent for _mm_monitor nor _mm_mwait. */ + +#else +#include_next <pmmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + +#endif /* PMMINTRIN_H_ */ diff --git a/lib/include/ppc_wrappers/smmintrin.h b/lib/include/ppc_wrappers/smmintrin.h new file mode 100644 index 0000000000..56ef6ba76b --- /dev/null +++ b/lib/include/ppc_wrappers/smmintrin.h @@ -0,0 +1,85 @@ +/*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. + + NOTE: This is NOT a complete implementation of the SSE4 intrinsics! */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64/powerpc64le. + + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. */ +#error \ + "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." +#endif + +#ifndef SMMINTRIN_H_ +#define SMMINTRIN_H_ + +#if defined(__linux__) && defined(__ppc64__) + +#include <altivec.h> +#include <emmintrin.h> + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_extract_epi8(__m128i __X, const int __N) { + return (unsigned char)((__v16qi)__X)[__N & 15]; +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_extract_epi32(__m128i __X, const int __N) { + return ((__v4si)__X)[__N & 3]; +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_extract_epi64(__m128i __X, const int __N) { + return ((__v2di)__X)[__N & 1]; +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_extract_ps(__m128 __X, const int __N) { + return ((__v4si)__X)[__N & 3]; +} + +extern __inline __m128i + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) { + __v16qi __charmask = vec_splats((signed char)__imm8); + __charmask = vec_gb(__charmask); + __v8hu __shortmask = (__v8hu)vec_unpackh(__charmask); +#ifdef __BIG_ENDIAN__ + __shortmask = vec_reve(__shortmask); +#endif + return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask); +} + +extern __inline __m128i + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) { + const __v16qu __seven = vec_splats((unsigned char)0x07); + __v16qu __lmask = vec_sra((__v16qu)__mask, __seven); + return (__m128i)vec_sel((__v16qu)__A, (__v16qu)__B, __lmask); +} + +#else +#include_next <smmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + +#endif /* SMMINTRIN_H_ */
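In _mm_blendv_epi8 above, the arithmetic shift by seven splats each mask byte's top bit across the byte, so vec_sel picks whole bytes from __B wherever that bit was set. A scalar reference (illustrative only):

static void blendv_epi8_ref(const unsigned char a[16], const unsigned char b[16],
                            const unsigned char mask[16], unsigned char out[16]) {
  for (int i = 0; i < 16; ++i)
    out[i] = (mask[i] & 0x80) ? b[i] : a[i]; /* top bit of mask byte selects B */
}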
diff --git a/lib/include/ppc_wrappers/tmmintrin.h b/lib/include/ppc_wrappers/tmmintrin.h new file mode 100644 index 0000000000..b5a935d5e4 --- /dev/null +++ b/lib/include/ppc_wrappers/tmmintrin.h @@ -0,0 +1,495 @@ +/*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. */ +#endif + +#ifndef TMMINTRIN_H_ +#define TMMINTRIN_H_ + +#if defined(__linux__) && defined(__ppc64__) + +#include <altivec.h> + +/* We need definitions from the SSE header files. */ +#include <pmmintrin.h> + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_epi16 (__m128i __A) +{ + return (__m128i) vec_abs ((__v8hi) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_epi32 (__m128i __A) +{ + return (__m128i) vec_abs ((__v4si) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_epi8 (__m128i __A) +{ + return (__m128i) vec_abs ((__v16qi) __A); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_pi16 (__m64 __A) +{ + __v8hi __B = (__v8hi) (__v2du) { __A, __A }; + return (__m64) ((__v2du) vec_abs (__B))[0]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_pi32 (__m64 __A) +{ + __v4si __B = (__v4si) (__v2du) { __A, __A }; + return (__m64) ((__v2du) vec_abs (__B))[0]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_pi8 (__m64 __A) +{ + __v16qi __B = (__v16qi) (__v2du) { __A, __A }; + return (__m64) ((__v2du) vec_abs (__B))[0]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_alignr_epi8 (__m128i __A, __m128i __B, const unsigned int __count) +{ + if (__builtin_constant_p (__count) && __count < 16) + { +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_reve ((__v16qu) __A); + __B = (__m128i) vec_reve ((__v16qu) __B); +#endif + __A = (__m128i) vec_sld ((__v16qu) __B, (__v16qu) __A, __count); +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_reve ((__v16qu) __A); +#endif + return __A; + } + + if (__count == 0) + return __B; + + if (__count >= 16) + { + if (__count >= 32) + { + const __v16qu zero = { 0 }; + return (__m128i) zero; + } + else + { + const __v16qu __shift = + vec_splats ((unsigned char) ((__count - 16) * 8)); +#ifdef __LITTLE_ENDIAN__ + return (__m128i) vec_sro ((__v16qu) __A, __shift); +#else + return (__m128i) vec_slo ((__v16qu) __A, __shift); +#endif + } + } + else + { + const __v16qu __shiftA = + vec_splats ((unsigned char) ((16 - __count) * 8)); + const __v16qu __shiftB = vec_splats 
((unsigned char) (__count * 8)); +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_slo ((__v16qu) __A, __shiftA); + __B = (__m128i) vec_sro ((__v16qu) __B, __shiftB); +#else + __A = (__m128i) vec_sro ((__v16qu) __A, __shiftA); + __B = (__m128i) vec_slo ((__v16qu) __B, __shiftB); +#endif + return (__m128i) vec_or ((__v16qu) __A, (__v16qu) __B); + } +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_alignr_pi8 (__m64 __A, __m64 __B, unsigned int __count) +{ + if (__count < 16) + { + __v2du __C = { __B, __A }; +#ifdef __LITTLE_ENDIAN__ + const __v4su __shift = { __count << 3, 0, 0, 0 }; + __C = (__v2du) vec_sro ((__v16qu) __C, (__v16qu) __shift); +#else + const __v4su __shift = { 0, 0, 0, __count << 3 }; + __C = (__v2du) vec_slo ((__v16qu) __C, (__v16qu) __shift); +#endif + return (__m64) __C[0]; + } + else + { + const __m64 __zero = { 0 }; + return __zero; + } +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_epi16 (__m128i __A, __m128i __B) +{ + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; + __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P); + __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q); + return (__m128i) vec_add (__C, __D); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_epi32 (__m128i __A, __m128i __B) +{ + const __v16qu __P = + { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 }; + const __v16qu __Q = + { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 }; + __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P); + __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q); + return (__m128i) vec_add (__C, __D); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_pi16 (__m64 __A, __m64 __B) +{ + __v8hi __C = (__v8hi) (__v2du) { __A, __B }; + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 }; + __v8hi __D = vec_perm (__C, __C, __Q); + __C = vec_perm (__C, __C, __P); + __C = vec_add (__C, __D); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_pi32 (__m64 __A, __m64 __B) +{ + __v4si __C = (__v4si) (__v2du) { __A, __B }; + const __v16qu __P = + { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 }; + const __v16qu __Q = + { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 }; + __v4si __D = vec_perm (__C, __C, __Q); + __C = vec_perm (__C, __C, __P); + __C = vec_add (__C, __D); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadds_epi16 (__m128i __A, __m128i __B) +{ + __v4si __C = { 0 }, __D = { 0 }; + __C = vec_sum4s ((__v8hi) __A, __C); + __D = vec_sum4s ((__v8hi) __B, __D); + __C = (__v4si) vec_packs (__C, __D); + return (__m128i) __C; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadds_pi16 (__m64 __A, __m64 __B) +{ + const __v4si __zero = { 0 }; + __v8hi __C = (__v8hi) (__v2du) { __A, __B }; + __v4si __D = vec_sum4s (__C, __zero); + __C = vec_packs (__D, __D); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_hsub_epi16 (__m128i __A, __m128i __B) +{ + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; + __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P); + __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q); + return (__m128i) vec_sub (__C, __D); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsub_epi32 (__m128i __A, __m128i __B) +{ + const __v16qu __P = + { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 }; + const __v16qu __Q = + { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 }; + __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P); + __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q); + return (__m128i) vec_sub (__C, __D); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsub_pi16 (__m64 __A, __m64 __B) +{ + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 }; + __v8hi __C = (__v8hi) (__v2du) { __A, __B }; + __v8hi __D = vec_perm (__C, __C, __Q); + __C = vec_perm (__C, __C, __P); + __C = vec_sub (__C, __D); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsub_pi32 (__m64 __A, __m64 __B) +{ + const __v16qu __P = + { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 }; + const __v16qu __Q = + { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 }; + __v4si __C = (__v4si) (__v2du) { __A, __B }; + __v4si __D = vec_perm (__C, __C, __Q); + __C = vec_perm (__C, __C, __P); + __C = vec_sub (__C, __D); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubs_epi16 (__m128i __A, __m128i __B) +{ + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; + __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P); + __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q); + return (__m128i) vec_subs (__C, __D); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubs_pi16 (__m64 __A, __m64 __B) +{ + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 }; + __v8hi __C = (__v8hi) (__v2du) { __A, __B }; + __v8hi __D = vec_perm (__C, __C, __P); + __v8hi __E = vec_perm (__C, __C, __Q); + __C = vec_subs (__D, __E); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_epi8 (__m128i __A, __m128i __B) +{ + const __v16qi __zero = { 0 }; + __vector __bool char __select = vec_cmplt ((__v16qi) __B, __zero); + __v16qi __C = vec_perm ((__v16qi) __A, (__v16qi) __A, (__v16qu) __B); + return (__m128i) vec_sel (__C, __zero, __select); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_pi8 (__m64 __A, __m64 __B) +{ + const __v16qi __zero = { 0 }; + __v16qi __C = (__v16qi) (__v2du) { __A, __A }; + __v16qi __D = (__v16qi) (__v2du) { __B, __B }; + __vector __bool char __select = vec_cmplt ((__v16qi) __D, __zero); + __C = 
vec_perm ((__v16qi) __C, (__v16qi) __C, (__v16qu) __D); + __C = vec_sel (__C, __zero, __select); + return (__m64) ((__v2du) (__C))[0]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_epi8 (__m128i __A, __m128i __B) +{ + const __v16qi __zero = { 0 }; + __v16qi __selectneg = (__v16qi) vec_cmplt ((__v16qi) __B, __zero); + __v16qi __selectpos = + (__v16qi) vec_neg ((__v16qi) vec_cmpgt ((__v16qi) __B, __zero)); + __v16qi __conv = vec_add (__selectneg, __selectpos); + return (__m128i) vec_mul ((__v16qi) __A, (__v16qi) __conv); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_epi16 (__m128i __A, __m128i __B) +{ + const __v8hi __zero = { 0 }; + __v8hi __selectneg = (__v8hi) vec_cmplt ((__v8hi) __B, __zero); + __v8hi __selectpos = + (__v8hi) vec_neg ((__v8hi) vec_cmpgt ((__v8hi) __B, __zero)); + __v8hi __conv = vec_add (__selectneg, __selectpos); + return (__m128i) vec_mul ((__v8hi) __A, (__v8hi) __conv); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_epi32 (__m128i __A, __m128i __B) +{ + const __v4si __zero = { 0 }; + __v4si __selectneg = (__v4si) vec_cmplt ((__v4si) __B, __zero); + __v4si __selectpos = + (__v4si) vec_neg ((__v4si) vec_cmpgt ((__v4si) __B, __zero)); + __v4si __conv = vec_add (__selectneg, __selectpos); + return (__m128i) vec_mul ((__v4si) __A, (__v4si) __conv); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_pi8 (__m64 __A, __m64 __B) +{ + const __v16qi __zero = { 0 }; + __v16qi __C = (__v16qi) (__v2du) { __A, __A }; + __v16qi __D = (__v16qi) (__v2du) { __B, __B }; + __C = (__v16qi) _mm_sign_epi8 ((__m128i) __C, (__m128i) __D); + return (__m64) ((__v2du) (__C))[0]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_pi16 (__m64 __A, __m64 __B) +{ + const __v8hi __zero = { 0 }; + __v8hi __C = (__v8hi) (__v2du) { __A, __A }; + __v8hi __D = (__v8hi) (__v2du) { __B, __B }; + __C = (__v8hi) _mm_sign_epi16 ((__m128i) __C, (__m128i) __D); + return (__m64) ((__v2du) (__C))[0]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_pi32 (__m64 __A, __m64 __B) +{ + const __v4si __zero = { 0 }; + __v4si __C = (__v4si) (__v2du) { __A, __A }; + __v4si __D = (__v4si) (__v2du) { __B, __B }; + __C = (__v4si) _mm_sign_epi32 ((__m128i) __C, (__m128i) __D); + return (__m64) ((__v2du) (__C))[0]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maddubs_epi16 (__m128i __A, __m128i __B) +{ + __v8hi __unsigned = vec_splats ((signed short) 0x00ff); + __v8hi __C = vec_and (vec_unpackh ((__v16qi) __A), __unsigned); + __v8hi __D = vec_and (vec_unpackl ((__v16qi) __A), __unsigned); + __v8hi __E = vec_unpackh ((__v16qi) __B); + __v8hi __F = vec_unpackl ((__v16qi) __B); + __C = vec_mul (__C, __E); + __D = vec_mul (__D, __F); + const __v16qu __odds = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; + const __v16qu __evens = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; + __E = vec_perm (__C, __D, __odds); + __F = vec_perm (__C, __D, __evens); + return (__m128i) vec_adds (__E, __F); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maddubs_pi16 (__m64 __A, __m64 __B) +{ + __v8hi __C = (__v8hi) (__v2du) { __A, __A }; + __C = vec_unpackl 
((__v16qi) __C); + const __v8hi __unsigned = vec_splats ((signed short) 0x00ff); + __C = vec_and (__C, __unsigned); + __v8hi __D = (__v8hi) (__v2du) { __B, __B }; + __D = vec_unpackl ((__v16qi) __D); + __D = vec_mul (__C, __D); + const __v16qu __odds = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; + const __v16qu __evens = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; + __C = vec_perm (__D, __D, __odds); + __D = vec_perm (__D, __D, __evens); + __C = vec_adds (__C, __D); + return (__m64) ((__v2du) (__C))[0]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulhrs_epi16 (__m128i __A, __m128i __B) +{ + __v4si __C = vec_unpackh ((__v8hi) __A); + __v4si __D = vec_unpackh ((__v8hi) __B); + __C = vec_mul (__C, __D); + __D = vec_unpackl ((__v8hi) __A); + __v4si __E = vec_unpackl ((__v8hi) __B); + __D = vec_mul (__D, __E); + const __v4su __shift = vec_splats ((unsigned int) 14); + __C = vec_sr (__C, __shift); + __D = vec_sr (__D, __shift); + const __v4si __ones = vec_splats ((signed int) 1); + __C = vec_add (__C, __ones); + __C = vec_sr (__C, (__v4su) __ones); + __D = vec_add (__D, __ones); + __D = vec_sr (__D, (__v4su) __ones); + return (__m128i) vec_pack (__C, __D); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulhrs_pi16 (__m64 __A, __m64 __B) +{ + __v4si __C = (__v4si) (__v2du) { __A, __A }; + __C = vec_unpackh ((__v8hi) __C); + __v4si __D = (__v4si) (__v2du) { __B, __B }; + __D = vec_unpackh ((__v8hi) __D); + __C = vec_mul (__C, __D); + const __v4su __shift = vec_splats ((unsigned int) 14); + __C = vec_sr (__C, __shift); + const __v4si __ones = vec_splats ((signed int) 1); + __C = vec_add (__C, __ones); + __C = vec_sr (__C, (__v4su) __ones); + __v8hi __E = vec_pack (__C, __D); + return (__m64) ((__v2du) (__E))[0]; +} + +#else +#include_next <tmmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + +#endif /* TMMINTRIN_H_ */
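The shift-right by 14, add 1, shift-right by 1 sequence in _mm_mulhrs_epi16 above is the usual trick for a rounding high multiply: on the 32-bit product it equals (a*b + 0x4000) >> 15. A scalar reference (illustrative only):

static short mulhrs_ref(short a, short b) {
  int p = (int)a * (int)b;              /* exact 32-bit product */
  return (short)(((p >> 14) + 1) >> 1); /* same result as (p + 0x4000) >> 15 */
}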
diff --git a/lib/include/ppc_wrappers/xmmintrin.h index 1b322b6651..0f429fa040 100644 --- a/lib/include/ppc_wrappers/xmmintrin.h +++ b/lib/include/ppc_wrappers/xmmintrin.h @@ -34,6 +34,8 @@ #ifndef _XMMINTRIN_H_INCLUDED #define _XMMINTRIN_H_INCLUDED +#if defined(__linux__) && defined(__ppc64__) + /* Define four value permute mask */ #define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z)) @@ -1835,4 +1837,8 @@ do { \ /* For backward source compatibility. */ //# include <emmintrin.h> +#else +#include_next <xmmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + #endif /* _XMMINTRIN_H_INCLUDED */ diff --git a/lib/include/xmmintrin.h index 75ff37655b..0e61eab44a 100644 --- a/lib/include/xmmintrin.h +++ b/lib/include/xmmintrin.h @@ -1627,7 +1627,7 @@ _mm_loadh_pi(__m128 __a, const __m64 *__p) struct __mm_loadh_pi_struct { __mm_loadh_pi_v2f32 __u; } __attribute__((__packed__, __may_alias__)); - __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u; + __mm_loadh_pi_v2f32 __b = ((const struct __mm_loadh_pi_struct*)__p)->__u; __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5); } @@ -1654,7 +1654,7 @@ _mm_loadl_pi(__m128 __a, const __m64 *__p) struct __mm_loadl_pi_struct { __mm_loadl_pi_v2f32 __u; } __attribute__((__packed__, __may_alias__)); - __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u; + __mm_loadl_pi_v2f32 __b = ((const struct __mm_loadl_pi_struct*)__p)->__u; __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3); } @@ -1680,7 +1680,7 @@ _mm_load_ss(const float *__p) struct __mm_load_ss_struct { float __u; } __attribute__((__packed__, __may_alias__)); - float __u = ((struct __mm_load_ss_struct*)__p)->__u; + float __u = ((const struct __mm_load_ss_struct*)__p)->__u; return __extension__ (__m128){ __u, 0, 0, 0 }; } @@ -1702,7 +1702,7 @@ _mm_load1_ps(const float *__p) struct __mm_load1_ps_struct { float __u; } __attribute__((__packed__, __may_alias__)); - float __u = ((struct __mm_load1_ps_struct*)__p)->__u; + float __u = ((const struct __mm_load1_ps_struct*)__p)->__u; return __extension__ (__m128){ __u, __u, __u, __u }; } @@ -1722,7 +1722,7 @@ _mm_load1_ps(const float *__p) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load_ps(const float *__p) { - return *(__m128*)__p; + return *(const __m128*)__p; } /// Loads a 128-bit floating-point vector of [4 x float] from an @@ -1742,7 +1742,7 @@ _mm_loadu_ps(const float *__p) struct __loadu_ps { __m128_u __v; } __attribute__((__packed__, __may_alias__)); - return ((struct __loadu_ps*)__p)->__v; + return ((const struct __loadu_ps*)__p)->__v; } /// Loads four packed float values, in reverse order, from an aligned @@ -2100,7 +2100,7 @@ _mm_storer_ps(float *__p, __m128 __a) /// be generated. \n /// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will /// be generated. -#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \ +#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), \ ((sel) >> 2) & 1, (sel) & 0x3)) #endif
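With the (const void *) cast added to _mm_prefetch, prefetching through a pointer-to-const no longer needs a cast at the call site. A minimal sketch (sum_with_prefetch and the 16-float lookahead stride are arbitrary illustrations, not part of the patch):

#include <xmmintrin.h>

static float sum_with_prefetch(const float *data, int n) {
  float s = 0.0f;
  for (int i = 0; i < n; ++i) {
    if (i + 16 < n)
      _mm_prefetch((const char *)(data + i + 16), _MM_HINT_T0); /* hint upcoming lines */
    s += data[i];
  }
  return s;
}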