diff --git a/lib/include/__clang_cuda_device_functions.h b/lib/include/__clang_cuda_device_functions.h index cc4e1a4dd9..f8a12cefdb 100644 --- a/lib/include/__clang_cuda_device_functions.h +++ b/lib/include/__clang_cuda_device_functions.h @@ -502,8 +502,8 @@ __DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); } __DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); } __DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); } __DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); } -__DEVICE__ int __popc(int __a) { return __nv_popc(__a); } -__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); } +__DEVICE__ int __popc(unsigned int __a) { return __nv_popc(__a); } +__DEVICE__ int __popcll(unsigned long long __a) { return __nv_popcll(__a); } __DEVICE__ float __powf(float __a, float __b) { return __nv_fast_powf(__a, __b); } diff --git a/lib/include/__clang_cuda_libdevice_declares.h b/lib/include/__clang_cuda_libdevice_declares.h index 23f35964ea..ded0382a7d 100644 --- a/lib/include/__clang_cuda_libdevice_declares.h +++ b/lib/include/__clang_cuda_libdevice_declares.h @@ -285,8 +285,8 @@ __DEVICE__ double __nv_normcdfinv(double __a); __DEVICE__ float __nv_normcdfinvf(float __a); __DEVICE__ float __nv_normf(int __a, const float *__b); __DEVICE__ double __nv_norm(int __a, const double *__b); -__DEVICE__ int __nv_popc(int __a); -__DEVICE__ int __nv_popcll(long long __a); +__DEVICE__ int __nv_popc(unsigned int __a); +__DEVICE__ int __nv_popcll(unsigned long long __a); __DEVICE__ double __nv_pow(double __a, double __b); __DEVICE__ float __nv_powf(float __a, float __b); __DEVICE__ double __nv_powi(double __a, int __b); diff --git a/lib/include/__clang_cuda_math.h b/lib/include/__clang_cuda_math.h index e447590393..0401916506 100644 --- a/lib/include/__clang_cuda_math.h +++ b/lib/include/__clang_cuda_math.h @@ -36,7 +36,7 @@ // because the OpenMP overlay requires constexpr functions here but prior to // c++14 void return functions could not be constexpr. #pragma push_macro("__DEVICE_VOID__") -#ifdef __OPENMP_NVPTX__ && defined(__cplusplus) && __cplusplus < 201402L +#if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L #define __DEVICE_VOID__ static __attribute__((always_inline, nothrow)) #else #define __DEVICE_VOID__ __DEVICE__ @@ -45,9 +45,9 @@ // libdevice provides fast low precision and slow full-recision implementations // for some functions. Which one gets selected depends on // __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if -// -ffast-math or -fcuda-approx-transcendentals are in effect. +// -ffast-math or -fgpu-approx-transcendentals are in effect. #pragma push_macro("__FAST_OR_SLOW") -#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__) +#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__) #define __FAST_OR_SLOW(fast, slow) fast #else #define __FAST_OR_SLOW(fast, slow) slow diff --git a/lib/include/__clang_cuda_runtime_wrapper.h b/lib/include/__clang_cuda_runtime_wrapper.h index 512fc300fc..d369c86fe1 100644 --- a/lib/include/__clang_cuda_runtime_wrapper.h +++ b/lib/include/__clang_cuda_runtime_wrapper.h @@ -196,12 +196,12 @@ inline __host__ double __signbitd(double x) { // math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we // get the slow-but-accurate or fast-but-inaccurate versions of functions like -// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals. +// sin and exp. 
This is controlled in clang by -fgpu-approx-transcendentals. // // device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs. // slow divides), so we need to scope our define carefully here. #pragma push_macro("__USE_FAST_MATH__") -#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__) +#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__) #define __USE_FAST_MATH__ 1 #endif diff --git a/lib/include/__clang_hip_math.h b/lib/include/__clang_hip_math.h index a47dda3327..11e1e7d032 100644 --- a/lib/include/__clang_hip_math.h +++ b/lib/include/__clang_hip_math.h @@ -14,9 +14,6 @@ #endif #if !defined(__HIPCC_RTC__) -#if defined(__cplusplus) -#include -#endif #include #include #ifdef __OPENMP_AMDGCN__ @@ -32,6 +29,17 @@ #define __DEVICE__ static __device__ inline __attribute__((always_inline)) #endif +// Device library provides fast low precision and slow full-recision +// implementations for some functions. Which one gets selected depends on +// __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if +// -ffast-math or -fgpu-approx-transcendentals are in effect. +#pragma push_macro("__FAST_OR_SLOW") +#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__) +#define __FAST_OR_SLOW(fast, slow) fast +#else +#define __FAST_OR_SLOW(fast, slow) slow +#endif + // A few functions return bool type starting only in C++11. #pragma push_macro("__RETURN_TYPE") #ifdef __OPENMP_AMDGCN__ @@ -139,21 +147,180 @@ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) { } // BEGIN FLOAT + +// BEGIN INTRINSICS + +__DEVICE__ +float __cosf(float __x) { return __ocml_native_cos_f32(__x); } + +__DEVICE__ +float __exp10f(float __x) { + const float __log2_10 = 0x1.a934f0p+1f; + return __builtin_amdgcn_exp2f(__log2_10 * __x); +} + +__DEVICE__ +float __expf(float __x) { + const float __log2_e = 0x1.715476p+0; + return __builtin_amdgcn_exp2f(__log2_e * __x); +} + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); } +__DEVICE__ +float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); } +__DEVICE__ +float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); } +__DEVICE__ +float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fadd_rn(float __x, float __y) { return __x + __y; } +#endif + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); } +__DEVICE__ +float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); } +__DEVICE__ +float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); } +__DEVICE__ +float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fdiv_rn(float __x, float __y) { return __x / __y; } +#endif + +__DEVICE__ +float __fdividef(float __x, float __y) { return __x / __y; } + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fmaf_rd(float __x, float __y, float __z) { + return __ocml_fma_rtn_f32(__x, __y, __z); +} +__DEVICE__ +float __fmaf_rn(float __x, float __y, float __z) { + return __ocml_fma_rte_f32(__x, __y, __z); +} +__DEVICE__ +float __fmaf_ru(float __x, float __y, float __z) { + return __ocml_fma_rtp_f32(__x, __y, __z); +} +__DEVICE__ +float __fmaf_rz(float __x, float __y, float __z) { + return __ocml_fma_rtz_f32(__x, __y, __z); +} +#else +__DEVICE__ +float __fmaf_rn(float __x, float __y, float __z) { + return 
__builtin_fmaf(__x, __y, __z); +} +#endif + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); } +__DEVICE__ +float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); } +__DEVICE__ +float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); } +__DEVICE__ +float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fmul_rn(float __x, float __y) { return __x * __y; } +#endif + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); } +__DEVICE__ +float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); } +__DEVICE__ +float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); } +__DEVICE__ +float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); } +#else +__DEVICE__ +float __frcp_rn(float __x) { return 1.0f / __x; } +#endif + +__DEVICE__ +float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); } + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } +__DEVICE__ +float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); } +__DEVICE__ +float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } +__DEVICE__ +float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } +#else +__DEVICE__ +float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } +#endif + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); } +__DEVICE__ +float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); } +__DEVICE__ +float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); } +__DEVICE__ +float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fsub_rn(float __x, float __y) { return __x - __y; } +#endif + +__DEVICE__ +float __log10f(float __x) { return __builtin_log10f(__x); } + +__DEVICE__ +float __log2f(float __x) { return __builtin_amdgcn_logf(__x); } + +__DEVICE__ +float __logf(float __x) { return __builtin_logf(__x); } + +__DEVICE__ +float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } + +__DEVICE__ +float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 
1 : __x); } + +__DEVICE__ +void __sincosf(float __x, float *__sinptr, float *__cosptr) { + *__sinptr = __ocml_native_sin_f32(__x); + *__cosptr = __ocml_native_cos_f32(__x); +} + +__DEVICE__ +float __sinf(float __x) { return __ocml_native_sin_f32(__x); } + +__DEVICE__ +float __tanf(float __x) { + return __sinf(__x) * __builtin_amdgcn_rcpf(__cosf(__x)); +} +// END INTRINSICS + #if defined(__cplusplus) __DEVICE__ int abs(int __x) { - int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); - return (__x ^ __sgn) - __sgn; + return __builtin_abs(__x); } __DEVICE__ long labs(long __x) { - long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); - return (__x ^ __sgn) - __sgn; + return __builtin_labs(__x); } __DEVICE__ long long llabs(long long __x) { - long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); - return (__x ^ __sgn) - __sgn; + return __builtin_llabs(__x); } #endif @@ -188,7 +355,7 @@ __DEVICE__ float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); } __DEVICE__ -float cosf(float __x) { return __ocml_cos_f32(__x); } +float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); } __DEVICE__ float coshf(float __x) { return __ocml_cosh_f32(__x); } @@ -321,13 +488,13 @@ __DEVICE__ float log1pf(float __x) { return __ocml_log1p_f32(__x); } __DEVICE__ -float log2f(float __x) { return __builtin_log2f(__x); } +float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); } __DEVICE__ float logbf(float __x) { return __ocml_logb_f32(__x); } __DEVICE__ -float logf(float __x) { return __builtin_logf(__x); } +float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); } __DEVICE__ long int lrintf(float __x) { return __builtin_rintf(__x); } @@ -401,7 +568,7 @@ float normf(int __dim, ++__a; } - return __ocml_sqrt_f32(__r); + return __builtin_sqrtf(__r); } __DEVICE__ @@ -483,9 +650,13 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif +#ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__ + __sincosf(__x, __sinptr, __cosptr); +#else *__sinptr = __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; +#endif } __DEVICE__ @@ -500,7 +671,7 @@ void sincospif(float __x, float *__sinptr, float *__cosptr) { } __DEVICE__ -float sinf(float __x) { return __ocml_sin_f32(__x); } +float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); } __DEVICE__ float sinhf(float __x) { return __ocml_sinh_f32(__x); } @@ -509,7 +680,7 @@ __DEVICE__ float sinpif(float __x) { return __ocml_sinpi_f32(__x); } __DEVICE__ -float sqrtf(float __x) { return __ocml_sqrt_f32(__x); } +float sqrtf(float __x) { return __builtin_sqrtf(__x); } __DEVICE__ float tanf(float __x) { return __ocml_tan_f32(__x); } @@ -551,158 +722,7 @@ float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication return __x1; } -// BEGIN INTRINSICS -__DEVICE__ -float __cosf(float __x) { return __ocml_native_cos_f32(__x); } - -__DEVICE__ -float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); } - -__DEVICE__ -float __expf(float __x) { return __ocml_native_exp_f32(__x); } - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); } -__DEVICE__ -float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); } -__DEVICE__ -float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); } -__DEVICE__ -float __fadd_rz(float __x, float 
__y) { return __ocml_add_rtz_f32(__x, __y); } -#else -__DEVICE__ -float __fadd_rn(float __x, float __y) { return __x + __y; } -#endif - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); } -__DEVICE__ -float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); } -__DEVICE__ -float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); } -__DEVICE__ -float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); } -#else -__DEVICE__ -float __fdiv_rn(float __x, float __y) { return __x / __y; } -#endif - -__DEVICE__ -float __fdividef(float __x, float __y) { return __x / __y; } - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fmaf_rd(float __x, float __y, float __z) { - return __ocml_fma_rtn_f32(__x, __y, __z); -} -__DEVICE__ -float __fmaf_rn(float __x, float __y, float __z) { - return __ocml_fma_rte_f32(__x, __y, __z); -} -__DEVICE__ -float __fmaf_ru(float __x, float __y, float __z) { - return __ocml_fma_rtp_f32(__x, __y, __z); -} -__DEVICE__ -float __fmaf_rz(float __x, float __y, float __z) { - return __ocml_fma_rtz_f32(__x, __y, __z); -} -#else -__DEVICE__ -float __fmaf_rn(float __x, float __y, float __z) { - return __builtin_fmaf(__x, __y, __z); -} -#endif - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); } -__DEVICE__ -float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); } -__DEVICE__ -float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); } -__DEVICE__ -float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); } -#else -__DEVICE__ -float __fmul_rn(float __x, float __y) { return __x * __y; } -#endif - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); } -__DEVICE__ -float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); } -__DEVICE__ -float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); } -__DEVICE__ -float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); } -#else -__DEVICE__ -float __frcp_rn(float __x) { return 1.0f / __x; } -#endif - -__DEVICE__ -float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); } - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } -__DEVICE__ -float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); } -__DEVICE__ -float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } -__DEVICE__ -float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } -#else -__DEVICE__ -float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } -#endif - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); } -__DEVICE__ -float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); } -__DEVICE__ -float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); } -__DEVICE__ -float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); } -#else -__DEVICE__ -float __fsub_rn(float __x, float __y) { return __x - __y; } -#endif - -__DEVICE__ -float __log10f(float __x) { return __ocml_native_log10_f32(__x); } - -__DEVICE__ -float __log2f(float __x) { return __ocml_native_log2_f32(__x); } - -__DEVICE__ -float __logf(float __x) { return __ocml_native_log_f32(__x); } - 
-__DEVICE__ -float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } - -__DEVICE__ -float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); } - -__DEVICE__ -void __sincosf(float __x, float *__sinptr, float *__cosptr) { - *__sinptr = __ocml_native_sin_f32(__x); - *__cosptr = __ocml_native_cos_f32(__x); -} - -__DEVICE__ -float __sinf(float __x) { return __ocml_native_sin_f32(__x); } - -__DEVICE__ -float __tanf(float __x) { return __ocml_tan_f32(__x); } -// END INTRINSICS // END FLOAT // BEGIN DOUBLE @@ -941,7 +961,7 @@ double norm(int __dim, ++__a; } - return __ocml_sqrt_f64(__r); + return __builtin_sqrt(__r); } __DEVICE__ @@ -1064,7 +1084,7 @@ __DEVICE__ double sinpi(double __x) { return __ocml_sinpi_f64(__x); } __DEVICE__ -double sqrt(double __x) { return __ocml_sqrt_f64(__x); } +double sqrt(double __x) { return __builtin_sqrt(__x); } __DEVICE__ double tan(double __x) { return __ocml_tan_f64(__x); } @@ -1198,7 +1218,7 @@ __DEVICE__ double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); } #else __DEVICE__ -double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); } +double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS @@ -1288,16 +1308,17 @@ double min(double __x, double __y) { return __builtin_fmin(__x, __y); } #if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) __host__ inline static int min(int __arg1, int __arg2) { - return std::min(__arg1, __arg2); + return __arg1 < __arg2 ? __arg1 : __arg2; } __host__ inline static int max(int __arg1, int __arg2) { - return std::max(__arg1, __arg2); + return __arg1 > __arg2 ? __arg1 : __arg2; } #endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) #endif #pragma pop_macro("__DEVICE__") #pragma pop_macro("__RETURN_TYPE") +#pragma pop_macro("__FAST_OR_SLOW") #endif // __CLANG_HIP_MATH_H__ diff --git a/lib/include/__clang_hip_runtime_wrapper.h b/lib/include/__clang_hip_runtime_wrapper.h index e8817073ef..ed1550038e 100644 --- a/lib/include/__clang_hip_runtime_wrapper.h +++ b/lib/include/__clang_hip_runtime_wrapper.h @@ -46,6 +46,67 @@ extern "C" { } #endif //__cplusplus +#if !defined(__HIPCC_RTC__) +#if __has_include("hip/hip_version.h") +#include "hip/hip_version.h" +#endif // __has_include("hip/hip_version.h") +#endif // __HIPCC_RTC__ + +typedef __SIZE_TYPE__ __hip_size_t; + +#ifdef __cplusplus +extern "C" { +#endif //__cplusplus + +#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405 +__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size); +__device__ void __ockl_dm_dealloc(unsigned long long __addr); +#if __has_feature(address_sanitizer) +__device__ unsigned long long __asan_malloc_impl(unsigned long long __size, + unsigned long long __pc); +__device__ void __asan_free_impl(unsigned long long __addr, + unsigned long long __pc); +__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) { + unsigned long long __pc = (unsigned long long)__builtin_return_address(0); + return (void *)__asan_malloc_impl(__size, __pc); +} +__attribute__((noinline, weak)) __device__ void free(void *__ptr) { + unsigned long long __pc = (unsigned long long)__builtin_return_address(0); + __asan_free_impl((unsigned long long)__ptr, __pc); +} +#else // __has_feature(address_sanitizer) +__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { + return (void *) __ockl_dm_alloc(__size); +} +__attribute__((weak)) inline __device__ void free(void *__ptr) { + __ockl_dm_dealloc((unsigned long long)__ptr); 
+} +#endif // __has_feature(address_sanitizer) +#else // HIP version check +#if __HIP_ENABLE_DEVICE_MALLOC__ +__device__ void *__hip_malloc(__hip_size_t __size); +__device__ void *__hip_free(void *__ptr); +__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { + return __hip_malloc(__size); +} +__attribute__((weak)) inline __device__ void free(void *__ptr) { + __hip_free(__ptr); +} +#else // __HIP_ENABLE_DEVICE_MALLOC__ +__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { + __builtin_trap(); + return (void *)0; +} +__attribute__((weak)) inline __device__ void free(void *__ptr) { + __builtin_trap(); +} +#endif // __HIP_ENABLE_DEVICE_MALLOC__ +#endif // HIP version check + +#ifdef __cplusplus +} // extern "C" +#endif //__cplusplus + #if !defined(__HIPCC_RTC__) #include #include @@ -71,59 +132,6 @@ typedef __SIZE_TYPE__ size_t; #define INT_MAX __INTMAX_MAX__ #endif // __HIPCC_RTC__ -typedef __SIZE_TYPE__ __hip_size_t; - -#ifdef __cplusplus -extern "C" { -#endif //__cplusplus - -#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405 -extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size); -extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr); -#if __has_feature(address_sanitizer) -extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc); -extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc); -__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) { - unsigned long long __pc = (unsigned long long)__builtin_return_address(0); - return (void *)__asan_malloc_impl(__size, __pc); -} -__attribute__((noinline, weak)) __device__ void free(void *__ptr) { - unsigned long long __pc = (unsigned long long)__builtin_return_address(0); - __asan_free_impl((unsigned long long)__ptr, __pc); -} -#else -__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { - return (void *) __ockl_dm_alloc(__size); -} -__attribute__((weak)) inline __device__ void free(void *__ptr) { - __ockl_dm_dealloc((unsigned long long)__ptr); -} -#endif // __has_feature(address_sanitizer) -#else // HIP version check -#if __HIP_ENABLE_DEVICE_MALLOC__ -__device__ void *__hip_malloc(__hip_size_t __size); -__device__ void *__hip_free(void *__ptr); -__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { - return __hip_malloc(__size); -} -__attribute__((weak)) inline __device__ void free(void *__ptr) { - __hip_free(__ptr); -} -#else -__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { - __builtin_trap(); - return (void *)0; -} -__attribute__((weak)) inline __device__ void free(void *__ptr) { - __builtin_trap(); -} -#endif -#endif // HIP version check - -#ifdef __cplusplus -} // extern "C" -#endif //__cplusplus - #include <__clang_hip_libdevice_declares.h> #include <__clang_hip_math.h> #include <__clang_hip_stdlib.h> diff --git a/lib/include/__stdarg___gnuc_va_list.h b/lib/include/__stdarg___gnuc_va_list.h new file mode 100644 index 0000000000..2a0a7e8cc6 --- /dev/null +++ b/lib/include/__stdarg___gnuc_va_list.h @@ -0,0 +1,13 @@ +/*===---- __stdarg___gnuc_va_list.h - Definition of __gnuc_va_list ---------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __GNUC_VA_LIST +#define __GNUC_VA_LIST +typedef __builtin_va_list __gnuc_va_list; +#endif diff --git a/lib/include/__stdarg___va_copy.h b/lib/include/__stdarg___va_copy.h new file mode 100644 index 0000000000..e433e18a70 --- /dev/null +++ b/lib/include/__stdarg___va_copy.h @@ -0,0 +1,12 @@ +/*===---- __stdarg___va_copy.h - Definition of __va_copy -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __va_copy +#define __va_copy(d, s) __builtin_va_copy(d, s) +#endif diff --git a/lib/include/__stdarg_va_arg.h b/lib/include/__stdarg_va_arg.h new file mode 100644 index 0000000000..89bd2f65d3 --- /dev/null +++ b/lib/include/__stdarg_va_arg.h @@ -0,0 +1,22 @@ +/*===---- __stdarg_va_arg.h - Definitions of va_start, va_arg, va_end-------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef va_arg + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +/* C23 does not require the second parameter for va_start. */ +#define va_start(ap, ...) __builtin_va_start(ap, 0) +#else +/* Versions before C23 do require the second parameter. */ +#define va_start(ap, param) __builtin_va_start(ap, param) +#endif +#define va_end(ap) __builtin_va_end(ap) +#define va_arg(ap, type) __builtin_va_arg(ap, type) + +#endif diff --git a/lib/include/__stdarg_va_copy.h b/lib/include/__stdarg_va_copy.h new file mode 100644 index 0000000000..8645328c2c --- /dev/null +++ b/lib/include/__stdarg_va_copy.h @@ -0,0 +1,12 @@ +/*===---- __stdarg_va_copy.h - Definition of va_copy------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef va_copy +#define va_copy(dest, src) __builtin_va_copy(dest, src) +#endif diff --git a/lib/include/__stdarg_va_list.h b/lib/include/__stdarg_va_list.h new file mode 100644 index 0000000000..20c2e2cad9 --- /dev/null +++ b/lib/include/__stdarg_va_list.h @@ -0,0 +1,13 @@ +/*===---- __stdarg_va_list.h - Definition of va_list -----------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _VA_LIST +#define _VA_LIST +typedef __builtin_va_list va_list; +#endif diff --git a/lib/include/__stddef_max_align_t.h b/lib/include/__stddef_max_align_t.h index e3b439285d..512606a877 100644 --- a/lib/include/__stddef_max_align_t.h +++ b/lib/include/__stddef_max_align_t.h @@ -1,4 +1,4 @@ -/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---=== +/*===---- __stddef_max_align_t.h - Definition of max_align_t ---------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. diff --git a/lib/include/__stddef_null.h b/lib/include/__stddef_null.h new file mode 100644 index 0000000000..c10bd2d7d9 --- /dev/null +++ b/lib/include/__stddef_null.h @@ -0,0 +1,29 @@ +/*===---- __stddef_null.h - Definition of NULL -----------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined(NULL) || !__building_module(_Builtin_stddef) + +/* linux/stddef.h will define NULL to 0. glibc (and other) headers then define + * __need_NULL and rely on stddef.h to redefine NULL to the correct value again. + * Modules don't support redefining macros like that, but support that pattern + * in the non-modules case. + */ +#undef NULL + +#ifdef __cplusplus +#if !defined(__MINGW32__) && !defined(_MSC_VER) +#define NULL __null +#else +#define NULL 0 +#endif +#else +#define NULL ((void*)0) +#endif + +#endif diff --git a/lib/include/__stddef_nullptr_t.h b/lib/include/__stddef_nullptr_t.h new file mode 100644 index 0000000000..7f3fbe6fe0 --- /dev/null +++ b/lib/include/__stddef_nullptr_t.h @@ -0,0 +1,29 @@ +/*===---- __stddef_nullptr_t.h - Definition of nullptr_t -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* + * When -fbuiltin-headers-in-system-modules is set this is a non-modular header + * and needs to behave as if it was textual. + */ +#if !defined(_NULLPTR_T) || \ + (__has_feature(modules) && !__building_module(_Builtin_stddef)) +#define _NULLPTR_T + +#ifdef __cplusplus +#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED) +namespace std { +typedef decltype(nullptr) nullptr_t; +} +using ::std::nullptr_t; +#endif +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +typedef typeof(nullptr) nullptr_t; +#endif + +#endif diff --git a/lib/include/__stddef_offsetof.h b/lib/include/__stddef_offsetof.h new file mode 100644 index 0000000000..84172c6cd2 --- /dev/null +++ b/lib/include/__stddef_offsetof.h @@ -0,0 +1,17 @@ +/*===---- __stddef_offsetof.h - Definition of offsetof ---------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* + * When -fbuiltin-headers-in-system-modules is set this is a non-modular header + * and needs to behave as if it was textual. + */ +#if !defined(offsetof) || \ + (__has_feature(modules) && !__building_module(_Builtin_stddef)) +#define offsetof(t, d) __builtin_offsetof(t, d) +#endif diff --git a/lib/include/__stddef_ptrdiff_t.h b/lib/include/__stddef_ptrdiff_t.h new file mode 100644 index 0000000000..fd3c893c66 --- /dev/null +++ b/lib/include/__stddef_ptrdiff_t.h @@ -0,0 +1,20 @@ +/*===---- __stddef_ptrdiff_t.h - Definition of ptrdiff_t -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* + * When -fbuiltin-headers-in-system-modules is set this is a non-modular header + * and needs to behave as if it was textual. + */ +#if !defined(_PTRDIFF_T) || \ + (__has_feature(modules) && !__building_module(_Builtin_stddef)) +#define _PTRDIFF_T + +typedef __PTRDIFF_TYPE__ ptrdiff_t; + +#endif diff --git a/lib/include/__stddef_rsize_t.h b/lib/include/__stddef_rsize_t.h new file mode 100644 index 0000000000..dd433d40d9 --- /dev/null +++ b/lib/include/__stddef_rsize_t.h @@ -0,0 +1,20 @@ +/*===---- __stddef_rsize_t.h - Definition of rsize_t -----------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* + * When -fbuiltin-headers-in-system-modules is set this is a non-modular header + * and needs to behave as if it was textual. + */ +#if !defined(_RSIZE_T) || \ + (__has_feature(modules) && !__building_module(_Builtin_stddef)) +#define _RSIZE_T + +typedef __SIZE_TYPE__ rsize_t; + +#endif diff --git a/lib/include/__stddef_size_t.h b/lib/include/__stddef_size_t.h new file mode 100644 index 0000000000..3dd7b1f379 --- /dev/null +++ b/lib/include/__stddef_size_t.h @@ -0,0 +1,20 @@ +/*===---- __stddef_size_t.h - Definition of size_t -------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* + * When -fbuiltin-headers-in-system-modules is set this is a non-modular header + * and needs to behave as if it was textual. + */ +#if !defined(_SIZE_T) || \ + (__has_feature(modules) && !__building_module(_Builtin_stddef)) +#define _SIZE_T + +typedef __SIZE_TYPE__ size_t; + +#endif diff --git a/lib/include/__stddef_unreachable.h b/lib/include/__stddef_unreachable.h new file mode 100644 index 0000000000..61df43e973 --- /dev/null +++ b/lib/include/__stddef_unreachable.h @@ -0,0 +1,21 @@ +/*===---- __stddef_unreachable.h - Definition of unreachable ---------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __cplusplus + +/* + * When -fbuiltin-headers-in-system-modules is set this is a non-modular header + * and needs to behave as if it was textual. + */ +#if !defined(unreachable) || \ + (__has_feature(modules) && !__building_module(_Builtin_stddef)) +#define unreachable() __builtin_unreachable() +#endif + +#endif diff --git a/lib/include/__stddef_wchar_t.h b/lib/include/__stddef_wchar_t.h new file mode 100644 index 0000000000..bd69f63225 --- /dev/null +++ b/lib/include/__stddef_wchar_t.h @@ -0,0 +1,28 @@ +/*===---- __stddef_wchar.h - Definition of wchar_t -------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED) + +/* + * When -fbuiltin-headers-in-system-modules is set this is a non-modular header + * and needs to behave as if it was textual. + */ +#if !defined(_WCHAR_T) || \ + (__has_feature(modules) && !__building_module(_Builtin_stddef)) +#define _WCHAR_T + +#ifdef _MSC_EXTENSIONS +#define _WCHAR_T_DEFINED +#endif + +typedef __WCHAR_TYPE__ wchar_t; + +#endif + +#endif diff --git a/lib/include/__stddef_wint_t.h b/lib/include/__stddef_wint_t.h new file mode 100644 index 0000000000..0aa2915079 --- /dev/null +++ b/lib/include/__stddef_wint_t.h @@ -0,0 +1,15 @@ +/*===---- __stddef_wint.h - Definition of wint_t ---------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _WINT_T +#define _WINT_T + +typedef __WINT_TYPE__ wint_t; + +#endif diff --git a/lib/include/adcintrin.h b/lib/include/adcintrin.h new file mode 100644 index 0000000000..0065a1b543 --- /dev/null +++ b/lib/include/adcintrin.h @@ -0,0 +1,160 @@ +/*===---- adcintrin.h - ADC intrinsics -------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __ADCINTRIN_H +#define __ADCINTRIN_H + +#if !defined(__i386__) && !defined(__x86_64__) +#error "This header is only meant to be used on x86 and x64 architecture" +#endif + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +/* Use C++ inline semantics in C++, GNU inline for C mode. */ +#if defined(__cplusplus) +#define __INLINE __inline +#else +#define __INLINE static __inline +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated +/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory +/// at \a __p, and returns the 8-bit carry-out (carry flag). +/// +/// \code{.operation} +/// temp := (__cf == 0) ? 
0 : 1 +/// Store32(__p, __x + __y + temp) +/// result := CF +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c ADC instruction. +/// +/// \param __cf +/// The 8-bit unsigned carry flag; any non-zero value indicates carry. +/// \param __x +/// A 32-bit unsigned addend. +/// \param __y +/// A 32-bit unsigned addend. +/// \param __p +/// Pointer to memory for storing the sum. +/// \returns The 8-bit unsigned carry-out value. +__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf, + unsigned int __x, + unsigned int __y, + unsigned int *__p) { + return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p); +} + +/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry +/// flag \a __cf, and subtracts the result from unsigned 32-bit integer +/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p, +/// and returns the 8-bit carry-out (carry or overflow flag). +/// +/// \code{.operation} +/// temp := (__cf == 0) ? 0 : 1 +/// Store32(__p, __x - (__y + temp)) +/// result := CF +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c SBB instruction. +/// +/// \param __cf +/// The 8-bit unsigned carry flag; any non-zero value indicates carry. +/// \param __x +/// The 32-bit unsigned minuend. +/// \param __y +/// The 32-bit unsigned subtrahend. +/// \param __p +/// Pointer to memory for storing the difference. +/// \returns The 8-bit unsigned carry-out value. +__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf, + unsigned int __x, + unsigned int __y, + unsigned int *__p) { + return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p); +} + +#ifdef __x86_64__ +/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated +/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory +/// at \a __p, and returns the 8-bit carry-out (carry flag). +/// +/// \code{.operation} +/// temp := (__cf == 0) ? 0 : 1 +/// Store64(__p, __x + __y + temp) +/// result := CF +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c ADC instruction. +/// +/// \param __cf +/// The 8-bit unsigned carry flag; any non-zero value indicates carry. +/// \param __x +/// A 64-bit unsigned addend. +/// \param __y +/// A 64-bit unsigned addend. +/// \param __p +/// Pointer to memory for storing the sum. +/// \returns The 8-bit unsigned carry-out value. +__INLINE unsigned char __DEFAULT_FN_ATTRS +_addcarry_u64(unsigned char __cf, unsigned long long __x, + unsigned long long __y, unsigned long long *__p) { + return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p); +} + +/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry +/// flag \a __cf, and subtracts the result from unsigned 64-bit integer +/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p, +/// and returns the 8-bit carry-out (carry or overflow flag). +/// +/// \code{.operation} +/// temp := (__cf == 0) ? 0 : 1 +/// Store64(__p, __x - (__y + temp)) +/// result := CF +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c ADC instruction. +/// +/// \param __cf +/// The 8-bit unsigned carry flag; any non-zero value indicates carry. +/// \param __x +/// The 64-bit unsigned minuend. +/// \param __y +/// The 64-bit unsigned subtrahend. +/// \param __p +/// Pointer to memory for storing the difference. +/// \returns The 8-bit unsigned carry-out value. 
+__INLINE unsigned char __DEFAULT_FN_ATTRS +_subborrow_u64(unsigned char __cf, unsigned long long __x, + unsigned long long __y, unsigned long long *__p) { + return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p); +} +#endif + +#if defined(__cplusplus) +} +#endif + +#undef __INLINE +#undef __DEFAULT_FN_ATTRS + +#endif /* __ADCINTRIN_H */ diff --git a/lib/include/adxintrin.h b/lib/include/adxintrin.h index 20f6211e56..bc6a4caf35 100644 --- a/lib/include/adxintrin.h +++ b/lib/include/adxintrin.h @@ -15,7 +15,8 @@ #define __ADXINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("adx"))) /* Use C++ inline semantics in C++, GNU inline for C mode. */ #if defined(__cplusplus) @@ -53,10 +54,10 @@ extern "C" { /// \param __p /// Pointer to memory for storing the sum. /// \returns The 8-bit unsigned carry-out value. -__INLINE unsigned char - __attribute__((__always_inline__, __nodebug__, __target__("adx"))) - _addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y, - unsigned int *__p) { +__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarryx_u32(unsigned char __cf, + unsigned int __x, + unsigned int __y, + unsigned int *__p) { return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p); } @@ -84,137 +85,10 @@ __INLINE unsigned char /// \param __p /// Pointer to memory for storing the sum. /// \returns The 8-bit unsigned carry-out value. -__INLINE unsigned char - __attribute__((__always_inline__, __nodebug__, __target__("adx"))) - _addcarryx_u64(unsigned char __cf, unsigned long long __x, - unsigned long long __y, unsigned long long *__p) { - return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p); -} -#endif - -/* Intrinsics that are also available if __ADX__ is undefined. */ - -/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated -/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory -/// at \a __p, and returns the 8-bit carry-out (carry flag). -/// -/// \code{.operation} -/// temp := (__cf == 0) ? 0 : 1 -/// Store32(__p, __x + __y + temp) -/// result := CF -/// \endcode -/// -/// \headerfile -/// -/// This intrinsic corresponds to the \c ADC instruction. -/// -/// \param __cf -/// The 8-bit unsigned carry flag; any non-zero value indicates carry. -/// \param __x -/// A 32-bit unsigned addend. -/// \param __y -/// A 32-bit unsigned addend. -/// \param __p -/// Pointer to memory for storing the sum. -/// \returns The 8-bit unsigned carry-out value. -__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf, - unsigned int __x, - unsigned int __y, - unsigned int *__p) { - return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p); -} - -#ifdef __x86_64__ -/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated -/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory -/// at \a __p, and returns the 8-bit carry-out (carry flag). -/// -/// \code{.operation} -/// temp := (__cf == 0) ? 0 : 1 -/// Store64(__p, __x + __y + temp) -/// result := CF -/// \endcode -/// -/// \headerfile -/// -/// This intrinsic corresponds to the \c ADC instruction. -/// -/// \param __cf -/// The 8-bit unsigned carry flag; any non-zero value indicates carry. -/// \param __x -/// A 64-bit unsigned addend. -/// \param __y -/// A 64-bit unsigned addend. -/// \param __p -/// Pointer to memory for storing the sum. 
-/// \returns The 8-bit unsigned carry-out value. __INLINE unsigned char __DEFAULT_FN_ATTRS -_addcarry_u64(unsigned char __cf, unsigned long long __x, - unsigned long long __y, unsigned long long *__p) { - return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p); -} -#endif - -/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry -/// flag \a __cf, and subtracts the result from unsigned 32-bit integer -/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p, -/// and returns the 8-bit carry-out (carry or overflow flag). -/// -/// \code{.operation} -/// temp := (__cf == 0) ? 0 : 1 -/// Store32(__p, __x - (__y + temp)) -/// result := CF -/// \endcode -/// -/// \headerfile -/// -/// This intrinsic corresponds to the \c SBB instruction. -/// -/// \param __cf -/// The 8-bit unsigned carry flag; any non-zero value indicates carry. -/// \param __x -/// The 32-bit unsigned minuend. -/// \param __y -/// The 32-bit unsigned subtrahend. -/// \param __p -/// Pointer to memory for storing the difference. -/// \returns The 8-bit unsigned carry-out value. -__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf, - unsigned int __x, - unsigned int __y, - unsigned int *__p) { - return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p); -} - -#ifdef __x86_64__ -/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry -/// flag \a __cf, and subtracts the result from unsigned 64-bit integer -/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p, -/// and returns the 8-bit carry-out (carry or overflow flag). -/// -/// \code{.operation} -/// temp := (__cf == 0) ? 0 : 1 -/// Store64(__p, __x - (__y + temp)) -/// result := CF -/// \endcode -/// -/// \headerfile -/// -/// This intrinsic corresponds to the \c ADC instruction. -/// -/// \param __cf -/// The 8-bit unsigned carry flag; any non-zero value indicates carry. -/// \param __x -/// The 64-bit unsigned minuend. -/// \param __y -/// The 64-bit unsigned subtrahend. -/// \param __p -/// Pointer to memory for storing the difference. -/// \returns The 8-bit unsigned carry-out value. 
-__INLINE unsigned char __DEFAULT_FN_ATTRS -_subborrow_u64(unsigned char __cf, unsigned long long __x, +_addcarryx_u64(unsigned char __cf, unsigned long long __x, unsigned long long __y, unsigned long long *__p) { - return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p); + return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p); } #endif @@ -222,6 +96,7 @@ _subborrow_u64(unsigned char __cf, unsigned long long __x, } #endif +#undef __INLINE #undef __DEFAULT_FN_ATTRS #endif /* __ADXINTRIN_H */ diff --git a/lib/include/altivec.h b/lib/include/altivec.h index c036f5ebba..4971631c50 100644 --- a/lib/include/altivec.h +++ b/lib/include/altivec.h @@ -14647,67 +14647,86 @@ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b, static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a, int __b) { - vector signed char __res = (vector signed char)(0); - __res[__b & 0x7] = __a; + const vector signed char __zero = (vector signed char)0; + vector signed char __res = + __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1); + __res[__b & 0xf] = __a; return __res; } static __inline__ vector unsigned char __ATTRS_o_ai vec_promote(unsigned char __a, int __b) { - vector unsigned char __res = (vector unsigned char)(0); - __res[__b & 0x7] = __a; + const vector unsigned char __zero = (vector unsigned char)(0); + vector unsigned char __res = + __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1); + __res[__b & 0xf] = __a; return __res; } static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) { - vector short __res = (vector short)(0); + const vector short __zero = (vector short)(0); + vector short __res = + __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1); __res[__b & 0x7] = __a; return __res; } static __inline__ vector unsigned short __ATTRS_o_ai vec_promote(unsigned short __a, int __b) { - vector unsigned short __res = (vector unsigned short)(0); + const vector unsigned short __zero = (vector unsigned short)(0); + vector unsigned short __res = + __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1); __res[__b & 0x7] = __a; return __res; } static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) { - vector int __res = (vector int)(0); + const vector int __zero = (vector int)(0); + vector int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __res[__b & 0x3] = __a; return __res; } static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a, int __b) { - vector unsigned int __res = (vector unsigned int)(0); + const vector unsigned int __zero = (vector unsigned int)(0); + vector unsigned int __res = + __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __res[__b & 0x3] = __a; return __res; } static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) { - vector float __res = (vector float)(0); + const vector float __zero = (vector float)(0); + vector float __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __res[__b & 0x3] = __a; return __res; } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) { - vector double __res = (vector double)(0); + const vector double __zero = (vector double)(0); + vector double __res = __builtin_shufflevector(__zero, __zero, -1, -1); __res[__b & 0x1] = __a; return __res; } static __inline__ vector signed long long __ATTRS_o_ai vec_promote(signed long long 
__a, int __b) { - vector signed long long __res = (vector signed long long)(0); + const vector signed long long __zero = (vector signed long long)(0); + vector signed long long __res = + __builtin_shufflevector(__zero, __zero, -1, -1); __res[__b & 0x1] = __a; return __res; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_promote(unsigned long long __a, int __b) { - vector unsigned long long __res = (vector unsigned long long)(0); + const vector unsigned long long __zero = (vector unsigned long long)(0); + vector unsigned long long __res = + __builtin_shufflevector(__zero, __zero, -1, -1); __res[__b & 0x1] = __a; return __res; } diff --git a/lib/include/ammintrin.h b/lib/include/ammintrin.h index 1af2096595..f549ab80d9 100644 --- a/lib/include/ammintrin.h +++ b/lib/include/ammintrin.h @@ -155,9 +155,9 @@ _mm_insert_si64(__m128i __x, __m128i __y) /// \param __a /// The 64-bit double-precision floating-point register value to be stored. static __inline__ void __DEFAULT_FN_ATTRS -_mm_stream_sd(double *__p, __m128d __a) +_mm_stream_sd(void *__p, __m128d __a) { - __builtin_ia32_movntsd(__p, (__v2df)__a); + __builtin_ia32_movntsd((double *)__p, (__v2df)__a); } /// Stores a 32-bit single-precision floating-point value in a 32-bit @@ -173,9 +173,9 @@ _mm_stream_sd(double *__p, __m128d __a) /// \param __a /// The 32-bit single-precision floating-point register value to be stored. static __inline__ void __DEFAULT_FN_ATTRS -_mm_stream_ss(float *__p, __m128 __a) +_mm_stream_ss(void *__p, __m128 __a) { - __builtin_ia32_movntss(__p, (__v4sf)__a); + __builtin_ia32_movntss((float *)__p, (__v4sf)__a); } #undef __DEFAULT_FN_ATTRS diff --git a/lib/include/arm_acle.h b/lib/include/arm_acle.h index c208512bab..9cd34948e3 100644 --- a/lib/include/arm_acle.h +++ b/lib/include/arm_acle.h @@ -4,6 +4,13 @@ * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * + * The Arm C Language Extensions specifications can be found in the following + * link: https://github.com/ARM-software/acle/releases + * + * The ACLE section numbers are subject to change. When consulting the + * specifications, it is recommended to search using section titles if + * the section numbers look outdated. 
+ * *===-----------------------------------------------------------------------=== */ @@ -20,8 +27,8 @@ extern "C" { #endif -/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */ -/* 8.3 Memory barriers */ +/* 7 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */ +/* 7.3 Memory barriers */ #if !__has_builtin(__dmb) #define __dmb(i) __builtin_arm_dmb(i) #endif @@ -32,7 +39,7 @@ extern "C" { #define __isb(i) __builtin_arm_isb(i) #endif -/* 8.4 Hints */ +/* 7.4 Hints */ #if !__has_builtin(__wfi) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) { @@ -68,7 +75,7 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v #define __dbg(t) __builtin_arm_dbg(t) #endif -/* 8.5 Swap */ +/* 7.5 Swap */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __swp(uint32_t __x, volatile uint32_t *__p) { uint32_t v; @@ -78,8 +85,8 @@ __swp(uint32_t __x, volatile uint32_t *__p) { return v; } -/* 8.6 Memory prefetch intrinsics */ -/* 8.6.1 Data prefetch */ +/* 7.6 Memory prefetch intrinsics */ +/* 7.6.1 Data prefetch */ #define __pld(addr) __pldx(0, 0, 0, addr) #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE @@ -90,7 +97,7 @@ __swp(uint32_t __x, volatile uint32_t *__p) { __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1) #endif -/* 8.6.2 Instruction prefetch */ +/* 7.6.2 Instruction prefetch */ #define __pli(addr) __plix(0, 0, addr) #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE @@ -101,15 +108,15 @@ __swp(uint32_t __x, volatile uint32_t *__p) { __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0) #endif -/* 8.7 NOP */ +/* 7.7 NOP */ #if !defined(_MSC_VER) || !defined(__aarch64__) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) { __builtin_arm_nop(); } #endif -/* 9 DATA-PROCESSING INTRINSICS */ -/* 9.2 Miscellaneous data-processing intrinsics */ +/* 8 DATA-PROCESSING INTRINSICS */ +/* 8.2 Miscellaneous data-processing intrinsics */ /* ROR */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __ror(uint32_t __x, uint32_t __y) { @@ -248,9 +255,7 @@ __rbitl(unsigned long __t) { #endif } -/* - * 9.3 16-bit multiplications - */ +/* 8.3 16-bit multiplications */ #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smulbb(int32_t __a, int32_t __b) { @@ -279,18 +284,18 @@ __smulwt(int32_t __a, int32_t __b) { #endif /* - * 9.4 Saturating intrinsics + * 8.4 Saturating intrinsics * * FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag * intrinsics are implemented and the flag is enabled. 
*/ -/* 9.4.1 Width-specified saturation intrinsics */ +/* 8.4.1 Width-specified saturation intrinsics */ #if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT #define __ssat(x, y) __builtin_arm_ssat(x, y) #define __usat(x, y) __builtin_arm_usat(x, y) #endif -/* 9.4.2 Saturating addition and subtraction intrinsics */ +/* 8.4.2 Saturating addition and subtraction intrinsics */ #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __qadd(int32_t __t, int32_t __v) { @@ -308,7 +313,7 @@ __qdbl(int32_t __t) { } #endif -/* 9.4.3 Accumultating multiplications */ +/* 8.4.3 Accumultating multiplications */ #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlabb(int32_t __a, int32_t __b, int32_t __c) { @@ -337,13 +342,13 @@ __smlawt(int32_t __a, int32_t __b, int32_t __c) { #endif -/* 9.5.4 Parallel 16-bit saturation */ +/* 8.5.4 Parallel 16-bit saturation */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 #define __ssat16(x, y) __builtin_arm_ssat16(x, y) #define __usat16(x, y) __builtin_arm_usat16(x, y) #endif -/* 9.5.5 Packing and unpacking */ +/* 8.5.5 Packing and unpacking */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 typedef int32_t int8x4_t; typedef int32_t int16x2_t; @@ -368,7 +373,7 @@ __uxtb16(int8x4_t __a) { } #endif -/* 9.5.6 Parallel selection */ +/* 8.5.6 Parallel selection */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __sel(uint8x4_t __a, uint8x4_t __b) { @@ -376,7 +381,7 @@ __sel(uint8x4_t __a, uint8x4_t __b) { } #endif -/* 9.5.7 Parallel 8-bit addition and subtraction */ +/* 8.5.7 Parallel 8-bit addition and subtraction */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __qadd8(int8x4_t __a, int8x4_t __b) { @@ -428,7 +433,7 @@ __usub8(uint8x4_t __a, uint8x4_t __b) { } #endif -/* 9.5.8 Sum of 8-bit absolute differences */ +/* 8.5.8 Sum of 8-bit absolute differences */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __usad8(uint8x4_t __a, uint8x4_t __b) { @@ -440,7 +445,7 @@ __usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) { } #endif -/* 9.5.9 Parallel 16-bit addition and subtraction */ +/* 8.5.9 Parallel 16-bit addition and subtraction */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __qadd16(int16x2_t __a, int16x2_t __b) { @@ -540,7 +545,7 @@ __usub16(uint16x2_t __a, uint16x2_t __b) { } #endif -/* 9.5.10 Parallel 16-bit multiplications */ +/* 8.5.10 Parallel 16-bit multiplications */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlad(int16x2_t __a, int16x2_t __b, int32_t __c) { @@ -592,7 +597,22 @@ __smusdx(int16x2_t __a, int16x2_t __b) { } #endif -/* 9.7 CRC32 intrinsics */ +/* 8.6 Floating-point data-processing intrinsics */ +#if (defined(__ARM_FEATURE_DIRECTED_ROUNDING) && \ + (__ARM_FEATURE_DIRECTED_ROUNDING)) && \ + (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE) +static __inline__ double __attribute__((__always_inline__, __nodebug__)) +__rintn(double __a) { + return __builtin_roundeven(__a); +} + +static __inline__ float __attribute__((__always_inline__, __nodebug__)) 
+__rintnf(float __a) { + return __builtin_roundevenf(__a); +} +#endif + +/* 8.8 CRC32 intrinsics */ #if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \ (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE) static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) @@ -636,6 +656,7 @@ __crc32cd(uint32_t __a, uint64_t __b) { } #endif +/* 8.6 Floating-point data-processing intrinsics */ /* Armv8.3-A Javascript conversion intrinsic */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a"))) @@ -687,7 +708,7 @@ __rint64x(double __a) { } #endif -/* Armv8.7-A load/store 64-byte intrinsics */ +/* 8.9 Armv8.7-A load/store 64-byte intrinsics */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE typedef struct { uint64_t val[8]; @@ -713,7 +734,7 @@ __arm_st64bv0(void *__addr, data512_t __value) { } #endif -/* 10.1 Special register intrinsics */ +/* 11.1 Special register intrinsics */ #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg) #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg) #define __arm_rsr128(sysreg) __builtin_arm_rsr128(sysreg) @@ -727,7 +748,7 @@ __arm_st64bv0(void *__addr, data512_t __value) { #define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v)) #define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v)) -/* Memory Tagging Extensions (MTE) Intrinsics */ +/* 10.3 Memory Tagging Extensions (MTE) Intrinsics */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE #define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask) #define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset) @@ -736,12 +757,71 @@ __arm_st64bv0(void *__addr, data512_t __value) { #define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr) #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb) -/* Memory Operations Intrinsics */ +/* 18 Memory Operations Intrinsics */ #define __arm_mops_memset_tag(__tagged_address, __value, __size) \ __builtin_arm_mops_memset_tag(__tagged_address, __value, __size) #endif -/* Transactional Memory Extension (TME) Intrinsics */ +/* 11.3 Coprocessor Intrinsics */ +#if defined(__ARM_FEATURE_COPROC) + +#if (__ARM_FEATURE_COPROC & 0x1) + +#if (__ARM_ARCH < 8) +#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \ + __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) +#endif /* __ARM_ARCH < 8 */ + +#define __arm_ldc(coproc, CRd, p) __builtin_arm_ldc(coproc, CRd, p) +#define __arm_stc(coproc, CRd, p) __builtin_arm_stc(coproc, CRd, p) + +#define __arm_mcr(coproc, opc1, value, CRn, CRm, opc2) \ + __builtin_arm_mcr(coproc, opc1, value, CRn, CRm, opc2) +#define __arm_mrc(coproc, opc1, CRn, CRm, opc2) \ + __builtin_arm_mrc(coproc, opc1, CRn, CRm, opc2) + +#if (__ARM_ARCH != 4) && (__ARM_ARCH < 8) +#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p) +#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p) +#endif /* (__ARM_ARCH != 4) && (__ARM_ARCH != 8) */ + +#if (__ARM_ARCH_8M_MAIN__) || (__ARM_ARCH_8_1M_MAIN__) +#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \ + __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) +#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p) +#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p) +#endif /* ___ARM_ARCH_8M_MAIN__ */ + +#endif /* __ARM_FEATURE_COPROC & 0x1 */ + +#if (__ARM_FEATURE_COPROC & 0x2) +#define __arm_cdp2(coproc, opc1, CRd, 
CRn, CRm, opc2) \ + __builtin_arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) +#define __arm_ldc2(coproc, CRd, p) __builtin_arm_ldc2(coproc, CRd, p) +#define __arm_stc2(coproc, CRd, p) __builtin_arm_stc2(coproc, CRd, p) +#define __arm_ldc2l(coproc, CRd, p) __builtin_arm_ldc2l(coproc, CRd, p) +#define __arm_stc2l(coproc, CRd, p) __builtin_arm_stc2l(coproc, CRd, p) +#define __arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) \ + __builtin_arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) +#define __arm_mrc2(coproc, opc1, CRn, CRm, opc2) \ + __builtin_arm_mrc2(coproc, opc1, CRn, CRm, opc2) +#endif + +#if (__ARM_FEATURE_COPROC & 0x4) +#define __arm_mcrr(coproc, opc1, value, CRm) \ + __builtin_arm_mcrr(coproc, opc1, value, CRm) +#define __arm_mrrc(coproc, opc1, CRm) __builtin_arm_mrrc(coproc, opc1, CRm) +#endif + +#if (__ARM_FEATURE_COPROC & 0x8) +#define __arm_mcrr2(coproc, opc1, value, CRm) \ + __builtin_arm_mcrr2(coproc, opc1, value, CRm) +#define __arm_mrrc2(coproc, opc1, CRm) __builtin_arm_mrrc2(coproc, opc1, CRm) +#endif + +#endif // __ARM_FEATURE_COPROC + +/* 17 Transactional Memory Extension (TME) Intrinsics */ #if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME #define _TMFAILURE_REASON 0x00007fffu @@ -763,7 +843,7 @@ __arm_st64bv0(void *__addr, data512_t __value) { #endif /* __ARM_FEATURE_TME */ -/* Armv8.5-A Random number generation intrinsics */ +/* 8.7 Armv8.5-A Random number generation intrinsics */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand"))) __rndr(uint64_t *__p) { diff --git a/lib/include/arm_neon.h b/lib/include/arm_neon.h index f1aae6efd1..97431fccab 100644 --- a/lib/include/arm_neon.h +++ b/lib/include/arm_neon.h @@ -35,12 +35,7 @@ #include #include -typedef float float32_t; -typedef __fp16 float16_t; -#ifdef __aarch64__ -typedef double float64_t; -#endif - +#include #ifdef __aarch64__ typedef uint8_t poly8_t; typedef uint16_t poly16_t; @@ -51,30 +46,6 @@ typedef int8_t poly8_t; typedef int16_t poly16_t; typedef int64_t poly64_t; #endif -typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; -typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; -typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t; -typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t; -typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t; -typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t; -typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t; -typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t; -typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; -typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t; -typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t; -typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; -typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t; -typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; -typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; -typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; -typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; -typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t; -typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; -typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t; -#ifdef __aarch64__ -typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t; -typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t; 
-#endif typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t; typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t; @@ -82,96 +53,6 @@ typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; typedef __attribute__((neon_polyvector_type(1))) poly64_t poly64x1_t; typedef __attribute__((neon_polyvector_type(2))) poly64_t poly64x2_t; -typedef struct int8x8x2_t { - int8x8_t val[2]; -} int8x8x2_t; - -typedef struct int8x16x2_t { - int8x16_t val[2]; -} int8x16x2_t; - -typedef struct int16x4x2_t { - int16x4_t val[2]; -} int16x4x2_t; - -typedef struct int16x8x2_t { - int16x8_t val[2]; -} int16x8x2_t; - -typedef struct int32x2x2_t { - int32x2_t val[2]; -} int32x2x2_t; - -typedef struct int32x4x2_t { - int32x4_t val[2]; -} int32x4x2_t; - -typedef struct int64x1x2_t { - int64x1_t val[2]; -} int64x1x2_t; - -typedef struct int64x2x2_t { - int64x2_t val[2]; -} int64x2x2_t; - -typedef struct uint8x8x2_t { - uint8x8_t val[2]; -} uint8x8x2_t; - -typedef struct uint8x16x2_t { - uint8x16_t val[2]; -} uint8x16x2_t; - -typedef struct uint16x4x2_t { - uint16x4_t val[2]; -} uint16x4x2_t; - -typedef struct uint16x8x2_t { - uint16x8_t val[2]; -} uint16x8x2_t; - -typedef struct uint32x2x2_t { - uint32x2_t val[2]; -} uint32x2x2_t; - -typedef struct uint32x4x2_t { - uint32x4_t val[2]; -} uint32x4x2_t; - -typedef struct uint64x1x2_t { - uint64x1_t val[2]; -} uint64x1x2_t; - -typedef struct uint64x2x2_t { - uint64x2_t val[2]; -} uint64x2x2_t; - -typedef struct float16x4x2_t { - float16x4_t val[2]; -} float16x4x2_t; - -typedef struct float16x8x2_t { - float16x8_t val[2]; -} float16x8x2_t; - -typedef struct float32x2x2_t { - float32x2_t val[2]; -} float32x2x2_t; - -typedef struct float32x4x2_t { - float32x4_t val[2]; -} float32x4x2_t; - -#ifdef __aarch64__ -typedef struct float64x1x2_t { - float64x1_t val[2]; -} float64x1x2_t; - -typedef struct float64x2x2_t { - float64x2_t val[2]; -} float64x2x2_t; - -#endif typedef struct poly8x8x2_t { poly8x8_t val[2]; } poly8x8x2_t; @@ -196,96 +77,6 @@ typedef struct poly64x2x2_t { poly64x2_t val[2]; } poly64x2x2_t; -typedef struct int8x8x3_t { - int8x8_t val[3]; -} int8x8x3_t; - -typedef struct int8x16x3_t { - int8x16_t val[3]; -} int8x16x3_t; - -typedef struct int16x4x3_t { - int16x4_t val[3]; -} int16x4x3_t; - -typedef struct int16x8x3_t { - int16x8_t val[3]; -} int16x8x3_t; - -typedef struct int32x2x3_t { - int32x2_t val[3]; -} int32x2x3_t; - -typedef struct int32x4x3_t { - int32x4_t val[3]; -} int32x4x3_t; - -typedef struct int64x1x3_t { - int64x1_t val[3]; -} int64x1x3_t; - -typedef struct int64x2x3_t { - int64x2_t val[3]; -} int64x2x3_t; - -typedef struct uint8x8x3_t { - uint8x8_t val[3]; -} uint8x8x3_t; - -typedef struct uint8x16x3_t { - uint8x16_t val[3]; -} uint8x16x3_t; - -typedef struct uint16x4x3_t { - uint16x4_t val[3]; -} uint16x4x3_t; - -typedef struct uint16x8x3_t { - uint16x8_t val[3]; -} uint16x8x3_t; - -typedef struct uint32x2x3_t { - uint32x2_t val[3]; -} uint32x2x3_t; - -typedef struct uint32x4x3_t { - uint32x4_t val[3]; -} uint32x4x3_t; - -typedef struct uint64x1x3_t { - uint64x1_t val[3]; -} uint64x1x3_t; - -typedef struct uint64x2x3_t { - uint64x2_t val[3]; -} uint64x2x3_t; - -typedef struct float16x4x3_t { - float16x4_t val[3]; -} float16x4x3_t; - -typedef struct float16x8x3_t { - float16x8_t val[3]; -} float16x8x3_t; - -typedef struct float32x2x3_t { - float32x2_t val[3]; -} float32x2x3_t; - -typedef struct float32x4x3_t { - float32x4_t val[3]; -} 
float32x4x3_t; - -#ifdef __aarch64__ -typedef struct float64x1x3_t { - float64x1_t val[3]; -} float64x1x3_t; - -typedef struct float64x2x3_t { - float64x2_t val[3]; -} float64x2x3_t; - -#endif typedef struct poly8x8x3_t { poly8x8_t val[3]; } poly8x8x3_t; @@ -310,96 +101,6 @@ typedef struct poly64x2x3_t { poly64x2_t val[3]; } poly64x2x3_t; -typedef struct int8x8x4_t { - int8x8_t val[4]; -} int8x8x4_t; - -typedef struct int8x16x4_t { - int8x16_t val[4]; -} int8x16x4_t; - -typedef struct int16x4x4_t { - int16x4_t val[4]; -} int16x4x4_t; - -typedef struct int16x8x4_t { - int16x8_t val[4]; -} int16x8x4_t; - -typedef struct int32x2x4_t { - int32x2_t val[4]; -} int32x2x4_t; - -typedef struct int32x4x4_t { - int32x4_t val[4]; -} int32x4x4_t; - -typedef struct int64x1x4_t { - int64x1_t val[4]; -} int64x1x4_t; - -typedef struct int64x2x4_t { - int64x2_t val[4]; -} int64x2x4_t; - -typedef struct uint8x8x4_t { - uint8x8_t val[4]; -} uint8x8x4_t; - -typedef struct uint8x16x4_t { - uint8x16_t val[4]; -} uint8x16x4_t; - -typedef struct uint16x4x4_t { - uint16x4_t val[4]; -} uint16x4x4_t; - -typedef struct uint16x8x4_t { - uint16x8_t val[4]; -} uint16x8x4_t; - -typedef struct uint32x2x4_t { - uint32x2_t val[4]; -} uint32x2x4_t; - -typedef struct uint32x4x4_t { - uint32x4_t val[4]; -} uint32x4x4_t; - -typedef struct uint64x1x4_t { - uint64x1_t val[4]; -} uint64x1x4_t; - -typedef struct uint64x2x4_t { - uint64x2_t val[4]; -} uint64x2x4_t; - -typedef struct float16x4x4_t { - float16x4_t val[4]; -} float16x4x4_t; - -typedef struct float16x8x4_t { - float16x8_t val[4]; -} float16x8x4_t; - -typedef struct float32x2x4_t { - float32x2_t val[4]; -} float32x2x4_t; - -typedef struct float32x4x4_t { - float32x4_t val[4]; -} float32x4x4_t; - -#ifdef __aarch64__ -typedef struct float64x1x4_t { - float64x1_t val[4]; -} float64x1x4_t; - -typedef struct float64x2x4_t { - float64x2_t val[4]; -} float64x2x4_t; - -#endif typedef struct poly8x8x4_t { poly8x8_t val[4]; } poly8x8x4_t; @@ -424,33 +125,6 @@ typedef struct poly64x2x4_t { poly64x2_t val[4]; } poly64x2x4_t; -typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t; -typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t; - -typedef struct bfloat16x4x2_t { - bfloat16x4_t val[2]; -} bfloat16x4x2_t; - -typedef struct bfloat16x8x2_t { - bfloat16x8_t val[2]; -} bfloat16x8x2_t; - -typedef struct bfloat16x4x3_t { - bfloat16x4_t val[3]; -} bfloat16x4x3_t; - -typedef struct bfloat16x8x3_t { - bfloat16x8_t val[3]; -} bfloat16x8x3_t; - -typedef struct bfloat16x4x4_t { - bfloat16x4_t val[4]; -} bfloat16x4x4_t; - -typedef struct bfloat16x8x4_t { - bfloat16x8_t val[4]; -} bfloat16x8x4_t; - #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #ifdef __LITTLE_ENDIAN__ @@ -66600,6 +66274,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32x_f32(float32x2_t __p0) } #endif +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__p0, 42); + return __ret; +} +#else +__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) { + float64x2_t __ret; + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + __ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__rev0, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32x_f64(float64x1_t __p0) { + 
float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrnd32x_f64((int8x8_t)__p0, 10); + return __ret; +} #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float32x4_t vrnd32zq_f32(float32x4_t __p0) { float32x4_t __ret; @@ -66632,6 +66327,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32z_f32(float32x2_t __p0) } #endif +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__p0, 42); + return __ret; +} +#else +__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) { + float64x2_t __ret; + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + __ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__rev0, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32z_f64(float64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrnd32z_f64((int8x8_t)__p0, 10); + return __ret; +} #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float32x4_t vrnd64xq_f32(float32x4_t __p0) { float32x4_t __ret; @@ -66664,6 +66380,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64x_f32(float32x2_t __p0) } #endif +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__p0, 42); + return __ret; +} +#else +__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) { + float64x2_t __ret; + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + __ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__rev0, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64x_f64(float64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrnd64x_f64((int8x8_t)__p0, 10); + return __ret; +} #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a"))) float32x4_t vrnd64zq_f32(float32x4_t __p0) { float32x4_t __ret; @@ -66696,6 +66433,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64z_f32(float32x2_t __p0) } #endif +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__p0, 42); + return __ret; +} +#else +__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) { + float64x2_t __ret; + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + __ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__rev0, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64z_f64(float64x1_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vrnd64z_f64((int8x8_t)__p0, 10); + return __ret; +} #endif #if defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING) #ifdef __LITTLE_ENDIAN__ diff --git a/lib/include/arm_sme.h b/lib/include/arm_sme.h new file mode 100644 index 0000000000..2ed316f260 --- /dev/null +++ b/lib/include/arm_sme.h @@ -0,0 +1,2412 @@ +/*===---- arm_sme.h - ARM SME intrinsics ------=== + * + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __ARM_SME_H +#define __ARM_SME_H + +#if !defined(__LITTLE_ENDIAN__) +#error "Big endian is currently not supported for arm_sme.h" +#endif +#include <arm_sve.h> + +/* Function attributes */ +#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) + +#define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__)) + +#ifdef __cplusplus +extern "C" { +#endif + +void __arm_za_disable(void) __arm_streaming_compatible; + +__ai bool __arm_has_sme(void) __arm_streaming_compatible { + uint64_t x0, x1; + __builtin_arm_get_sme_state(&x0, &x1); + return x0 & (1ULL << 63); +} + +__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible { + uint64_t x0, x1; + __builtin_arm_get_sme_state(&x0, &x1); + return x0 & 1; +} + +__ai __attribute__((target("sme"))) void svundef_za(void) __arm_streaming_compatible __arm_out("za") { } + +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m))) +void svaddha_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m))) +void svaddha_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m))) +void svaddva_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m))) +void svaddva_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsb))) +uint64_t svcntsb(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsd))) +uint64_t svcntsd(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsh))) +uint64_t svcntsh(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsw))) +uint64_t svcntsw(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za128))) +void svld1_hor_vnum_za128(uint64_t, uint32_t, svbool_t, void const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za16))) +void svld1_hor_vnum_za16(uint64_t, uint32_t, svbool_t, void const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za32))) +void svld1_hor_vnum_za32(uint64_t, uint32_t, svbool_t, void const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za64))) +void svld1_hor_vnum_za64(uint64_t, uint32_t, svbool_t, void const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za8))) +void svld1_hor_vnum_za8(uint64_t, uint32_t, svbool_t, void const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za128))) +void svld1_hor_za128(uint64_t, uint32_t, svbool_t, void const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za16))) +void svld1_hor_za16(uint64_t, uint32_t, svbool_t, void const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za32))) +void svld1_hor_za32(uint64_t, uint32_t, svbool_t, void const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za64))) +void svld1_hor_za64(uint64_t, uint32_t, svbool_t, void const *); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za8))) +void svld1_hor_za8(uint64_t, uint32_t, svbool_t, void const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za128))) +void svld1_ver_vnum_za128(uint64_t, uint32_t, svbool_t, void const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za16))) +void svld1_ver_vnum_za16(uint64_t, uint32_t, svbool_t, void const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za32))) +void svld1_ver_vnum_za32(uint64_t, uint32_t, svbool_t, void const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za64))) +void svld1_ver_vnum_za64(uint64_t, uint32_t, svbool_t, void const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za8))) +void svld1_ver_vnum_za8(uint64_t, uint32_t, svbool_t, void const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za128))) +void svld1_ver_za128(uint64_t, uint32_t, svbool_t, void const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za16))) +void svld1_ver_za16(uint64_t, uint32_t, svbool_t, void const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za32))) +void svld1_ver_za32(uint64_t, uint32_t, svbool_t, void const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za64))) +void svld1_ver_za64(uint64_t, uint32_t, svbool_t, void const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za8))) +void svld1_ver_za8(uint64_t, uint32_t, svbool_t, void const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_vnum_za))) +void svldr_vnum_za(uint32_t, void const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_za))) +void svldr_za(uint32_t, void const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m))) +void svmopa_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m))) +void svmopa_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m))) +void svmopa_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m))) +void svmopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m))) +void svmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m))) +void svmops_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m))) +void svmops_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m))) +void svmops_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m))) +void svmops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m))) +void svmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m))) +svuint8_t svread_hor_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m))) +svuint32_t svread_hor_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m))) +svuint64_t svread_hor_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m))) +svuint16_t svread_hor_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m))) +svbfloat16_t svread_hor_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m))) +svint8_t svread_hor_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m))) +svfloat64_t svread_hor_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m))) +svfloat32_t svread_hor_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m))) +svfloat16_t svread_hor_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m))) +svint32_t svread_hor_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m))) +svint64_t svread_hor_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m))) +svint16_t svread_hor_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m))) +svuint16_t svread_hor_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m))) +svbfloat16_t svread_hor_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m))) +svfloat16_t svread_hor_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m))) +svint16_t svread_hor_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m))) +svuint32_t svread_hor_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m))) +svfloat32_t svread_hor_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m))) +svint32_t svread_hor_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m))) +svuint64_t svread_hor_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m))) +svfloat64_t svread_hor_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m))) +svint64_t 
svread_hor_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m))) +svuint8_t svread_hor_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m))) +svint8_t svread_hor_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m))) +svuint8_t svread_ver_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m))) +svuint32_t svread_ver_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m))) +svuint64_t svread_ver_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m))) +svuint16_t svread_ver_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m))) +svbfloat16_t svread_ver_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m))) +svint8_t svread_ver_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m))) +svfloat64_t svread_ver_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m))) +svfloat32_t svread_ver_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m))) +svfloat16_t svread_ver_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m))) +svint32_t svread_ver_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m))) +svint64_t svread_ver_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m))) +svint16_t svread_ver_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m))) +svuint16_t svread_ver_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m))) +svbfloat16_t svread_ver_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m))) +svfloat16_t svread_ver_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m))) +svint16_t svread_ver_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m))) +svuint32_t svread_ver_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m))) +svfloat32_t svread_ver_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m))) +svint32_t svread_ver_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m))) +svuint64_t svread_ver_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m))) +svfloat64_t svread_ver_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m))) +svint64_t svread_ver_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m))) +svuint8_t svread_ver_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m))) +svint8_t svread_ver_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za128))) +void svst1_hor_vnum_za128(uint64_t, uint32_t, svbool_t, void *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za16))) +void svst1_hor_vnum_za16(uint64_t, uint32_t, svbool_t, void *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za32))) +void svst1_hor_vnum_za32(uint64_t, uint32_t, svbool_t, void *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za64))) +void svst1_hor_vnum_za64(uint64_t, uint32_t, svbool_t, void *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za8))) +void svst1_hor_vnum_za8(uint64_t, uint32_t, svbool_t, void *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za128))) +void svst1_hor_za128(uint64_t, uint32_t, svbool_t, void *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za16))) +void svst1_hor_za16(uint64_t, uint32_t, svbool_t, void *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za32))) +void svst1_hor_za32(uint64_t, uint32_t, svbool_t, void *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za64))) +void svst1_hor_za64(uint64_t, uint32_t, svbool_t, void *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za8))) +void svst1_hor_za8(uint64_t, uint32_t, svbool_t, void *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za128))) +void svst1_ver_vnum_za128(uint64_t, uint32_t, svbool_t, void *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za16))) +void svst1_ver_vnum_za16(uint64_t, uint32_t, svbool_t, void *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za32))) +void svst1_ver_vnum_za32(uint64_t, uint32_t, svbool_t, void *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za64))) +void svst1_ver_vnum_za64(uint64_t, uint32_t, svbool_t, void *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za8))) +void svst1_ver_vnum_za8(uint64_t, uint32_t, svbool_t, void *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za128))) +void svst1_ver_za128(uint64_t, uint32_t, svbool_t, void *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za16))) +void svst1_ver_za16(uint64_t, uint32_t, svbool_t, void *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za32))) +void svst1_ver_za32(uint64_t, uint32_t, svbool_t, void *); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za64))) +void svst1_ver_za64(uint64_t, uint32_t, svbool_t, void *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za8))) +void svst1_ver_za8(uint64_t, uint32_t, svbool_t, void *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_vnum_za))) +void svstr_vnum_za(uint32_t, void *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_za))) +void svstr_za(uint32_t, void *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m))) +void svsumopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m))) +void svsumops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m))) +void svusmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m))) +void svusmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m))) +void svwrite_hor_za128_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m))) +void svwrite_hor_za128_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m))) +void svwrite_hor_za128_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m))) +void svwrite_hor_za128_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m))) +void svwrite_hor_za128_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m))) +void svwrite_hor_za128_s8_m(uint64_t, uint32_t, svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m))) +void svwrite_hor_za128_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m))) +void svwrite_hor_za128_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m))) +void svwrite_hor_za128_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m))) +void svwrite_hor_za128_s32_m(uint64_t, uint32_t, svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m))) +void svwrite_hor_za128_s64_m(uint64_t, uint32_t, svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m))) +void svwrite_hor_za128_s16_m(uint64_t, uint32_t, svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m))) +void svwrite_hor_za16_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m))) +void svwrite_hor_za16_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m))) +void svwrite_hor_za16_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); 
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m))) +void svwrite_hor_za16_s16_m(uint64_t, uint32_t, svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m))) +void svwrite_hor_za32_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m))) +void svwrite_hor_za32_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m))) +void svwrite_hor_za32_s32_m(uint64_t, uint32_t, svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m))) +void svwrite_hor_za64_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m))) +void svwrite_hor_za64_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m))) +void svwrite_hor_za64_s64_m(uint64_t, uint32_t, svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m))) +void svwrite_hor_za8_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m))) +void svwrite_hor_za8_s8_m(uint64_t, uint32_t, svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m))) +void svwrite_ver_za128_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m))) +void svwrite_ver_za128_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m))) +void svwrite_ver_za128_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m))) +void svwrite_ver_za128_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m))) +void svwrite_ver_za128_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m))) +void svwrite_ver_za128_s8_m(uint64_t, uint32_t, svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m))) +void svwrite_ver_za128_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m))) +void svwrite_ver_za128_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m))) +void svwrite_ver_za128_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m))) +void svwrite_ver_za128_s32_m(uint64_t, uint32_t, svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m))) +void svwrite_ver_za128_s64_m(uint64_t, uint32_t, svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m))) +void svwrite_ver_za128_s16_m(uint64_t, uint32_t, svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m))) +void svwrite_ver_za16_u16_m(uint64_t, uint32_t, svbool_t, svuint16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m))) +void svwrite_ver_za16_bf16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m))) +void svwrite_ver_za16_f16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m))) +void svwrite_ver_za16_s16_m(uint64_t, uint32_t, svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m))) +void svwrite_ver_za32_u32_m(uint64_t, uint32_t, svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m))) +void svwrite_ver_za32_f32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m))) +void svwrite_ver_za32_s32_m(uint64_t, uint32_t, svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m))) +void svwrite_ver_za64_u64_m(uint64_t, uint32_t, svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m))) +void svwrite_ver_za64_f64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m))) +void svwrite_ver_za64_s64_m(uint64_t, uint32_t, svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m))) +void svwrite_ver_za8_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m))) +void svwrite_ver_za8_s8_m(uint64_t, uint32_t, svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za))) +void svzero_mask_za(uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za))) +void svzero_za(); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m))) +void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m))) +void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m))) +void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m))) +void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m))) +void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m))) +void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m))) +void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m))) +void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m))) +void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m))) +void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m))) +void svmops_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m))) +void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m))) +void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m))) +void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m))) +svuint8_t svread_hor_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m))) +svuint32_t svread_hor_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m))) +svuint64_t svread_hor_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m))) +svuint16_t svread_hor_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m))) +svbfloat16_t svread_hor_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m))) +svint8_t svread_hor_za128_m(svint8_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m))) +svfloat64_t svread_hor_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m))) +svfloat32_t svread_hor_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m))) +svfloat16_t svread_hor_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m))) +svint32_t svread_hor_za128_m(svint32_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m))) +svint64_t svread_hor_za128_m(svint64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m))) +svint16_t svread_hor_za128_m(svint16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m))) +svuint16_t svread_hor_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m))) +svbfloat16_t svread_hor_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m))) +svfloat16_t svread_hor_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m))) +svint16_t svread_hor_za16_m(svint16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m))) +svuint32_t svread_hor_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m))) +svfloat32_t svread_hor_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m))) +svint32_t svread_hor_za32_m(svint32_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m))) +svuint64_t svread_hor_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m))) +svfloat64_t svread_hor_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m))) +svint64_t svread_hor_za64_m(svint64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m))) +svuint8_t svread_hor_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m))) +svint8_t svread_hor_za8_m(svint8_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m))) +svuint8_t svread_ver_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m))) +svuint32_t svread_ver_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m))) +svuint64_t svread_ver_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m))) +svuint16_t svread_ver_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m))) +svbfloat16_t svread_ver_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m))) +svint8_t svread_ver_za128_m(svint8_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m))) +svfloat64_t svread_ver_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m))) +svfloat32_t svread_ver_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m))) +svfloat16_t svread_ver_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m))) +svint32_t svread_ver_za128_m(svint32_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m))) +svint64_t svread_ver_za128_m(svint64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m))) +svint16_t svread_ver_za128_m(svint16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m))) +svuint16_t svread_ver_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m))) +svbfloat16_t svread_ver_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m))) +svfloat16_t svread_ver_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m))) +svint16_t svread_ver_za16_m(svint16_t, svbool_t, uint64_t, uint32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m))) +svuint32_t svread_ver_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m))) +svfloat32_t svread_ver_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m))) +svint32_t svread_ver_za32_m(svint32_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m))) +svuint64_t svread_ver_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m))) +svfloat64_t svread_ver_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m))) +svint64_t svread_ver_za64_m(svint64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m))) +svuint8_t svread_ver_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m))) +svint8_t svread_ver_za8_m(svint8_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m))) +void svsumopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m))) +void svsumops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m))) +void svusmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m))) +void svusmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m))) +void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m))) +void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m))) +void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m))) +void svwrite_hor_za16_m(uint64_t, uint32_t, svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m))) +void svwrite_hor_za32_m(uint64_t, uint32_t, svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m))) +void svwrite_hor_za32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m))) +void svwrite_hor_za32_m(uint64_t, uint32_t, svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m))) +void svwrite_hor_za64_m(uint64_t, uint32_t, svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m))) +void svwrite_hor_za64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m))) +void svwrite_hor_za64_m(uint64_t, uint32_t, svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m))) +void svwrite_hor_za8_m(uint64_t, uint32_t, svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m))) +void svwrite_hor_za8_m(uint64_t, uint32_t, svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m))) +void 
svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m))) +void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m))) +void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m))) +void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m))) +void svwrite_ver_za16_m(uint64_t, uint32_t, svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m))) +void svwrite_ver_za32_m(uint64_t, uint32_t, svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m))) +void svwrite_ver_za32_m(uint64_t, uint32_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m))) +void svwrite_ver_za32_m(uint64_t, uint32_t, svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m))) +void svwrite_ver_za64_m(uint64_t, uint32_t, svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m))) +void svwrite_ver_za64_m(uint64_t, uint32_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m))) +void svwrite_ver_za64_m(uint64_t, uint32_t, svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m))) +void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m))) +void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m))) +void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m))) +void svmops_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m))) +void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m))) +void svmops_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m))) +void svaddha_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m))) +void svaddha_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m))) +void svaddva_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m))) +void svaddva_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m))) +void svmopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m))) +void svmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m))) +void svmops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m))) +void svmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m))) +void svsumopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m))) +void svsumops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m))) +void svusmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m))) +void svusmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m))) +void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m))) +void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m))) +void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m))) +void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m))) +void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m))) +void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m))) +void svmops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m))) +void svmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m))) +void svsumopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m))) +void svsumops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m))) +void svusmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m))) +void svusmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x2))) +void svadd_write_single_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x2))) +void svadd_write_single_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x4))) +void svadd_write_single_za32_u32_vg1x4(uint32_t, svuint32x4_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x4))) +void svadd_write_single_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x2))) +void svadd_write_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x2))) +void svadd_write_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x4))) +void svadd_write_za32_u32_vg1x4(uint32_t, svuint32x4_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x4))) +void svadd_write_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x2))) +void svadd_za32_u32_vg1x2(uint32_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x2))) +void svadd_za32_f32_vg1x2(uint32_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x2))) +void svadd_za32_s32_vg1x2(uint32_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x4))) +void svadd_za32_u32_vg1x4(uint32_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x4))) +void svadd_za32_f32_vg1x4(uint32_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x4))) +void svadd_za32_s32_vg1x4(uint32_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_u32_m))) +void svbmopa_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_s32_m))) +void svbmopa_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_u32_m))) +void svbmops_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_s32_m))) +void svbmops_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x2))) +void svdot_single_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x2))) +void svdot_single_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x2))) +void svdot_single_za32_s8_vg1x2(uint32_t, svint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x2))) +void svdot_single_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x2))) +void svdot_single_za32_u8_vg1x2(uint32_t, svuint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x2))) +void 
svdot_single_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x4))) +void svdot_single_za32_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x4))) +void svdot_single_za32_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x4))) +void svdot_single_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x4))) +void svdot_single_za32_s16_vg1x4(uint32_t, svint16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x4))) +void svdot_single_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x4))) +void svdot_single_za32_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x2))) +void svdot_lane_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x2))) +void svdot_lane_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x2))) +void svdot_lane_za32_s8_vg1x2(uint32_t, svint8x2_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x2))) +void svdot_lane_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x2))) +void svdot_lane_za32_u8_vg1x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x2))) +void svdot_lane_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x4))) +void svdot_lane_za32_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x4))) +void svdot_lane_za32_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x4))) +void svdot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x4))) +void svdot_lane_za32_s16_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x4))) +void svdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x4))) +void svdot_lane_za32_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x2))) +void svdot_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x2))) +void svdot_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x2))) +void svdot_za32_s8_vg1x2(uint32_t, svint8x2_t, svint8x2_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x2))) +void svdot_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x2))) +void svdot_za32_u8_vg1x2(uint32_t, svuint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x2))) +void svdot_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x4))) +void svdot_za32_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x4))) +void svdot_za32_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x4))) +void svdot_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x4))) +void svdot_za32_s16_vg1x4(uint32_t, svint16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x4))) +void svdot_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x4))) +void svdot_za32_u16_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_zt))) +void svldr_zt(uint64_t, void const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u8))) +svuint8_t svluti2_lane_zt_u8(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u32))) +svuint32_t svluti2_lane_zt_u32(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u16))) +svuint16_t svluti2_lane_zt_u16(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_bf16))) +svbfloat16_t svluti2_lane_zt_bf16(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s8))) +svint8_t svluti2_lane_zt_s8(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f32))) +svfloat32_t svluti2_lane_zt_f32(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16))) +svfloat16_t svluti2_lane_zt_f16(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32))) +svint32_t svluti2_lane_zt_s32(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16))) +svint16_t svluti2_lane_zt_s16(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u8_x2))) +svuint8x2_t svluti2_lane_zt_u8_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u32_x2))) +svuint32x2_t svluti2_lane_zt_u32_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u16_x2))) +svuint16x2_t svluti2_lane_zt_u16_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_bf16_x2))) +svbfloat16x2_t svluti2_lane_zt_bf16_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s8_x2))) +svint8x2_t svluti2_lane_zt_s8_x2(uint64_t, svuint8_t, 
uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f32_x2))) +svfloat32x2_t svluti2_lane_zt_f32_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16_x2))) +svfloat16x2_t svluti2_lane_zt_f16_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32_x2))) +svint32x2_t svluti2_lane_zt_s32_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16_x2))) +svint16x2_t svluti2_lane_zt_s16_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u8_x4))) +svuint8x4_t svluti2_lane_zt_u8_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u32_x4))) +svuint32x4_t svluti2_lane_zt_u32_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u16_x4))) +svuint16x4_t svluti2_lane_zt_u16_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_bf16_x4))) +svbfloat16x4_t svluti2_lane_zt_bf16_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s8_x4))) +svint8x4_t svluti2_lane_zt_s8_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f32_x4))) +svfloat32x4_t svluti2_lane_zt_f32_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16_x4))) +svfloat16x4_t svluti2_lane_zt_f16_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32_x4))) +svint32x4_t svluti2_lane_zt_s32_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16_x4))) +svint16x4_t svluti2_lane_zt_s16_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u8))) +svuint8_t svluti4_lane_zt_u8(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u32))) +svuint32_t svluti4_lane_zt_u32(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u16))) +svuint16_t svluti4_lane_zt_u16(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_bf16))) +svbfloat16_t svluti4_lane_zt_bf16(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s8))) +svint8_t svluti4_lane_zt_s8(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f32))) +svfloat32_t svluti4_lane_zt_f32(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f16))) +svfloat16_t svluti4_lane_zt_f16(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s32))) +svint32_t svluti4_lane_zt_s32(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s16))) +svint16_t svluti4_lane_zt_s16(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u8_x2))) +svuint8x2_t svluti4_lane_zt_u8_x2(uint64_t, svuint8_t, uint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u32_x2))) +svuint32x2_t svluti4_lane_zt_u32_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u16_x2))) +svuint16x2_t svluti4_lane_zt_u16_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_bf16_x2))) +svbfloat16x2_t svluti4_lane_zt_bf16_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s8_x2))) +svint8x2_t svluti4_lane_zt_s8_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f32_x2))) +svfloat32x2_t svluti4_lane_zt_f32_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f16_x2))) +svfloat16x2_t svluti4_lane_zt_f16_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s32_x2))) +svint32x2_t svluti4_lane_zt_s32_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s16_x2))) +svint16x2_t svluti4_lane_zt_s16_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u32_x4))) +svuint32x4_t svluti4_lane_zt_u32_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u16_x4))) +svuint16x4_t svluti4_lane_zt_u16_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_bf16_x4))) +svbfloat16x4_t svluti4_lane_zt_bf16_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f32_x4))) +svfloat32x4_t svluti4_lane_zt_f32_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f16_x4))) +svfloat16x4_t svluti4_lane_zt_f16_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s32_x4))) +svint32x4_t svluti4_lane_zt_s32_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s16_x4))) +svint16x4_t svluti4_lane_zt_s16_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x2))) +void svmla_single_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x4))) +void svmla_single_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x2))) +void svmla_single_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x2))) +void svmla_single_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x2))) +void svmla_single_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x2))) +void svmla_single_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x4))) +void svmla_single_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x4))) +void svmla_single_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x4))) +void svmla_single_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x4))) +void svmla_single_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x2))) +void svmla_single_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x2))) +void svmla_single_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x4))) +void svmla_single_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x4))) +void svmla_single_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x2))) +void svmla_lane_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x4))) +void svmla_lane_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x1))) +void svmla_lane_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x1))) +void svmla_lane_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x1))) +void svmla_lane_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x1))) +void svmla_lane_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x2))) +void svmla_lane_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x2))) +void svmla_lane_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x2))) +void svmla_lane_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x2))) +void svmla_lane_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x4))) +void svmla_lane_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x4))) +void svmla_lane_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x4))) +void svmla_lane_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x4))) +void svmla_lane_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x1))) +void svmla_lane_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x1))) +void svmla_lane_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x2))) +void svmla_lane_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x2))) +void svmla_lane_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x4))) +void svmla_lane_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x4))) +void svmla_lane_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x2))) +void svmla_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x4))) +void svmla_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x1))) +void svmla_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x1))) +void svmla_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x1))) +void svmla_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x1))) +void svmla_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x2))) +void svmla_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x2))) +void svmla_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x2))) +void svmla_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x2))) +void svmla_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x4))) +void svmla_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x4))) +void svmla_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x4))) +void svmla_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x4))) +void svmla_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x1))) +void svmla_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x1))) +void svmla_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x2))) +void svmla_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8x2_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x2))) +void svmla_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x4))) +void svmla_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x4))) +void svmla_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x2))) +void svmls_single_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x4))) +void svmls_single_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x2))) +void svmls_single_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x2))) +void svmls_single_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x2))) +void svmls_single_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x2))) +void svmls_single_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x4))) +void svmls_single_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x4))) +void svmls_single_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x4))) +void svmls_single_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x4))) +void svmls_single_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x2))) +void svmls_single_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x2))) +void svmls_single_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x4))) +void svmls_single_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x4))) +void svmls_single_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x2))) +void svmls_lane_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x4))) +void svmls_lane_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x1))) +void svmls_lane_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x1))) +void svmls_lane_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x1))) +void svmls_lane_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t, 
uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x1))) +void svmls_lane_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x2))) +void svmls_lane_za32_bf16_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x2))) +void svmls_lane_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x2))) +void svmls_lane_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x2))) +void svmls_lane_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x4))) +void svmls_lane_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x4))) +void svmls_lane_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x4))) +void svmls_lane_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x4))) +void svmls_lane_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x1))) +void svmls_lane_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x1))) +void svmls_lane_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x2))) +void svmls_lane_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x2))) +void svmls_lane_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x4))) +void svmls_lane_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x4))) +void svmls_lane_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x2))) +void svmls_za32_f32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x4))) +void svmls_za32_f32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x1))) +void svmls_za32_bf16_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x1))) +void svmls_za32_f16_vg2x1(uint32_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x1))) +void svmls_za32_s16_vg2x1(uint32_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x1))) +void svmls_za32_u16_vg2x1(uint32_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x2))) +void svmls_za32_bf16_vg2x2(uint32_t, 
svbfloat16x2_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x2))) +void svmls_za32_f16_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x2))) +void svmls_za32_s16_vg2x2(uint32_t, svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x2))) +void svmls_za32_u16_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x4))) +void svmls_za32_bf16_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x4))) +void svmls_za32_f16_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x4))) +void svmls_za32_s16_vg2x4(uint32_t, svint16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x4))) +void svmls_za32_u16_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x1))) +void svmls_za32_s8_vg4x1(uint32_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x1))) +void svmls_za32_u8_vg4x1(uint32_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x2))) +void svmls_za32_s8_vg4x2(uint32_t, svint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x2))) +void svmls_za32_u8_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x4))) +void svmls_za32_s8_vg4x4(uint32_t, svint8x4_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x4))) +void svmls_za32_u8_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s16_m))) +void svmopa_za32_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u16_m))) +void svmopa_za32_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s16_m))) +void svmops_za32_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u16_m))) +void svmops_za32_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_vg2))) +svuint16x2_t svread_hor_za16_u16_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_vg2))) +svbfloat16x2_t svread_hor_za16_bf16_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_vg2))) +svfloat16x2_t svread_hor_za16_f16_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_vg2))) +svint16x2_t svread_hor_za16_s16_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_vg4))) +svuint16x4_t svread_hor_za16_u16_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_vg4))) +svbfloat16x4_t svread_hor_za16_bf16_vg4(uint64_t, uint32_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_vg4))) +svfloat16x4_t svread_hor_za16_f16_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_vg4))) +svint16x4_t svread_hor_za16_s16_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_vg2))) +svuint32x2_t svread_hor_za32_u32_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_vg2))) +svfloat32x2_t svread_hor_za32_f32_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_vg2))) +svint32x2_t svread_hor_za32_s32_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_vg4))) +svuint32x4_t svread_hor_za32_u32_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_vg4))) +svfloat32x4_t svread_hor_za32_f32_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_vg4))) +svint32x4_t svread_hor_za32_s32_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_vg2))) +svuint64x2_t svread_hor_za64_u64_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_vg2))) +svfloat64x2_t svread_hor_za64_f64_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_vg2))) +svint64x2_t svread_hor_za64_s64_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_vg4))) +svuint64x4_t svread_hor_za64_u64_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_vg4))) +svfloat64x4_t svread_hor_za64_f64_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_vg4))) +svint64x4_t svread_hor_za64_s64_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_vg2))) +svuint8x2_t svread_hor_za8_u8_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_vg2))) +svint8x2_t svread_hor_za8_s8_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_vg4))) +svuint8x4_t svread_hor_za8_u8_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_vg4))) +svint8x4_t svread_hor_za8_s8_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_vg2))) +svuint16x2_t svread_ver_za16_u16_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_vg2))) +svbfloat16x2_t svread_ver_za16_bf16_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_vg2))) +svfloat16x2_t svread_ver_za16_f16_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_vg2))) +svint16x2_t svread_ver_za16_s16_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_vg4))) +svuint16x4_t svread_ver_za16_u16_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_vg4))) +svbfloat16x4_t svread_ver_za16_bf16_vg4(uint64_t, uint32_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_vg4))) +svfloat16x4_t svread_ver_za16_f16_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_vg4))) +svint16x4_t svread_ver_za16_s16_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_vg2))) +svuint32x2_t svread_ver_za32_u32_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_vg2))) +svfloat32x2_t svread_ver_za32_f32_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_vg2))) +svint32x2_t svread_ver_za32_s32_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_vg4))) +svuint32x4_t svread_ver_za32_u32_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_vg4))) +svfloat32x4_t svread_ver_za32_f32_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_vg4))) +svint32x4_t svread_ver_za32_s32_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_vg2))) +svuint64x2_t svread_ver_za64_u64_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_vg2))) +svfloat64x2_t svread_ver_za64_f64_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_vg2))) +svint64x2_t svread_ver_za64_s64_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_vg4))) +svuint64x4_t svread_ver_za64_u64_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_vg4))) +svfloat64x4_t svread_ver_za64_f64_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_vg4))) +svint64x4_t svread_ver_za64_s64_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_vg2))) +svuint8x2_t svread_ver_za8_u8_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_vg2))) +svint8x2_t svread_ver_za8_s8_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_vg4))) +svuint8x4_t svread_ver_za8_u8_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_vg4))) +svint8x4_t svread_ver_za8_s8_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_u16_vg1x2))) +svuint16x2_t svread_za16_u16_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_bf16_vg1x2))) +svbfloat16x2_t svread_za16_bf16_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_f16_vg1x2))) +svfloat16x2_t svread_za16_f16_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_s16_vg1x2))) +svint16x2_t svread_za16_s16_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_u16_vg1x4))) +svuint16x4_t svread_za16_u16_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_bf16_vg1x4))) +svbfloat16x4_t svread_za16_bf16_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_f16_vg1x4))) +svfloat16x4_t 
svread_za16_f16_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_s16_vg1x4))) +svint16x4_t svread_za16_s16_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_u32_vg1x2))) +svuint32x2_t svread_za32_u32_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_f32_vg1x2))) +svfloat32x2_t svread_za32_f32_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_s32_vg1x2))) +svint32x2_t svread_za32_s32_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_u32_vg1x4))) +svuint32x4_t svread_za32_u32_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_f32_vg1x4))) +svfloat32x4_t svread_za32_f32_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za32_s32_vg1x4))) +svint32x4_t svread_za32_s32_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_u64_vg1x2))) +svuint64x2_t svread_za64_u64_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_f64_vg1x2))) +svfloat64x2_t svread_za64_f64_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_s64_vg1x2))) +svint64x2_t svread_za64_s64_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_u64_vg1x4))) +svuint64x4_t svread_za64_u64_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_f64_vg1x4))) +svfloat64x4_t svread_za64_f64_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za64_s64_vg1x4))) +svint64x4_t svread_za64_s64_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_u8_vg1x2))) +svuint8x2_t svread_za8_u8_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_s8_vg1x2))) +svint8x2_t svread_za8_s8_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_u8_vg1x4))) +svuint8x4_t svread_za8_u8_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_s8_vg1x4))) +svint8x4_t svread_za8_s8_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_zt))) +void svstr_zt(uint64_t, void *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x2))) +void svsub_write_single_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x2))) +void svsub_write_single_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x4))) +void svsub_write_single_za32_u32_vg1x4(uint32_t, svuint32x4_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x4))) +void svsub_write_single_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x2))) +void svsub_write_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x2))) +void svsub_write_za32_s32_vg1x2(uint32_t, svint32x2_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x4))) +void svsub_write_za32_u32_vg1x4(uint32_t, svuint32x4_t, 
svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x4))) +void svsub_write_za32_s32_vg1x4(uint32_t, svint32x4_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x2))) +void svsub_za32_u32_vg1x2(uint32_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x2))) +void svsub_za32_f32_vg1x2(uint32_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x2))) +void svsub_za32_s32_vg1x2(uint32_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x4))) +void svsub_za32_u32_vg1x4(uint32_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x4))) +void svsub_za32_f32_vg1x4(uint32_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x4))) +void svsub_za32_s32_vg1x4(uint32_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x2))) +void svsudot_single_za32_s8_vg1x2(uint32_t, svint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x4))) +void svsudot_single_za32_s8_vg1x4(uint32_t, svint8x4_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x2))) +void svsudot_lane_za32_s8_vg1x2(uint32_t, svint8x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x4))) +void svsudot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x2))) +void svsudot_za32_s8_vg1x2(uint32_t, svint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x4))) +void svsudot_za32_s8_vg1x4(uint32_t, svint8x4_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x2))) +void svsumla_single_za32_s8_vg4x2(uint32_t, svint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x4))) +void svsumla_single_za32_s8_vg4x4(uint32_t, svint8x4_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x1))) +void svsumla_lane_za32_s8_vg4x1(uint32_t, svint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x2))) +void svsumla_lane_za32_s8_vg4x2(uint32_t, svint8x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x4))) +void svsumla_lane_za32_s8_vg4x4(uint32_t, svint8x4_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x1))) +void svsumla_za32_s8_vg4x1(uint32_t, svint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x2))) +void svsumla_za32_s8_vg4x2(uint32_t, svint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x4))) +void svsumla_za32_s8_vg4x4(uint32_t, svint8x4_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsuvdot_lane_za32_s8_vg1x4))) +void svsuvdot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x2))) +void svusdot_single_za32_u8_vg1x2(uint32_t, svuint8x2_t, 
svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x4))) +void svusdot_single_za32_u8_vg1x4(uint32_t, svuint8x4_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x2))) +void svusdot_lane_za32_u8_vg1x2(uint32_t, svuint8x2_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x4))) +void svusdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x2))) +void svusdot_za32_u8_vg1x2(uint32_t, svuint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x4))) +void svusdot_za32_u8_vg1x4(uint32_t, svuint8x4_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x2))) +void svusmla_single_za32_u8_vg4x2(uint32_t, svuint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x4))) +void svusmla_single_za32_u8_vg4x4(uint32_t, svuint8x4_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x1))) +void svusmla_lane_za32_u8_vg4x1(uint32_t, svuint8_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x2))) +void svusmla_lane_za32_u8_vg4x2(uint32_t, svuint8x2_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x4))) +void svusmla_lane_za32_u8_vg4x4(uint32_t, svuint8x4_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x1))) +void svusmla_za32_u8_vg4x1(uint32_t, svuint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x2))) +void svusmla_za32_u8_vg4x2(uint32_t, svuint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x4))) +void svusmla_za32_u8_vg4x4(uint32_t, svuint8x4_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusvdot_lane_za32_u8_vg1x4))) +void svusvdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_bf16_vg1x2))) +void svvdot_lane_za32_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_f16_vg1x2))) +void svvdot_lane_za32_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s16_vg1x2))) +void svvdot_lane_za32_s16_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u16_vg1x2))) +void svvdot_lane_za32_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s8_vg1x4))) +void svvdot_lane_za32_s8_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u8_vg1x4))) +void svvdot_lane_za32_u8_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg2))) +void svwrite_hor_za16_u16_vg2(uint64_t, uint32_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg2))) +void 
svwrite_hor_za16_bf16_vg2(uint64_t, uint32_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg2))) +void svwrite_hor_za16_f16_vg2(uint64_t, uint32_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg2))) +void svwrite_hor_za16_s16_vg2(uint64_t, uint32_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg4))) +void svwrite_hor_za16_u16_vg4(uint64_t, uint32_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg4))) +void svwrite_hor_za16_bf16_vg4(uint64_t, uint32_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg4))) +void svwrite_hor_za16_f16_vg4(uint64_t, uint32_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg4))) +void svwrite_hor_za16_s16_vg4(uint64_t, uint32_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg2))) +void svwrite_hor_za32_u32_vg2(uint64_t, uint32_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg2))) +void svwrite_hor_za32_f32_vg2(uint64_t, uint32_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg2))) +void svwrite_hor_za32_s32_vg2(uint64_t, uint32_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg4))) +void svwrite_hor_za32_u32_vg4(uint64_t, uint32_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg4))) +void svwrite_hor_za32_f32_vg4(uint64_t, uint32_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg4))) +void svwrite_hor_za32_s32_vg4(uint64_t, uint32_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg2))) +void svwrite_hor_za64_u64_vg2(uint64_t, uint32_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg2))) +void svwrite_hor_za64_f64_vg2(uint64_t, uint32_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg2))) +void svwrite_hor_za64_s64_vg2(uint64_t, uint32_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg4))) +void svwrite_hor_za64_u64_vg4(uint64_t, uint32_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg4))) +void svwrite_hor_za64_f64_vg4(uint64_t, uint32_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg4))) +void svwrite_hor_za64_s64_vg4(uint64_t, uint32_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg2))) +void svwrite_hor_za8_u8_vg2(uint64_t, uint32_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg2))) +void svwrite_hor_za8_s8_vg2(uint64_t, uint32_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg4))) +void svwrite_hor_za8_u8_vg4(uint64_t, uint32_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg4))) +void svwrite_hor_za8_s8_vg4(uint64_t, uint32_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg2))) +void 
svwrite_ver_za16_u16_vg2(uint64_t, uint32_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg2))) +void svwrite_ver_za16_bf16_vg2(uint64_t, uint32_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg2))) +void svwrite_ver_za16_f16_vg2(uint64_t, uint32_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg2))) +void svwrite_ver_za16_s16_vg2(uint64_t, uint32_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg4))) +void svwrite_ver_za16_u16_vg4(uint64_t, uint32_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg4))) +void svwrite_ver_za16_bf16_vg4(uint64_t, uint32_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg4))) +void svwrite_ver_za16_f16_vg4(uint64_t, uint32_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg4))) +void svwrite_ver_za16_s16_vg4(uint64_t, uint32_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg2))) +void svwrite_ver_za32_u32_vg2(uint64_t, uint32_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg2))) +void svwrite_ver_za32_f32_vg2(uint64_t, uint32_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg2))) +void svwrite_ver_za32_s32_vg2(uint64_t, uint32_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg4))) +void svwrite_ver_za32_u32_vg4(uint64_t, uint32_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg4))) +void svwrite_ver_za32_f32_vg4(uint64_t, uint32_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg4))) +void svwrite_ver_za32_s32_vg4(uint64_t, uint32_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg2))) +void svwrite_ver_za64_u64_vg2(uint64_t, uint32_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg2))) +void svwrite_ver_za64_f64_vg2(uint64_t, uint32_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg2))) +void svwrite_ver_za64_s64_vg2(uint64_t, uint32_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg4))) +void svwrite_ver_za64_u64_vg4(uint64_t, uint32_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg4))) +void svwrite_ver_za64_f64_vg4(uint64_t, uint32_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg4))) +void svwrite_ver_za64_s64_vg4(uint64_t, uint32_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg2))) +void svwrite_ver_za8_u8_vg2(uint64_t, uint32_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg2))) +void svwrite_ver_za8_s8_vg2(uint64_t, uint32_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg4))) +void svwrite_ver_za8_u8_vg4(uint64_t, uint32_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg4))) +void 
svwrite_ver_za8_s8_vg4(uint64_t, uint32_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x2))) +void svwrite_za16_u16_vg1x2(uint32_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x2))) +void svwrite_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x2))) +void svwrite_za16_f16_vg1x2(uint32_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x2))) +void svwrite_za16_s16_vg1x2(uint32_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x4))) +void svwrite_za16_u16_vg1x4(uint32_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x4))) +void svwrite_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x4))) +void svwrite_za16_f16_vg1x4(uint32_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x4))) +void svwrite_za16_s16_vg1x4(uint32_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x2))) +void svwrite_za32_u32_vg1x2(uint32_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x2))) +void svwrite_za32_f32_vg1x2(uint32_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x2))) +void svwrite_za32_s32_vg1x2(uint32_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x4))) +void svwrite_za32_u32_vg1x4(uint32_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x4))) +void svwrite_za32_f32_vg1x4(uint32_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x4))) +void svwrite_za32_s32_vg1x4(uint32_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x2))) +void svwrite_za64_u64_vg1x2(uint32_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x2))) +void svwrite_za64_f64_vg1x2(uint32_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x2))) +void svwrite_za64_s64_vg1x2(uint32_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x4))) +void svwrite_za64_u64_vg1x4(uint32_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x4))) +void svwrite_za64_f64_vg1x4(uint32_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x4))) +void svwrite_za64_s64_vg1x4(uint32_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x2))) +void svwrite_za8_u8_vg1x2(uint32_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x2))) +void svwrite_za8_s8_vg1x2(uint32_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x4))) +void svwrite_za8_u8_vg1x4(uint32_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x4))) +void svwrite_za8_s8_vg1x4(uint32_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_zt))) +void svzero_zt(uint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x2))) +void svadd_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x2))) +void svadd_write_za32_vg1x2(uint32_t, svint32x2_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x4))) +void svadd_write_za32_vg1x4(uint32_t, svuint32x4_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x4))) +void svadd_write_za32_vg1x4(uint32_t, svint32x4_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x2))) +void svadd_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x2))) +void svadd_write_za32_vg1x2(uint32_t, svint32x2_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_u32_vg1x4))) +void svadd_write_za32_vg1x4(uint32_t, svuint32x4_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za32_s32_vg1x4))) +void svadd_write_za32_vg1x4(uint32_t, svint32x4_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x2))) +void svadd_za32_vg1x2(uint32_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x2))) +void svadd_za32_vg1x2(uint32_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x2))) +void svadd_za32_vg1x2(uint32_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_u32_vg1x4))) +void svadd_za32_vg1x4(uint32_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_f32_vg1x4))) +void svadd_za32_vg1x4(uint32_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za32_s32_vg1x4))) +void svadd_za32_vg1x4(uint32_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_u32_m))) +void svbmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmopa_za32_s32_m))) +void svbmopa_za32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_u32_m))) +void svbmops_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svbmops_za32_s32_m))) +void svbmops_za32_m(uint64_t, svbool_t, svbool_t, svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svuint8x2_t, svuint8_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_bf16_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_f16_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s8_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svint8x4_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_s16_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svint16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u8_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_u16_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svuint16x4_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x2))) +void svdot_lane_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x2))) +void svdot_lane_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x2))) +void svdot_lane_za32_vg1x2(uint32_t, svint8x2_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x2))) +void svdot_lane_za32_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x2))) +void svdot_lane_za32_vg1x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x2))) +void svdot_lane_za32_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_bf16_vg1x4))) +void svdot_lane_za32_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_f16_vg1x4))) +void svdot_lane_za32_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s8_vg1x4))) +void svdot_lane_za32_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_s16_vg1x4))) +void svdot_lane_za32_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u8_vg1x4))) +void svdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_u16_vg1x4))) +void svdot_lane_za32_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svint8x2_t, svint8x2_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svuint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x2))) +void svdot_za32_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_bf16_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_f16_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s8_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svint8x4_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_s16_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svint16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u8_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svuint8x4_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_u16_vg1x4))) +void svdot_za32_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x2))) +void svmla_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f32_vg1x4))) +void svmla_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x2))) +void svmla_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x2))) +void svmla_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x2))) +void svmla_za32_vg2x2(uint32_t, svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x2))) +void svmla_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_bf16_vg2x4))) +void svmla_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_f16_vg2x4))) +void svmla_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s16_vg2x4))) +void svmla_za32_vg2x4(uint32_t, svint16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u16_vg2x4))) +void svmla_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x2))) +void svmla_za32_vg4x2(uint32_t, svint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x2))) +void svmla_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_s8_vg4x4))) +void svmla_za32_vg4x4(uint32_t, svint8x4_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_u8_vg4x4))) +void svmla_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x2))) +void 
svmla_lane_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f32_vg1x4))) +void svmla_lane_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x1))) +void svmla_lane_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x1))) +void svmla_lane_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x1))) +void svmla_lane_za32_vg2x1(uint32_t, svint16_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x1))) +void svmla_lane_za32_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x2))) +void svmla_lane_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x2))) +void svmla_lane_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x2))) +void svmla_lane_za32_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x2))) +void svmla_lane_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_bf16_vg2x4))) +void svmla_lane_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_f16_vg2x4))) +void svmla_lane_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s16_vg2x4))) +void svmla_lane_za32_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u16_vg2x4))) +void svmla_lane_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x1))) +void svmla_lane_za32_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x1))) +void svmla_lane_za32_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x2))) +void svmla_lane_za32_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x2))) +void svmla_lane_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_s8_vg4x4))) +void svmla_lane_za32_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_u8_vg4x4))) +void svmla_lane_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x2))) +void svmla_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f32_vg1x4))) +void svmla_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x1))) +void svmla_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x1))) +void svmla_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x1))) +void svmla_za32_vg2x1(uint32_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x1))) +void svmla_za32_vg2x1(uint32_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x2))) +void svmla_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x2))) +void svmla_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x2))) +void svmla_za32_vg2x2(uint32_t, svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x2))) +void svmla_za32_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_bf16_vg2x4))) +void svmla_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_f16_vg2x4))) +void svmla_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s16_vg2x4))) +void svmla_za32_vg2x4(uint32_t, svint16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u16_vg2x4))) +void svmla_za32_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x1))) +void svmla_za32_vg4x1(uint32_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x1))) +void svmla_za32_vg4x1(uint32_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x2))) +void svmla_za32_vg4x2(uint32_t, svint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x2))) +void svmla_za32_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_s8_vg4x4))) +void svmla_za32_vg4x4(uint32_t, svint8x4_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_u8_vg4x4))) +void svmla_za32_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x2))) +void svmls_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f32_vg1x4))) +void svmls_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x2))) +void svmls_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x2))) +void svmls_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x2))) +void svmls_za32_vg2x2(uint32_t, svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x2))) +void svmls_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t); 
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_bf16_vg2x4))) +void svmls_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_f16_vg2x4))) +void svmls_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s16_vg2x4))) +void svmls_za32_vg2x4(uint32_t, svint16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u16_vg2x4))) +void svmls_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x2))) +void svmls_za32_vg4x2(uint32_t, svint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x2))) +void svmls_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_s8_vg4x4))) +void svmls_za32_vg4x4(uint32_t, svint8x4_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za32_u8_vg4x4))) +void svmls_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x2))) +void svmls_lane_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f32_vg1x4))) +void svmls_lane_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x1))) +void svmls_lane_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x1))) +void svmls_lane_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x1))) +void svmls_lane_za32_vg2x1(uint32_t, svint16_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x1))) +void svmls_lane_za32_vg2x1(uint32_t, svuint16_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x2))) +void svmls_lane_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x2))) +void svmls_lane_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x2))) +void svmls_lane_za32_vg2x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x2))) +void svmls_lane_za32_vg2x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_bf16_vg2x4))) +void svmls_lane_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_f16_vg2x4))) +void svmls_lane_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s16_vg2x4))) +void svmls_lane_za32_vg2x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u16_vg2x4))) +void svmls_lane_za32_vg2x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x1))) +void svmls_lane_za32_vg4x1(uint32_t, svint8_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x1))) +void svmls_lane_za32_vg4x1(uint32_t, svuint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x2))) +void svmls_lane_za32_vg4x2(uint32_t, svint8x2_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x2))) +void svmls_lane_za32_vg4x2(uint32_t, svuint8x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_s8_vg4x4))) +void svmls_lane_za32_vg4x4(uint32_t, svint8x4_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za32_u8_vg4x4))) +void svmls_lane_za32_vg4x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x2))) +void svmls_za32_vg1x2(uint32_t, svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f32_vg1x4))) +void svmls_za32_vg1x4(uint32_t, svfloat32x4_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x1))) +void svmls_za32_vg2x1(uint32_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x1))) +void svmls_za32_vg2x1(uint32_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x1))) +void svmls_za32_vg2x1(uint32_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x1))) +void svmls_za32_vg2x1(uint32_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x2))) +void svmls_za32_vg2x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x2))) +void svmls_za32_vg2x2(uint32_t, svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x2))) +void svmls_za32_vg2x2(uint32_t, svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x2))) +void svmls_za32_vg2x2(uint32_t, svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_bf16_vg2x4))) +void svmls_za32_vg2x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_f16_vg2x4))) +void svmls_za32_vg2x4(uint32_t, svfloat16x4_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s16_vg2x4))) +void svmls_za32_vg2x4(uint32_t, svint16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u16_vg2x4))) +void svmls_za32_vg2x4(uint32_t, svuint16x4_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x1))) +void svmls_za32_vg4x1(uint32_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x1))) +void svmls_za32_vg4x1(uint32_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x2))) +void svmls_za32_vg4x2(uint32_t, svint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x2))) +void 
svmls_za32_vg4x2(uint32_t, svuint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_s8_vg4x4))) +void svmls_za32_vg4x4(uint32_t, svint8x4_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za32_u8_vg4x4))) +void svmls_za32_vg4x4(uint32_t, svuint8x4_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s16_m))) +void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u16_m))) +void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s16_m))) +void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u16_m))) +void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x2))) +void svsub_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x2))) +void svsub_write_za32_vg1x2(uint32_t, svint32x2_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x4))) +void svsub_write_za32_vg1x4(uint32_t, svuint32x4_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_s32_vg1x4))) +void svsub_write_za32_vg1x4(uint32_t, svint32x4_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x2))) +void svsub_write_za32_vg1x2(uint32_t, svuint32x2_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x2))) +void svsub_write_za32_vg1x2(uint32_t, svint32x2_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_u32_vg1x4))) +void svsub_write_za32_vg1x4(uint32_t, svuint32x4_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za32_s32_vg1x4))) +void svsub_write_za32_vg1x4(uint32_t, svint32x4_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x2))) +void svsub_za32_vg1x2(uint32_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x2))) +void svsub_za32_vg1x2(uint32_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x2))) +void svsub_za32_vg1x2(uint32_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_u32_vg1x4))) +void svsub_za32_vg1x4(uint32_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_f32_vg1x4))) +void svsub_za32_vg1x4(uint32_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za32_s32_vg1x4))) +void svsub_za32_vg1x4(uint32_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x2))) +void svsudot_za32_vg1x2(uint32_t, svint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_single_za32_s8_vg1x4))) +void svsudot_za32_vg1x4(uint32_t, svint8x4_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x2))) +void svsudot_lane_za32_vg1x2(uint32_t, svint8x2_t, svuint8_t, uint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_lane_za32_s8_vg1x4))) +void svsudot_lane_za32_vg1x4(uint32_t, svint8x4_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x2))) +void svsudot_za32_vg1x2(uint32_t, svint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsudot_za32_s8_vg1x4))) +void svsudot_za32_vg1x4(uint32_t, svint8x4_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x2))) +void svsumla_za32_vg4x2(uint32_t, svint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_single_za32_s8_vg4x4))) +void svsumla_za32_vg4x4(uint32_t, svint8x4_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x1))) +void svsumla_lane_za32_vg4x1(uint32_t, svint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x2))) +void svsumla_lane_za32_vg4x2(uint32_t, svint8x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_lane_za32_s8_vg4x4))) +void svsumla_lane_za32_vg4x4(uint32_t, svint8x4_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x1))) +void svsumla_za32_vg4x1(uint32_t, svint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x2))) +void svsumla_za32_vg4x2(uint32_t, svint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumla_za32_s8_vg4x4))) +void svsumla_za32_vg4x4(uint32_t, svint8x4_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsuvdot_lane_za32_s8_vg1x4))) +void svsuvdot_lane_za32_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x2))) +void svusdot_za32_vg1x2(uint32_t, svuint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_single_za32_u8_vg1x4))) +void svusdot_za32_vg1x4(uint32_t, svuint8x4_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x2))) +void svusdot_lane_za32_vg1x2(uint32_t, svuint8x2_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_lane_za32_u8_vg1x4))) +void svusdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x2))) +void svusdot_za32_vg1x2(uint32_t, svuint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusdot_za32_u8_vg1x4))) +void svusdot_za32_vg1x4(uint32_t, svuint8x4_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x2))) +void svusmla_za32_vg4x2(uint32_t, svuint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_single_za32_u8_vg4x4))) +void svusmla_za32_vg4x4(uint32_t, svuint8x4_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x1))) +void svusmla_lane_za32_vg4x1(uint32_t, svuint8_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x2))) +void svusmla_lane_za32_vg4x2(uint32_t, svuint8x2_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_lane_za32_u8_vg4x4))) +void 
svusmla_lane_za32_vg4x4(uint32_t, svuint8x4_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x1))) +void svusmla_za32_vg4x1(uint32_t, svuint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x2))) +void svusmla_za32_vg4x2(uint32_t, svuint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmla_za32_u8_vg4x4))) +void svusmla_za32_vg4x4(uint32_t, svuint8x4_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusvdot_lane_za32_u8_vg1x4))) +void svusvdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_bf16_vg1x2))) +void svvdot_lane_za32_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_f16_vg1x2))) +void svvdot_lane_za32_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s16_vg1x2))) +void svvdot_lane_za32_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u16_vg1x2))) +void svvdot_lane_za32_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_s8_vg1x4))) +void svvdot_lane_za32_vg1x4(uint32_t, svint8x4_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za32_u8_vg1x4))) +void svvdot_lane_za32_vg1x4(uint32_t, svuint8x4_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg2))) +void svwrite_hor_za16_vg2(uint64_t, uint32_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg2))) +void svwrite_hor_za16_vg2(uint64_t, uint32_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg2))) +void svwrite_hor_za16_vg2(uint64_t, uint32_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg2))) +void svwrite_hor_za16_vg2(uint64_t, uint32_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_vg4))) +void svwrite_hor_za16_vg4(uint64_t, uint32_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_vg4))) +void svwrite_hor_za16_vg4(uint64_t, uint32_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_vg4))) +void svwrite_hor_za16_vg4(uint64_t, uint32_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_vg4))) +void svwrite_hor_za16_vg4(uint64_t, uint32_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg2))) +void svwrite_hor_za32_vg2(uint64_t, uint32_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg2))) +void svwrite_hor_za32_vg2(uint64_t, uint32_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg2))) +void svwrite_hor_za32_vg2(uint64_t, uint32_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_vg4))) +void svwrite_hor_za32_vg4(uint64_t, uint32_t, svuint32x4_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_vg4))) +void svwrite_hor_za32_vg4(uint64_t, uint32_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_vg4))) +void svwrite_hor_za32_vg4(uint64_t, uint32_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg2))) +void svwrite_hor_za64_vg2(uint64_t, uint32_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg2))) +void svwrite_hor_za64_vg2(uint64_t, uint32_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg2))) +void svwrite_hor_za64_vg2(uint64_t, uint32_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_vg4))) +void svwrite_hor_za64_vg4(uint64_t, uint32_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_vg4))) +void svwrite_hor_za64_vg4(uint64_t, uint32_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_vg4))) +void svwrite_hor_za64_vg4(uint64_t, uint32_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg2))) +void svwrite_hor_za8_vg2(uint64_t, uint32_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg2))) +void svwrite_hor_za8_vg2(uint64_t, uint32_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg4))) +void svwrite_hor_za8_vg4(uint64_t, uint32_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg4))) +void svwrite_hor_za8_vg4(uint64_t, uint32_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg2))) +void svwrite_ver_za16_vg2(uint64_t, uint32_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg2))) +void svwrite_ver_za16_vg2(uint64_t, uint32_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg2))) +void svwrite_ver_za16_vg2(uint64_t, uint32_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg2))) +void svwrite_ver_za16_vg2(uint64_t, uint32_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg4))) +void svwrite_ver_za16_vg4(uint64_t, uint32_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg4))) +void svwrite_ver_za16_vg4(uint64_t, uint32_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_vg4))) +void svwrite_ver_za16_vg4(uint64_t, uint32_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_vg4))) +void svwrite_ver_za16_vg4(uint64_t, uint32_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg2))) +void svwrite_ver_za32_vg2(uint64_t, uint32_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg2))) +void svwrite_ver_za32_vg2(uint64_t, uint32_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg2))) +void svwrite_ver_za32_vg2(uint64_t, uint32_t, svint32x2_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_vg4))) +void svwrite_ver_za32_vg4(uint64_t, uint32_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_vg4))) +void svwrite_ver_za32_vg4(uint64_t, uint32_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_vg4))) +void svwrite_ver_za32_vg4(uint64_t, uint32_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg2))) +void svwrite_ver_za64_vg2(uint64_t, uint32_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg2))) +void svwrite_ver_za64_vg2(uint64_t, uint32_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg2))) +void svwrite_ver_za64_vg2(uint64_t, uint32_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_vg4))) +void svwrite_ver_za64_vg4(uint64_t, uint32_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_vg4))) +void svwrite_ver_za64_vg4(uint64_t, uint32_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_vg4))) +void svwrite_ver_za64_vg4(uint64_t, uint32_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg2))) +void svwrite_ver_za8_vg2(uint64_t, uint32_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg2))) +void svwrite_ver_za8_vg2(uint64_t, uint32_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg4))) +void svwrite_ver_za8_vg4(uint64_t, uint32_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg4))) +void svwrite_ver_za8_vg4(uint64_t, uint32_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x2))) +void svwrite_za16_vg1x2(uint32_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x2))) +void svwrite_za16_vg1x2(uint32_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x2))) +void svwrite_za16_vg1x2(uint32_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x2))) +void svwrite_za16_vg1x2(uint32_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x4))) +void svwrite_za16_vg1x4(uint32_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x4))) +void svwrite_za16_vg1x4(uint32_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_f16_vg1x4))) +void svwrite_za16_vg1x4(uint32_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_s16_vg1x4))) +void svwrite_za16_vg1x4(uint32_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x2))) +void svwrite_za32_vg1x2(uint32_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x2))) +void svwrite_za32_vg1x2(uint32_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x2))) +void svwrite_za32_vg1x2(uint32_t, svint32x2_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_u32_vg1x4))) +void svwrite_za32_vg1x4(uint32_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_f32_vg1x4))) +void svwrite_za32_vg1x4(uint32_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za32_s32_vg1x4))) +void svwrite_za32_vg1x4(uint32_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x2))) +void svwrite_za64_vg1x2(uint32_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x2))) +void svwrite_za64_vg1x2(uint32_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x2))) +void svwrite_za64_vg1x2(uint32_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_u64_vg1x4))) +void svwrite_za64_vg1x4(uint32_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_f64_vg1x4))) +void svwrite_za64_vg1x4(uint32_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za64_s64_vg1x4))) +void svwrite_za64_vg1x4(uint32_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x2))) +void svwrite_za8_vg1x2(uint32_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x2))) +void svwrite_za8_vg1x2(uint32_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x4))) +void svwrite_za8_vg1x4(uint32_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x4))) +void svwrite_za8_vg1x4(uint32_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x2))) +void svadd_za64_f64_vg1x2(uint32_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x4))) +void svadd_za64_f64_vg1x4(uint32_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x2))) +void svmla_single_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x4))) +void svmla_single_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x2))) +void svmla_lane_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x4))) +void svmla_lane_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x2))) +void svmla_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x4))) +void svmla_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x2))) +void svmls_single_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x4))) +void svmls_single_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x2))) +void svmls_lane_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x4))) +void svmls_lane_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x2))) +void svmls_za64_f64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x4))) +void svmls_za64_f64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x2))) +void svsub_za64_f64_vg1x2(uint32_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x4))) +void svsub_za64_f64_vg1x4(uint32_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x2))) +void svadd_za64_vg1x2(uint32_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x4))) +void svadd_za64_vg1x4(uint32_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x2))) +void svmla_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_f64_vg1x4))) +void svmla_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x2))) +void svmla_lane_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_f64_vg1x4))) +void svmla_lane_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x2))) +void svmla_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_f64_vg1x4))) +void svmla_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x2))) +void svmls_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_f64_vg1x4))) +void svmls_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x2))) +void svmls_lane_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_f64_vg1x4))) +void svmls_lane_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x2))) +void svmls_za64_vg1x2(uint32_t, svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_f64_vg1x4))) +void svmls_za64_vg1x4(uint32_t, svfloat64x4_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x2))) +void svsub_za64_vg1x2(uint32_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_f64_vg1x4))) +void svsub_za64_vg1x4(uint32_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x2))) +void svadd_write_single_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x2))) +void svadd_write_single_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x4))) +void svadd_write_single_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x4))) +void svadd_write_single_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x2))) +void svadd_write_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x2))) +void svadd_write_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x4))) +void svadd_write_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x4))) +void svadd_write_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x2))) +void svadd_za64_u64_vg1x2(uint32_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x2))) +void svadd_za64_s64_vg1x2(uint32_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x4))) +void svadd_za64_u64_vg1x4(uint32_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x4))) +void svadd_za64_s64_vg1x4(uint32_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x2))) +void svdot_single_za64_s16_vg1x2(uint32_t, svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x2))) +void svdot_single_za64_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x4))) +void svdot_single_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x4))) +void svdot_single_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x2))) +void svdot_lane_za64_s16_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x2))) +void svdot_lane_za64_u16_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x4))) +void svdot_lane_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x4))) +void svdot_lane_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x2))) +void svdot_za64_s16_vg1x2(uint32_t, svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x2))) +void svdot_za64_u16_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x4))) +void svdot_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x4))) +void svdot_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x2))) +void 
svmla_single_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x2))) +void svmla_single_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x4))) +void svmla_single_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x4))) +void svmla_single_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x1))) +void svmla_lane_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x1))) +void svmla_lane_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x2))) +void svmla_lane_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x2))) +void svmla_lane_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x4))) +void svmla_lane_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x4))) +void svmla_lane_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x1))) +void svmla_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x1))) +void svmla_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x2))) +void svmla_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x2))) +void svmla_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x4))) +void svmla_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x4))) +void svmla_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x2))) +void svmls_single_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x2))) +void svmls_single_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x4))) +void svmls_single_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x4))) +void svmls_single_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x1))) +void svmls_lane_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x1))) +void svmls_lane_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x2))) +void 
svmls_lane_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x2))) +void svmls_lane_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x4))) +void svmls_lane_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x4))) +void svmls_lane_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x1))) +void svmls_za64_s16_vg4x1(uint32_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x1))) +void svmls_za64_u16_vg4x1(uint32_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x2))) +void svmls_za64_s16_vg4x2(uint32_t, svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x2))) +void svmls_za64_u16_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x4))) +void svmls_za64_s16_vg4x4(uint32_t, svint16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x4))) +void svmls_za64_u16_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x2))) +void svsub_write_single_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x2))) +void svsub_write_single_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x4))) +void svsub_write_single_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x4))) +void svsub_write_single_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x2))) +void svsub_write_za64_u64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x2))) +void svsub_write_za64_s64_vg1x2(uint32_t, svint64x2_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x4))) +void svsub_write_za64_u64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x4))) +void svsub_write_za64_s64_vg1x4(uint32_t, svint64x4_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x2))) +void svsub_za64_u64_vg1x2(uint32_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x2))) +void svsub_za64_s64_vg1x2(uint32_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x4))) +void svsub_za64_u64_vg1x4(uint32_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x4))) +void svsub_za64_s64_vg1x4(uint32_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_s16_vg1x4))) +void svvdot_lane_za64_s16_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_u16_vg1x4))) +void svvdot_lane_za64_u16_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x2))) +void svadd_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x2))) +void svadd_write_za64_vg1x2(uint32_t, svint64x2_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_u64_vg1x4))) +void svadd_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za64_s64_vg1x4))) +void svadd_write_za64_vg1x4(uint32_t, svint64x4_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x2))) +void svadd_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x2))) +void svadd_write_za64_vg1x2(uint32_t, svint64x2_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_u64_vg1x4))) +void svadd_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_za64_s64_vg1x4))) +void svadd_write_za64_vg1x4(uint32_t, svint64x4_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x2))) +void svadd_za64_vg1x2(uint32_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x2))) +void svadd_za64_vg1x2(uint32_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_u64_vg1x4))) +void svadd_za64_vg1x4(uint32_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_s64_vg1x4))) +void svadd_za64_vg1x4(uint32_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x2))) +void svdot_za64_vg1x2(uint32_t, svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x2))) +void svdot_za64_vg1x2(uint32_t, svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_s16_vg1x4))) +void svdot_za64_vg1x4(uint32_t, svint16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za64_u16_vg1x4))) +void svdot_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x2))) +void svdot_lane_za64_vg1x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x2))) +void svdot_lane_za64_vg1x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_s16_vg1x4))) +void svdot_lane_za64_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za64_u16_vg1x4))) +void svdot_lane_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x2))) +void svdot_za64_vg1x2(uint32_t, svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x2))) +void svdot_za64_vg1x2(uint32_t, svuint16x2_t, svuint16x2_t); 
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_s16_vg1x4))) +void svdot_za64_vg1x4(uint32_t, svint16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za64_u16_vg1x4))) +void svdot_za64_vg1x4(uint32_t, svuint16x4_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x2))) +void svmla_za64_vg4x2(uint32_t, svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x2))) +void svmla_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_s16_vg4x4))) +void svmla_za64_vg4x4(uint32_t, svint16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za64_u16_vg4x4))) +void svmla_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x1))) +void svmla_lane_za64_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x1))) +void svmla_lane_za64_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x2))) +void svmla_lane_za64_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x2))) +void svmla_lane_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_s16_vg4x4))) +void svmla_lane_za64_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za64_u16_vg4x4))) +void svmla_lane_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x1))) +void svmla_za64_vg4x1(uint32_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x1))) +void svmla_za64_vg4x1(uint32_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x2))) +void svmla_za64_vg4x2(uint32_t, svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x2))) +void svmla_za64_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_s16_vg4x4))) +void svmla_za64_vg4x4(uint32_t, svint16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za64_u16_vg4x4))) +void svmla_za64_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x2))) +void svmls_za64_vg4x2(uint32_t, svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x2))) +void svmls_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_s16_vg4x4))) +void svmls_za64_vg4x4(uint32_t, svint16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za64_u16_vg4x4))) +void svmls_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x1))) +void svmls_lane_za64_vg4x1(uint32_t, svint16_t, svint16_t, uint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x1))) +void svmls_lane_za64_vg4x1(uint32_t, svuint16_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x2))) +void svmls_lane_za64_vg4x2(uint32_t, svint16x2_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x2))) +void svmls_lane_za64_vg4x2(uint32_t, svuint16x2_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_s16_vg4x4))) +void svmls_lane_za64_vg4x4(uint32_t, svint16x4_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za64_u16_vg4x4))) +void svmls_lane_za64_vg4x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x1))) +void svmls_za64_vg4x1(uint32_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x1))) +void svmls_za64_vg4x1(uint32_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x2))) +void svmls_za64_vg4x2(uint32_t, svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x2))) +void svmls_za64_vg4x2(uint32_t, svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_s16_vg4x4))) +void svmls_za64_vg4x4(uint32_t, svint16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za64_u16_vg4x4))) +void svmls_za64_vg4x4(uint32_t, svuint16x4_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x2))) +void svsub_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x2))) +void svsub_write_za64_vg1x2(uint32_t, svint64x2_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_u64_vg1x4))) +void svsub_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za64_s64_vg1x4))) +void svsub_write_za64_vg1x4(uint32_t, svint64x4_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x2))) +void svsub_write_za64_vg1x2(uint32_t, svuint64x2_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x2))) +void svsub_write_za64_vg1x2(uint32_t, svint64x2_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_u64_vg1x4))) +void svsub_write_za64_vg1x4(uint32_t, svuint64x4_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_za64_s64_vg1x4))) +void svsub_write_za64_vg1x4(uint32_t, svint64x4_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x2))) +void svsub_za64_vg1x2(uint32_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x2))) +void svsub_za64_vg1x2(uint32_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_u64_vg1x4))) +void svsub_za64_vg1x4(uint32_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za64_s64_vg1x4))) +void svsub_za64_vg1x4(uint32_t, svint64x4_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_s16_vg1x4)))
+void svvdot_lane_za64_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_u16_vg1x4)))
+void svvdot_lane_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#undef __ai
+
+#endif /* __ARM_SME_H */
diff --git a/lib/include/arm_sme_draft_spec_subject_to_change.h b/lib/include/arm_sme_draft_spec_subject_to_change.h
deleted file mode 100644
index fd36d9796f..0000000000
--- a/lib/include/arm_sme_draft_spec_subject_to_change.h
+++ /dev/null
@@ -1,642 +0,0 @@
-/*===---- arm_sme_draft_spec_subject_to_change.h - ARM SME intrinsics ------===
- *
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-
-#ifndef __ARM_SME_H
-#define __ARM_SME_H
-
-#if !defined(__LITTLE_ENDIAN__)
-#error "Big endian is currently not supported for arm_sme_draft_spec_subject_to_change.h"
-#endif
-#include
-
-/* Function attributes */
-#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
-
-#define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__))
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za))
-void svaddha_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za))
-void svaddha_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za))
-void svaddva_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za))
-void svaddva_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsb), arm_streaming_compatible, arm_preserves_za))
-uint64_t svcntsb(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsd), arm_streaming_compatible, arm_preserves_za))
-uint64_t svcntsd(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsh), arm_streaming_compatible, arm_preserves_za))
-uint64_t svcntsh(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsw), arm_streaming_compatible, arm_preserves_za))
-uint64_t svcntsw(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za128), arm_streaming, arm_shared_za))
-void svld1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za16), arm_streaming, arm_shared_za))
-void svld1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za32), arm_streaming, arm_shared_za))
-void svld1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za64), arm_streaming, arm_shared_za))
-void svld1_hor_vnum_za64(uint64_t,
uint32_t, uint64_t, svbool_t, void const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za8), arm_streaming, arm_shared_za)) -void svld1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za128), arm_streaming, arm_shared_za)) -void svld1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za16), arm_streaming, arm_shared_za)) -void svld1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za32), arm_streaming, arm_shared_za)) -void svld1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za64), arm_streaming, arm_shared_za)) -void svld1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za8), arm_streaming, arm_shared_za)) -void svld1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za128), arm_streaming, arm_shared_za)) -void svld1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za16), arm_streaming, arm_shared_za)) -void svld1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za32), arm_streaming, arm_shared_za)) -void svld1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za64), arm_streaming, arm_shared_za)) -void svld1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za8), arm_streaming, arm_shared_za)) -void svld1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za128), arm_streaming, arm_shared_za)) -void svld1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za16), arm_streaming, arm_shared_za)) -void svld1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za32), arm_streaming, arm_shared_za)) -void svld1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za64), arm_streaming, arm_shared_za)) -void svld1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za8), arm_streaming, arm_shared_za)) -void svld1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za)) -void svmopa_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za)) -void svmopa_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za)) -void svmopa_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za)) -void svmopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za)) -void svmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za)) -void svmops_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za)) -void svmops_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za)) -void svmops_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za)) -void svmops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za)) -void svmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint8_t svread_hor_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint32_t svread_hor_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint64_t svread_hor_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint16_t svread_hor_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svbfloat16_t svread_hor_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint8_t svread_hor_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat64_t svread_hor_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat32_t svread_hor_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat16_t svread_hor_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint32_t svread_hor_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint64_t svread_hor_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint16_t svread_hor_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint16_t svread_hor_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svbfloat16_t svread_hor_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat16_t svread_hor_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint16_t svread_hor_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint32_t svread_hor_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat32_t svread_hor_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint32_t svread_hor_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint64_t svread_hor_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat64_t svread_hor_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint64_t svread_hor_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint8_t svread_hor_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint8_t svread_hor_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint8_t svread_ver_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint32_t svread_ver_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint64_t svread_ver_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint16_t svread_ver_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svbfloat16_t svread_ver_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint8_t svread_ver_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat64_t svread_ver_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat32_t svread_ver_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat16_t svread_ver_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint32_t svread_ver_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint64_t svread_ver_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint16_t svread_ver_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint16_t svread_ver_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svbfloat16_t svread_ver_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat16_t svread_ver_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint16_t svread_ver_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint32_t svread_ver_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, 
uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat32_t svread_ver_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint32_t svread_ver_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint64_t svread_ver_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat64_t svread_ver_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint64_t svread_ver_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint8_t svread_ver_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint8_t svread_ver_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za128), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za16), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za32), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za64), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za8), arm_streaming, arm_shared_za, arm_preserves_za)) -void 
svst1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za128), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za16), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za32), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za64), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za8), arm_streaming, arm_shared_za, arm_preserves_za)) -void svst1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za)) -void svsumopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za)) -void svsumops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za)) -void svusmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za)) -void svusmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za64_f64_m(uint64_t, 
uint32_t, uint64_t, svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, 
arm_shared_za)) -void svwrite_ver_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za), arm_streaming_compatible, arm_shared_za)) -void svzero_mask_za(uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za), arm_streaming_compatible, arm_shared_za)) -void svzero_za(); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za)) -void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za)) -void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za)) -void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za)) -void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za)) -void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za)) -void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za)) -void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, 
arm_shared_za)) -void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za)) -void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za)) -void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za)) -void svmops_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za)) -void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za)) -void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za)) -void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint8_t svread_hor_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint32_t svread_hor_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint64_t svread_hor_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint16_t svread_hor_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svbfloat16_t svread_hor_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint8_t svread_hor_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat64_t svread_hor_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat32_t svread_hor_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat16_t svread_hor_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint32_t svread_hor_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) 
-svint64_t svread_hor_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint16_t svread_hor_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint16_t svread_hor_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svbfloat16_t svread_hor_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat16_t svread_hor_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint16_t svread_hor_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint32_t svread_hor_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat32_t svread_hor_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint32_t svread_hor_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint64_t svread_hor_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat64_t svread_hor_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint64_t svread_hor_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint8_t svread_hor_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint8_t svread_hor_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint8_t svread_ver_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint32_t svread_ver_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint64_t svread_ver_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, 
uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint16_t svread_ver_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svbfloat16_t svread_ver_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint8_t svread_ver_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat64_t svread_ver_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat32_t svread_ver_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat16_t svread_ver_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint32_t svread_ver_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint64_t svread_ver_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint16_t svread_ver_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint16_t svread_ver_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svbfloat16_t svread_ver_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat16_t svread_ver_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint16_t svread_ver_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint32_t svread_ver_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat32_t svread_ver_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint32_t svread_ver_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint64_t svread_ver_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svfloat64_t svread_ver_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint64_t svread_ver_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svuint8_t svread_ver_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) -svint8_t svread_ver_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za)) -void svsumopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za)) -void svsumops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za)) -void svusmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za)) -void svusmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za)) -void 
svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za)) -void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za)) -void 
svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za)) -void 
svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za)) -void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za)) -void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za)) -void svmops_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za)) -void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za)) -void svmops_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za)) -void svaddha_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za)) -void svaddha_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za)) -void svaddva_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za)) -void svaddva_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za)) -void svmopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za)) -void svmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za)) -void svmops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za)) -void svmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za)) -void svsumopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za)) -void svsumops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za)) -void svusmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za)) -void svusmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za)) -void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za)) -void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za)) -void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za)) -void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za)) -void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za)) -void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za)) -void svmops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za)) -void svmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za)) -void svsumopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za)) -void svsumops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za)) -void svusmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za)) -void svusmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_vnum_za), arm_streaming_compatible, arm_shared_za)) -void svldr_vnum_za(uint32_t, uint64_t, void const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_vnum_za), arm_streaming_compatible, arm_shared_za, arm_preserves_za)) -void svstr_vnum_za(uint32_t, uint64_t, void *); -#ifdef __cplusplus -} // extern "C" -#endif - -#undef __ai - -#endif /* __ARM_SME_H */ diff --git a/lib/include/arm_sve.h b/lib/include/arm_sve.h index 643584387b..3990f803c5 100644 --- a/lib/include/arm_sve.h +++ b/lib/include/arm_sve.h @@ -35,8 +35,9 @@ typedef __SVUint32_t svuint32_t; typedef __SVUint64_t svuint64_t; typedef __SVFloat16_t svfloat16_t; -typedef __SVBFloat16_t svbfloat16_t; +typedef __SVBfloat16_t svbfloat16_t; #include +#include typedef __SVFloat32_t svfloat32_t; typedef __SVFloat64_t svfloat64_t; typedef __clang_svint8x2_t svint8x2_t; @@ -124,725 +125,4657 @@ enum svprfop #define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__)) #define svreinterpret_s8_s8(...) 
__builtin_sve_reinterpret_s8_s8(__VA_ARGS__) -#define svreinterpret_s8_s16(...) __builtin_sve_reinterpret_s8_s16(__VA_ARGS__) -#define svreinterpret_s8_s32(...) __builtin_sve_reinterpret_s8_s32(__VA_ARGS__) -#define svreinterpret_s8_s64(...) __builtin_sve_reinterpret_s8_s64(__VA_ARGS__) #define svreinterpret_s8_u8(...) __builtin_sve_reinterpret_s8_u8(__VA_ARGS__) +#define svreinterpret_s8_s16(...) __builtin_sve_reinterpret_s8_s16(__VA_ARGS__) #define svreinterpret_s8_u16(...) __builtin_sve_reinterpret_s8_u16(__VA_ARGS__) +#define svreinterpret_s8_s32(...) __builtin_sve_reinterpret_s8_s32(__VA_ARGS__) #define svreinterpret_s8_u32(...) __builtin_sve_reinterpret_s8_u32(__VA_ARGS__) +#define svreinterpret_s8_s64(...) __builtin_sve_reinterpret_s8_s64(__VA_ARGS__) #define svreinterpret_s8_u64(...) __builtin_sve_reinterpret_s8_u64(__VA_ARGS__) #define svreinterpret_s8_f16(...) __builtin_sve_reinterpret_s8_f16(__VA_ARGS__) #define svreinterpret_s8_bf16(...) __builtin_sve_reinterpret_s8_bf16(__VA_ARGS__) #define svreinterpret_s8_f32(...) __builtin_sve_reinterpret_s8_f32(__VA_ARGS__) #define svreinterpret_s8_f64(...) __builtin_sve_reinterpret_s8_f64(__VA_ARGS__) -#define svreinterpret_s16_s8(...) __builtin_sve_reinterpret_s16_s8(__VA_ARGS__) -#define svreinterpret_s16_s16(...) __builtin_sve_reinterpret_s16_s16(__VA_ARGS__) -#define svreinterpret_s16_s32(...) __builtin_sve_reinterpret_s16_s32(__VA_ARGS__) -#define svreinterpret_s16_s64(...) __builtin_sve_reinterpret_s16_s64(__VA_ARGS__) -#define svreinterpret_s16_u8(...) __builtin_sve_reinterpret_s16_u8(__VA_ARGS__) -#define svreinterpret_s16_u16(...) __builtin_sve_reinterpret_s16_u16(__VA_ARGS__) -#define svreinterpret_s16_u32(...) __builtin_sve_reinterpret_s16_u32(__VA_ARGS__) -#define svreinterpret_s16_u64(...) __builtin_sve_reinterpret_s16_u64(__VA_ARGS__) -#define svreinterpret_s16_f16(...) __builtin_sve_reinterpret_s16_f16(__VA_ARGS__) -#define svreinterpret_s16_bf16(...) __builtin_sve_reinterpret_s16_bf16(__VA_ARGS__) -#define svreinterpret_s16_f32(...) __builtin_sve_reinterpret_s16_f32(__VA_ARGS__) -#define svreinterpret_s16_f64(...) __builtin_sve_reinterpret_s16_f64(__VA_ARGS__) -#define svreinterpret_s32_s8(...) __builtin_sve_reinterpret_s32_s8(__VA_ARGS__) -#define svreinterpret_s32_s16(...) __builtin_sve_reinterpret_s32_s16(__VA_ARGS__) -#define svreinterpret_s32_s32(...) __builtin_sve_reinterpret_s32_s32(__VA_ARGS__) -#define svreinterpret_s32_s64(...) __builtin_sve_reinterpret_s32_s64(__VA_ARGS__) -#define svreinterpret_s32_u8(...) __builtin_sve_reinterpret_s32_u8(__VA_ARGS__) -#define svreinterpret_s32_u16(...) __builtin_sve_reinterpret_s32_u16(__VA_ARGS__) -#define svreinterpret_s32_u32(...) __builtin_sve_reinterpret_s32_u32(__VA_ARGS__) -#define svreinterpret_s32_u64(...) __builtin_sve_reinterpret_s32_u64(__VA_ARGS__) -#define svreinterpret_s32_f16(...) __builtin_sve_reinterpret_s32_f16(__VA_ARGS__) -#define svreinterpret_s32_bf16(...) __builtin_sve_reinterpret_s32_bf16(__VA_ARGS__) -#define svreinterpret_s32_f32(...) __builtin_sve_reinterpret_s32_f32(__VA_ARGS__) -#define svreinterpret_s32_f64(...) __builtin_sve_reinterpret_s32_f64(__VA_ARGS__) -#define svreinterpret_s64_s8(...) __builtin_sve_reinterpret_s64_s8(__VA_ARGS__) -#define svreinterpret_s64_s16(...) __builtin_sve_reinterpret_s64_s16(__VA_ARGS__) -#define svreinterpret_s64_s32(...) __builtin_sve_reinterpret_s64_s32(__VA_ARGS__) -#define svreinterpret_s64_s64(...) __builtin_sve_reinterpret_s64_s64(__VA_ARGS__) -#define svreinterpret_s64_u8(...) 
__builtin_sve_reinterpret_s64_u8(__VA_ARGS__) -#define svreinterpret_s64_u16(...) __builtin_sve_reinterpret_s64_u16(__VA_ARGS__) -#define svreinterpret_s64_u32(...) __builtin_sve_reinterpret_s64_u32(__VA_ARGS__) -#define svreinterpret_s64_u64(...) __builtin_sve_reinterpret_s64_u64(__VA_ARGS__) -#define svreinterpret_s64_f16(...) __builtin_sve_reinterpret_s64_f16(__VA_ARGS__) -#define svreinterpret_s64_bf16(...) __builtin_sve_reinterpret_s64_bf16(__VA_ARGS__) -#define svreinterpret_s64_f32(...) __builtin_sve_reinterpret_s64_f32(__VA_ARGS__) -#define svreinterpret_s64_f64(...) __builtin_sve_reinterpret_s64_f64(__VA_ARGS__) #define svreinterpret_u8_s8(...) __builtin_sve_reinterpret_u8_s8(__VA_ARGS__) -#define svreinterpret_u8_s16(...) __builtin_sve_reinterpret_u8_s16(__VA_ARGS__) -#define svreinterpret_u8_s32(...) __builtin_sve_reinterpret_u8_s32(__VA_ARGS__) -#define svreinterpret_u8_s64(...) __builtin_sve_reinterpret_u8_s64(__VA_ARGS__) #define svreinterpret_u8_u8(...) __builtin_sve_reinterpret_u8_u8(__VA_ARGS__) +#define svreinterpret_u8_s16(...) __builtin_sve_reinterpret_u8_s16(__VA_ARGS__) #define svreinterpret_u8_u16(...) __builtin_sve_reinterpret_u8_u16(__VA_ARGS__) +#define svreinterpret_u8_s32(...) __builtin_sve_reinterpret_u8_s32(__VA_ARGS__) #define svreinterpret_u8_u32(...) __builtin_sve_reinterpret_u8_u32(__VA_ARGS__) +#define svreinterpret_u8_s64(...) __builtin_sve_reinterpret_u8_s64(__VA_ARGS__) #define svreinterpret_u8_u64(...) __builtin_sve_reinterpret_u8_u64(__VA_ARGS__) #define svreinterpret_u8_f16(...) __builtin_sve_reinterpret_u8_f16(__VA_ARGS__) #define svreinterpret_u8_bf16(...) __builtin_sve_reinterpret_u8_bf16(__VA_ARGS__) #define svreinterpret_u8_f32(...) __builtin_sve_reinterpret_u8_f32(__VA_ARGS__) #define svreinterpret_u8_f64(...) __builtin_sve_reinterpret_u8_f64(__VA_ARGS__) +#define svreinterpret_s16_s8(...) __builtin_sve_reinterpret_s16_s8(__VA_ARGS__) +#define svreinterpret_s16_u8(...) __builtin_sve_reinterpret_s16_u8(__VA_ARGS__) +#define svreinterpret_s16_s16(...) __builtin_sve_reinterpret_s16_s16(__VA_ARGS__) +#define svreinterpret_s16_u16(...) __builtin_sve_reinterpret_s16_u16(__VA_ARGS__) +#define svreinterpret_s16_s32(...) __builtin_sve_reinterpret_s16_s32(__VA_ARGS__) +#define svreinterpret_s16_u32(...) __builtin_sve_reinterpret_s16_u32(__VA_ARGS__) +#define svreinterpret_s16_s64(...) __builtin_sve_reinterpret_s16_s64(__VA_ARGS__) +#define svreinterpret_s16_u64(...) __builtin_sve_reinterpret_s16_u64(__VA_ARGS__) +#define svreinterpret_s16_f16(...) __builtin_sve_reinterpret_s16_f16(__VA_ARGS__) +#define svreinterpret_s16_bf16(...) __builtin_sve_reinterpret_s16_bf16(__VA_ARGS__) +#define svreinterpret_s16_f32(...) __builtin_sve_reinterpret_s16_f32(__VA_ARGS__) +#define svreinterpret_s16_f64(...) __builtin_sve_reinterpret_s16_f64(__VA_ARGS__) #define svreinterpret_u16_s8(...) __builtin_sve_reinterpret_u16_s8(__VA_ARGS__) -#define svreinterpret_u16_s16(...) __builtin_sve_reinterpret_u16_s16(__VA_ARGS__) -#define svreinterpret_u16_s32(...) __builtin_sve_reinterpret_u16_s32(__VA_ARGS__) -#define svreinterpret_u16_s64(...) __builtin_sve_reinterpret_u16_s64(__VA_ARGS__) #define svreinterpret_u16_u8(...) __builtin_sve_reinterpret_u16_u8(__VA_ARGS__) +#define svreinterpret_u16_s16(...) __builtin_sve_reinterpret_u16_s16(__VA_ARGS__) #define svreinterpret_u16_u16(...) __builtin_sve_reinterpret_u16_u16(__VA_ARGS__) +#define svreinterpret_u16_s32(...) __builtin_sve_reinterpret_u16_s32(__VA_ARGS__) #define svreinterpret_u16_u32(...) 
__builtin_sve_reinterpret_u16_u32(__VA_ARGS__) +#define svreinterpret_u16_s64(...) __builtin_sve_reinterpret_u16_s64(__VA_ARGS__) #define svreinterpret_u16_u64(...) __builtin_sve_reinterpret_u16_u64(__VA_ARGS__) #define svreinterpret_u16_f16(...) __builtin_sve_reinterpret_u16_f16(__VA_ARGS__) #define svreinterpret_u16_bf16(...) __builtin_sve_reinterpret_u16_bf16(__VA_ARGS__) #define svreinterpret_u16_f32(...) __builtin_sve_reinterpret_u16_f32(__VA_ARGS__) #define svreinterpret_u16_f64(...) __builtin_sve_reinterpret_u16_f64(__VA_ARGS__) +#define svreinterpret_s32_s8(...) __builtin_sve_reinterpret_s32_s8(__VA_ARGS__) +#define svreinterpret_s32_u8(...) __builtin_sve_reinterpret_s32_u8(__VA_ARGS__) +#define svreinterpret_s32_s16(...) __builtin_sve_reinterpret_s32_s16(__VA_ARGS__) +#define svreinterpret_s32_u16(...) __builtin_sve_reinterpret_s32_u16(__VA_ARGS__) +#define svreinterpret_s32_s32(...) __builtin_sve_reinterpret_s32_s32(__VA_ARGS__) +#define svreinterpret_s32_u32(...) __builtin_sve_reinterpret_s32_u32(__VA_ARGS__) +#define svreinterpret_s32_s64(...) __builtin_sve_reinterpret_s32_s64(__VA_ARGS__) +#define svreinterpret_s32_u64(...) __builtin_sve_reinterpret_s32_u64(__VA_ARGS__) +#define svreinterpret_s32_f16(...) __builtin_sve_reinterpret_s32_f16(__VA_ARGS__) +#define svreinterpret_s32_bf16(...) __builtin_sve_reinterpret_s32_bf16(__VA_ARGS__) +#define svreinterpret_s32_f32(...) __builtin_sve_reinterpret_s32_f32(__VA_ARGS__) +#define svreinterpret_s32_f64(...) __builtin_sve_reinterpret_s32_f64(__VA_ARGS__) #define svreinterpret_u32_s8(...) __builtin_sve_reinterpret_u32_s8(__VA_ARGS__) -#define svreinterpret_u32_s16(...) __builtin_sve_reinterpret_u32_s16(__VA_ARGS__) -#define svreinterpret_u32_s32(...) __builtin_sve_reinterpret_u32_s32(__VA_ARGS__) -#define svreinterpret_u32_s64(...) __builtin_sve_reinterpret_u32_s64(__VA_ARGS__) #define svreinterpret_u32_u8(...) __builtin_sve_reinterpret_u32_u8(__VA_ARGS__) +#define svreinterpret_u32_s16(...) __builtin_sve_reinterpret_u32_s16(__VA_ARGS__) #define svreinterpret_u32_u16(...) __builtin_sve_reinterpret_u32_u16(__VA_ARGS__) +#define svreinterpret_u32_s32(...) __builtin_sve_reinterpret_u32_s32(__VA_ARGS__) #define svreinterpret_u32_u32(...) __builtin_sve_reinterpret_u32_u32(__VA_ARGS__) +#define svreinterpret_u32_s64(...) __builtin_sve_reinterpret_u32_s64(__VA_ARGS__) #define svreinterpret_u32_u64(...) __builtin_sve_reinterpret_u32_u64(__VA_ARGS__) #define svreinterpret_u32_f16(...) __builtin_sve_reinterpret_u32_f16(__VA_ARGS__) #define svreinterpret_u32_bf16(...) __builtin_sve_reinterpret_u32_bf16(__VA_ARGS__) #define svreinterpret_u32_f32(...) __builtin_sve_reinterpret_u32_f32(__VA_ARGS__) #define svreinterpret_u32_f64(...) __builtin_sve_reinterpret_u32_f64(__VA_ARGS__) +#define svreinterpret_s64_s8(...) __builtin_sve_reinterpret_s64_s8(__VA_ARGS__) +#define svreinterpret_s64_u8(...) __builtin_sve_reinterpret_s64_u8(__VA_ARGS__) +#define svreinterpret_s64_s16(...) __builtin_sve_reinterpret_s64_s16(__VA_ARGS__) +#define svreinterpret_s64_u16(...) __builtin_sve_reinterpret_s64_u16(__VA_ARGS__) +#define svreinterpret_s64_s32(...) __builtin_sve_reinterpret_s64_s32(__VA_ARGS__) +#define svreinterpret_s64_u32(...) __builtin_sve_reinterpret_s64_u32(__VA_ARGS__) +#define svreinterpret_s64_s64(...) __builtin_sve_reinterpret_s64_s64(__VA_ARGS__) +#define svreinterpret_s64_u64(...) __builtin_sve_reinterpret_s64_u64(__VA_ARGS__) +#define svreinterpret_s64_f16(...) __builtin_sve_reinterpret_s64_f16(__VA_ARGS__) +#define svreinterpret_s64_bf16(...) 
__builtin_sve_reinterpret_s64_bf16(__VA_ARGS__) +#define svreinterpret_s64_f32(...) __builtin_sve_reinterpret_s64_f32(__VA_ARGS__) +#define svreinterpret_s64_f64(...) __builtin_sve_reinterpret_s64_f64(__VA_ARGS__) #define svreinterpret_u64_s8(...) __builtin_sve_reinterpret_u64_s8(__VA_ARGS__) -#define svreinterpret_u64_s16(...) __builtin_sve_reinterpret_u64_s16(__VA_ARGS__) -#define svreinterpret_u64_s32(...) __builtin_sve_reinterpret_u64_s32(__VA_ARGS__) -#define svreinterpret_u64_s64(...) __builtin_sve_reinterpret_u64_s64(__VA_ARGS__) #define svreinterpret_u64_u8(...) __builtin_sve_reinterpret_u64_u8(__VA_ARGS__) +#define svreinterpret_u64_s16(...) __builtin_sve_reinterpret_u64_s16(__VA_ARGS__) #define svreinterpret_u64_u16(...) __builtin_sve_reinterpret_u64_u16(__VA_ARGS__) +#define svreinterpret_u64_s32(...) __builtin_sve_reinterpret_u64_s32(__VA_ARGS__) #define svreinterpret_u64_u32(...) __builtin_sve_reinterpret_u64_u32(__VA_ARGS__) +#define svreinterpret_u64_s64(...) __builtin_sve_reinterpret_u64_s64(__VA_ARGS__) #define svreinterpret_u64_u64(...) __builtin_sve_reinterpret_u64_u64(__VA_ARGS__) #define svreinterpret_u64_f16(...) __builtin_sve_reinterpret_u64_f16(__VA_ARGS__) #define svreinterpret_u64_bf16(...) __builtin_sve_reinterpret_u64_bf16(__VA_ARGS__) #define svreinterpret_u64_f32(...) __builtin_sve_reinterpret_u64_f32(__VA_ARGS__) #define svreinterpret_u64_f64(...) __builtin_sve_reinterpret_u64_f64(__VA_ARGS__) #define svreinterpret_f16_s8(...) __builtin_sve_reinterpret_f16_s8(__VA_ARGS__) -#define svreinterpret_f16_s16(...) __builtin_sve_reinterpret_f16_s16(__VA_ARGS__) -#define svreinterpret_f16_s32(...) __builtin_sve_reinterpret_f16_s32(__VA_ARGS__) -#define svreinterpret_f16_s64(...) __builtin_sve_reinterpret_f16_s64(__VA_ARGS__) #define svreinterpret_f16_u8(...) __builtin_sve_reinterpret_f16_u8(__VA_ARGS__) +#define svreinterpret_f16_s16(...) __builtin_sve_reinterpret_f16_s16(__VA_ARGS__) #define svreinterpret_f16_u16(...) __builtin_sve_reinterpret_f16_u16(__VA_ARGS__) +#define svreinterpret_f16_s32(...) __builtin_sve_reinterpret_f16_s32(__VA_ARGS__) #define svreinterpret_f16_u32(...) __builtin_sve_reinterpret_f16_u32(__VA_ARGS__) +#define svreinterpret_f16_s64(...) __builtin_sve_reinterpret_f16_s64(__VA_ARGS__) #define svreinterpret_f16_u64(...) __builtin_sve_reinterpret_f16_u64(__VA_ARGS__) #define svreinterpret_f16_f16(...) __builtin_sve_reinterpret_f16_f16(__VA_ARGS__) #define svreinterpret_f16_bf16(...) __builtin_sve_reinterpret_f16_bf16(__VA_ARGS__) #define svreinterpret_f16_f32(...) __builtin_sve_reinterpret_f16_f32(__VA_ARGS__) #define svreinterpret_f16_f64(...) __builtin_sve_reinterpret_f16_f64(__VA_ARGS__) #define svreinterpret_bf16_s8(...) __builtin_sve_reinterpret_bf16_s8(__VA_ARGS__) -#define svreinterpret_bf16_s16(...) __builtin_sve_reinterpret_bf16_s16(__VA_ARGS__) -#define svreinterpret_bf16_s32(...) __builtin_sve_reinterpret_bf16_s32(__VA_ARGS__) -#define svreinterpret_bf16_s64(...) __builtin_sve_reinterpret_bf16_s64(__VA_ARGS__) #define svreinterpret_bf16_u8(...) __builtin_sve_reinterpret_bf16_u8(__VA_ARGS__) +#define svreinterpret_bf16_s16(...) __builtin_sve_reinterpret_bf16_s16(__VA_ARGS__) #define svreinterpret_bf16_u16(...) __builtin_sve_reinterpret_bf16_u16(__VA_ARGS__) +#define svreinterpret_bf16_s32(...) __builtin_sve_reinterpret_bf16_s32(__VA_ARGS__) #define svreinterpret_bf16_u32(...) __builtin_sve_reinterpret_bf16_u32(__VA_ARGS__) +#define svreinterpret_bf16_s64(...) 
__builtin_sve_reinterpret_bf16_s64(__VA_ARGS__) #define svreinterpret_bf16_u64(...) __builtin_sve_reinterpret_bf16_u64(__VA_ARGS__) #define svreinterpret_bf16_f16(...) __builtin_sve_reinterpret_bf16_f16(__VA_ARGS__) #define svreinterpret_bf16_bf16(...) __builtin_sve_reinterpret_bf16_bf16(__VA_ARGS__) #define svreinterpret_bf16_f32(...) __builtin_sve_reinterpret_bf16_f32(__VA_ARGS__) #define svreinterpret_bf16_f64(...) __builtin_sve_reinterpret_bf16_f64(__VA_ARGS__) #define svreinterpret_f32_s8(...) __builtin_sve_reinterpret_f32_s8(__VA_ARGS__) -#define svreinterpret_f32_s16(...) __builtin_sve_reinterpret_f32_s16(__VA_ARGS__) -#define svreinterpret_f32_s32(...) __builtin_sve_reinterpret_f32_s32(__VA_ARGS__) -#define svreinterpret_f32_s64(...) __builtin_sve_reinterpret_f32_s64(__VA_ARGS__) #define svreinterpret_f32_u8(...) __builtin_sve_reinterpret_f32_u8(__VA_ARGS__) +#define svreinterpret_f32_s16(...) __builtin_sve_reinterpret_f32_s16(__VA_ARGS__) #define svreinterpret_f32_u16(...) __builtin_sve_reinterpret_f32_u16(__VA_ARGS__) +#define svreinterpret_f32_s32(...) __builtin_sve_reinterpret_f32_s32(__VA_ARGS__) #define svreinterpret_f32_u32(...) __builtin_sve_reinterpret_f32_u32(__VA_ARGS__) +#define svreinterpret_f32_s64(...) __builtin_sve_reinterpret_f32_s64(__VA_ARGS__) #define svreinterpret_f32_u64(...) __builtin_sve_reinterpret_f32_u64(__VA_ARGS__) #define svreinterpret_f32_f16(...) __builtin_sve_reinterpret_f32_f16(__VA_ARGS__) #define svreinterpret_f32_bf16(...) __builtin_sve_reinterpret_f32_bf16(__VA_ARGS__) #define svreinterpret_f32_f32(...) __builtin_sve_reinterpret_f32_f32(__VA_ARGS__) #define svreinterpret_f32_f64(...) __builtin_sve_reinterpret_f32_f64(__VA_ARGS__) #define svreinterpret_f64_s8(...) __builtin_sve_reinterpret_f64_s8(__VA_ARGS__) -#define svreinterpret_f64_s16(...) __builtin_sve_reinterpret_f64_s16(__VA_ARGS__) -#define svreinterpret_f64_s32(...) __builtin_sve_reinterpret_f64_s32(__VA_ARGS__) -#define svreinterpret_f64_s64(...) __builtin_sve_reinterpret_f64_s64(__VA_ARGS__) #define svreinterpret_f64_u8(...) __builtin_sve_reinterpret_f64_u8(__VA_ARGS__) +#define svreinterpret_f64_s16(...) __builtin_sve_reinterpret_f64_s16(__VA_ARGS__) #define svreinterpret_f64_u16(...) __builtin_sve_reinterpret_f64_u16(__VA_ARGS__) +#define svreinterpret_f64_s32(...) __builtin_sve_reinterpret_f64_s32(__VA_ARGS__) #define svreinterpret_f64_u32(...) __builtin_sve_reinterpret_f64_u32(__VA_ARGS__) +#define svreinterpret_f64_s64(...) __builtin_sve_reinterpret_f64_s64(__VA_ARGS__) #define svreinterpret_f64_u64(...) __builtin_sve_reinterpret_f64_u64(__VA_ARGS__) #define svreinterpret_f64_f16(...) __builtin_sve_reinterpret_f64_f16(__VA_ARGS__) #define svreinterpret_f64_bf16(...) __builtin_sve_reinterpret_f64_bf16(__VA_ARGS__) #define svreinterpret_f64_f32(...) __builtin_sve_reinterpret_f64_f32(__VA_ARGS__) #define svreinterpret_f64_f64(...) 
__builtin_sve_reinterpret_f64_f64(__VA_ARGS__) -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint8_t op) { +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_s8(op); } -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint16_t op) { - return __builtin_sve_reinterpret_s8_s16(op); -} - -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint32_t op) { - return __builtin_sve_reinterpret_s8_s32(op); -} - -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint64_t op) { - return __builtin_sve_reinterpret_s8_s64(op); -} - -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint8_t op) { +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u8(op); } -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint16_t op) { +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s16(op); +} + +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u16(op); } -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint32_t op) { +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s32(op); +} + +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u32(op); } -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint64_t op) { +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s64(op); +} + +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_u64(op); } -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat16_t op) { +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f16(op); } -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svbfloat16_t op) { +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_bf16(op); } -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat32_t op) { +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f32(op); } -__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat64_t op) { +__aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_s8_f64(op); } -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint8_t op) { - return __builtin_sve_reinterpret_s16_s8(op); -} - -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint16_t op) { - return __builtin_sve_reinterpret_s16_s16(op); -} - -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint32_t op) { - return __builtin_sve_reinterpret_s16_s32(op); -} - -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint64_t op) { - return 
__builtin_sve_reinterpret_s16_s64(op); -} - -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint8_t op) { - return __builtin_sve_reinterpret_s16_u8(op); -} - -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint16_t op) { - return __builtin_sve_reinterpret_s16_u16(op); -} - -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint32_t op) { - return __builtin_sve_reinterpret_s16_u32(op); -} - -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint64_t op) { - return __builtin_sve_reinterpret_s16_u64(op); -} - -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat16_t op) { - return __builtin_sve_reinterpret_s16_f16(op); -} - -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svbfloat16_t op) { - return __builtin_sve_reinterpret_s16_bf16(op); -} - -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat32_t op) { - return __builtin_sve_reinterpret_s16_f32(op); -} - -__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat64_t op) { - return __builtin_sve_reinterpret_s16_f64(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint8_t op) { - return __builtin_sve_reinterpret_s32_s8(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint16_t op) { - return __builtin_sve_reinterpret_s32_s16(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint32_t op) { - return __builtin_sve_reinterpret_s32_s32(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint64_t op) { - return __builtin_sve_reinterpret_s32_s64(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint8_t op) { - return __builtin_sve_reinterpret_s32_u8(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint16_t op) { - return __builtin_sve_reinterpret_s32_u16(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint32_t op) { - return __builtin_sve_reinterpret_s32_u32(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint64_t op) { - return __builtin_sve_reinterpret_s32_u64(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat16_t op) { - return __builtin_sve_reinterpret_s32_f16(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svbfloat16_t op) { - return __builtin_sve_reinterpret_s32_bf16(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat32_t op) { - return __builtin_sve_reinterpret_s32_f32(op); -} - -__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat64_t op) { - return __builtin_sve_reinterpret_s32_f64(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint8_t op) { - return __builtin_sve_reinterpret_s64_s8(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint16_t op) { - return __builtin_sve_reinterpret_s64_s16(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint32_t op) { - return __builtin_sve_reinterpret_s64_s32(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint64_t op) { - return __builtin_sve_reinterpret_s64_s64(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint8_t op) { - return __builtin_sve_reinterpret_s64_u8(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint16_t op) { - return 
__builtin_sve_reinterpret_s64_u16(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint32_t op) { - return __builtin_sve_reinterpret_s64_u32(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint64_t op) { - return __builtin_sve_reinterpret_s64_u64(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat16_t op) { - return __builtin_sve_reinterpret_s64_f16(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svbfloat16_t op) { - return __builtin_sve_reinterpret_s64_bf16(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat32_t op) { - return __builtin_sve_reinterpret_s64_f32(op); -} - -__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat64_t op) { - return __builtin_sve_reinterpret_s64_f64(op); -} - -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint8_t op) { +__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_s8(op); } -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint16_t op) { - return __builtin_sve_reinterpret_u8_s16(op); -} - -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint32_t op) { - return __builtin_sve_reinterpret_u8_s32(op); -} - -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint64_t op) { - return __builtin_sve_reinterpret_u8_s64(op); -} - -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint8_t op) { +__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u8(op); } -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint16_t op) { +__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s16(op); +} + +__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u16(op); } -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint32_t op) { +__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s32(op); +} + +__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u32(op); } -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint64_t op) { +__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s64(op); +} + +__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_u64(op); } -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat16_t op) { +__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f16(op); } -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svbfloat16_t op) { +__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_bf16(op); } -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat32_t op) { +__aio __attribute__((target("sve"))) svuint8_t 
svreinterpret_u8(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f32(op); } -__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat64_t op) { +__aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u8_f64(op); } -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint8_t op) { +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint8_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s8(op); +} + +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint8_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u8(op); +} + +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s16(op); +} + +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u16(op); +} + +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s32(op); +} + +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u32(op); +} + +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s64(op); +} + +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u64(op); +} + +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f16(op); +} + +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svbfloat16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_bf16(op); +} + +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f32(op); +} + +__aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f64(op); +} + +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_s8(op); } -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint16_t op) { - return __builtin_sve_reinterpret_u16_s16(op); -} - -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint32_t op) { - return __builtin_sve_reinterpret_u16_s32(op); -} - -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint64_t op) { - return __builtin_sve_reinterpret_u16_s64(op); -} - -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint8_t op) { +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u8(op); } -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint16_t op) { +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s16(op); +} + +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint16_t op) __arm_streaming_compatible { return 
__builtin_sve_reinterpret_u16_u16(op); } -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint32_t op) { +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s32(op); +} + +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u32(op); } -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint64_t op) { +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s64(op); +} + +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_u64(op); } -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat16_t op) { +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f16(op); } -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svbfloat16_t op) { +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_bf16(op); } -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat32_t op) { +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f32(op); } -__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat64_t op) { +__aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u16_f64(op); } -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint8_t op) { +__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint8_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s8(op); +} + +__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint8_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u8(op); +} + +__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s16(op); +} + +__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u16(op); +} + +__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s32(op); +} + +__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u32(op); +} + +__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s64(op); +} + +__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u64(op); +} + +__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f16(op); +} + +__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svbfloat16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_bf16(op); +} + +__aio 
__attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f32(op); +} + +__aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f64(op); +} + +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_s8(op); } -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint16_t op) { - return __builtin_sve_reinterpret_u32_s16(op); -} - -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint32_t op) { - return __builtin_sve_reinterpret_u32_s32(op); -} - -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint64_t op) { - return __builtin_sve_reinterpret_u32_s64(op); -} - -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint8_t op) { +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u8(op); } -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint16_t op) { +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s16(op); +} + +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u16(op); } -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint32_t op) { +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s32(op); +} + +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u32(op); } -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint64_t op) { +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s64(op); +} + +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_u64(op); } -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat16_t op) { +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f16(op); } -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svbfloat16_t op) { +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_bf16(op); } -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat32_t op) { +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f32(op); } -__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat64_t op) { +__aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u32_f64(op); } -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint8_t op) { +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint8_t op) __arm_streaming_compatible { + return 
__builtin_sve_reinterpret_s64_s8(op); +} + +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint8_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u8(op); +} + +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s16(op); +} + +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u16(op); +} + +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s32(op); +} + +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u32(op); +} + +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s64(op); +} + +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u64(op); +} + +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f16(op); +} + +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svbfloat16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_bf16(op); +} + +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f32(op); +} + +__aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f64(op); +} + +__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_s8(op); } -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint16_t op) { - return __builtin_sve_reinterpret_u64_s16(op); -} - -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint32_t op) { - return __builtin_sve_reinterpret_u64_s32(op); -} - -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint64_t op) { - return __builtin_sve_reinterpret_u64_s64(op); -} - -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint8_t op) { +__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u8(op); } -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint16_t op) { +__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s16(op); +} + +__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u16(op); } -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint32_t op) { +__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s32(op); +} + +__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u32(op); } -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint64_t op) { +__aio 
__attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s64(op); +} + +__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_u64(op); } -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat16_t op) { +__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f16(op); } -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svbfloat16_t op) { +__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_bf16(op); } -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat32_t op) { +__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f32(op); } -__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat64_t op) { +__aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_u64_f64(op); } -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint8_t op) { +__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_s8(op); } -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint16_t op) { - return __builtin_sve_reinterpret_f16_s16(op); -} - -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint32_t op) { - return __builtin_sve_reinterpret_f16_s32(op); -} - -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint64_t op) { - return __builtin_sve_reinterpret_f16_s64(op); -} - -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint8_t op) { +__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u8(op); } -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint16_t op) { +__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s16(op); +} + +__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u16(op); } -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint32_t op) { +__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s32(op); +} + +__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u32(op); } -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint64_t op) { +__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s64(op); +} + +__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_u64(op); } -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat16_t op) { +__aio __attribute__((target("sve"))) svfloat16_t 
svreinterpret_f16(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f16(op); } -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svbfloat16_t op) { +__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_bf16(op); } -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat32_t op) { +__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f32(op); } -__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat64_t op) { +__aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f16_f64(op); } -__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint8_t op) { +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_s8(op); } -__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint16_t op) { - return __builtin_sve_reinterpret_bf16_s16(op); -} - -__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint32_t op) { - return __builtin_sve_reinterpret_bf16_s32(op); -} - -__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint64_t op) { - return __builtin_sve_reinterpret_bf16_s64(op); -} - -__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint8_t op) { +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u8(op); } -__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint16_t op) { +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s16(op); +} + +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u16(op); } -__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint32_t op) { +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s32(op); +} + +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u32(op); } -__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint64_t op) { +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s64(op); +} + +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_u64(op); } -__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat16_t op) { +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f16(op); } -__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svbfloat16_t op) { +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_bf16(op); } -__aio 
__attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat32_t op) { +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f32(op); } -__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat64_t op) { +__aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_bf16_f64(op); } -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint8_t op) { +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_s8(op); } -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint16_t op) { - return __builtin_sve_reinterpret_f32_s16(op); -} - -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint32_t op) { - return __builtin_sve_reinterpret_f32_s32(op); -} - -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint64_t op) { - return __builtin_sve_reinterpret_f32_s64(op); -} - -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint8_t op) { +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u8(op); } -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint16_t op) { +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s16(op); +} + +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u16(op); } -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint32_t op) { +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s32(op); +} + +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u32(op); } -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint64_t op) { +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s64(op); +} + +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_u64(op); } -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat16_t op) { +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f16(op); } -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svbfloat16_t op) { +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_bf16(op); } -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat32_t op) { +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f32(op); } -__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat64_t op) { +__aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat64_t op) 
__arm_streaming_compatible { return __builtin_sve_reinterpret_f32_f64(op); } -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint8_t op) { +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_s8(op); } -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint16_t op) { - return __builtin_sve_reinterpret_f64_s16(op); -} - -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint32_t op) { - return __builtin_sve_reinterpret_f64_s32(op); -} - -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint64_t op) { - return __builtin_sve_reinterpret_f64_s64(op); -} - -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint8_t op) { +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint8_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u8(op); } -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint16_t op) { +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint16_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s16(op); +} + +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u16(op); } -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint32_t op) { +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint32_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s32(op); +} + +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u32(op); } -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint64_t op) { +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint64_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s64(op); +} + +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_u64(op); } -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat16_t op) { +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f16(op); } -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svbfloat16_t op) { +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svbfloat16_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_bf16(op); } -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat32_t op) { +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat32_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f32(op); } -__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat64_t op) { +__aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat64_t op) __arm_streaming_compatible { return __builtin_sve_reinterpret_f64_f64(op); } +#define svreinterpret_s8_s8_x2(...) __builtin_sve_reinterpret_s8_s8_x2(__VA_ARGS__) +#define svreinterpret_s8_u8_x2(...) __builtin_sve_reinterpret_s8_u8_x2(__VA_ARGS__) +#define svreinterpret_s8_s16_x2(...) __builtin_sve_reinterpret_s8_s16_x2(__VA_ARGS__) +#define svreinterpret_s8_u16_x2(...) 
__builtin_sve_reinterpret_s8_u16_x2(__VA_ARGS__) +#define svreinterpret_s8_s32_x2(...) __builtin_sve_reinterpret_s8_s32_x2(__VA_ARGS__) +#define svreinterpret_s8_u32_x2(...) __builtin_sve_reinterpret_s8_u32_x2(__VA_ARGS__) +#define svreinterpret_s8_s64_x2(...) __builtin_sve_reinterpret_s8_s64_x2(__VA_ARGS__) +#define svreinterpret_s8_u64_x2(...) __builtin_sve_reinterpret_s8_u64_x2(__VA_ARGS__) +#define svreinterpret_s8_f16_x2(...) __builtin_sve_reinterpret_s8_f16_x2(__VA_ARGS__) +#define svreinterpret_s8_bf16_x2(...) __builtin_sve_reinterpret_s8_bf16_x2(__VA_ARGS__) +#define svreinterpret_s8_f32_x2(...) __builtin_sve_reinterpret_s8_f32_x2(__VA_ARGS__) +#define svreinterpret_s8_f64_x2(...) __builtin_sve_reinterpret_s8_f64_x2(__VA_ARGS__) +#define svreinterpret_u8_s8_x2(...) __builtin_sve_reinterpret_u8_s8_x2(__VA_ARGS__) +#define svreinterpret_u8_u8_x2(...) __builtin_sve_reinterpret_u8_u8_x2(__VA_ARGS__) +#define svreinterpret_u8_s16_x2(...) __builtin_sve_reinterpret_u8_s16_x2(__VA_ARGS__) +#define svreinterpret_u8_u16_x2(...) __builtin_sve_reinterpret_u8_u16_x2(__VA_ARGS__) +#define svreinterpret_u8_s32_x2(...) __builtin_sve_reinterpret_u8_s32_x2(__VA_ARGS__) +#define svreinterpret_u8_u32_x2(...) __builtin_sve_reinterpret_u8_u32_x2(__VA_ARGS__) +#define svreinterpret_u8_s64_x2(...) __builtin_sve_reinterpret_u8_s64_x2(__VA_ARGS__) +#define svreinterpret_u8_u64_x2(...) __builtin_sve_reinterpret_u8_u64_x2(__VA_ARGS__) +#define svreinterpret_u8_f16_x2(...) __builtin_sve_reinterpret_u8_f16_x2(__VA_ARGS__) +#define svreinterpret_u8_bf16_x2(...) __builtin_sve_reinterpret_u8_bf16_x2(__VA_ARGS__) +#define svreinterpret_u8_f32_x2(...) __builtin_sve_reinterpret_u8_f32_x2(__VA_ARGS__) +#define svreinterpret_u8_f64_x2(...) __builtin_sve_reinterpret_u8_f64_x2(__VA_ARGS__) +#define svreinterpret_s16_s8_x2(...) __builtin_sve_reinterpret_s16_s8_x2(__VA_ARGS__) +#define svreinterpret_s16_u8_x2(...) __builtin_sve_reinterpret_s16_u8_x2(__VA_ARGS__) +#define svreinterpret_s16_s16_x2(...) __builtin_sve_reinterpret_s16_s16_x2(__VA_ARGS__) +#define svreinterpret_s16_u16_x2(...) __builtin_sve_reinterpret_s16_u16_x2(__VA_ARGS__) +#define svreinterpret_s16_s32_x2(...) __builtin_sve_reinterpret_s16_s32_x2(__VA_ARGS__) +#define svreinterpret_s16_u32_x2(...) __builtin_sve_reinterpret_s16_u32_x2(__VA_ARGS__) +#define svreinterpret_s16_s64_x2(...) __builtin_sve_reinterpret_s16_s64_x2(__VA_ARGS__) +#define svreinterpret_s16_u64_x2(...) __builtin_sve_reinterpret_s16_u64_x2(__VA_ARGS__) +#define svreinterpret_s16_f16_x2(...) __builtin_sve_reinterpret_s16_f16_x2(__VA_ARGS__) +#define svreinterpret_s16_bf16_x2(...) __builtin_sve_reinterpret_s16_bf16_x2(__VA_ARGS__) +#define svreinterpret_s16_f32_x2(...) __builtin_sve_reinterpret_s16_f32_x2(__VA_ARGS__) +#define svreinterpret_s16_f64_x2(...) __builtin_sve_reinterpret_s16_f64_x2(__VA_ARGS__) +#define svreinterpret_u16_s8_x2(...) __builtin_sve_reinterpret_u16_s8_x2(__VA_ARGS__) +#define svreinterpret_u16_u8_x2(...) __builtin_sve_reinterpret_u16_u8_x2(__VA_ARGS__) +#define svreinterpret_u16_s16_x2(...) __builtin_sve_reinterpret_u16_s16_x2(__VA_ARGS__) +#define svreinterpret_u16_u16_x2(...) __builtin_sve_reinterpret_u16_u16_x2(__VA_ARGS__) +#define svreinterpret_u16_s32_x2(...) __builtin_sve_reinterpret_u16_s32_x2(__VA_ARGS__) +#define svreinterpret_u16_u32_x2(...) __builtin_sve_reinterpret_u16_u32_x2(__VA_ARGS__) +#define svreinterpret_u16_s64_x2(...) __builtin_sve_reinterpret_u16_s64_x2(__VA_ARGS__) +#define svreinterpret_u16_u64_x2(...) 
__builtin_sve_reinterpret_u16_u64_x2(__VA_ARGS__) +#define svreinterpret_u16_f16_x2(...) __builtin_sve_reinterpret_u16_f16_x2(__VA_ARGS__) +#define svreinterpret_u16_bf16_x2(...) __builtin_sve_reinterpret_u16_bf16_x2(__VA_ARGS__) +#define svreinterpret_u16_f32_x2(...) __builtin_sve_reinterpret_u16_f32_x2(__VA_ARGS__) +#define svreinterpret_u16_f64_x2(...) __builtin_sve_reinterpret_u16_f64_x2(__VA_ARGS__) +#define svreinterpret_s32_s8_x2(...) __builtin_sve_reinterpret_s32_s8_x2(__VA_ARGS__) +#define svreinterpret_s32_u8_x2(...) __builtin_sve_reinterpret_s32_u8_x2(__VA_ARGS__) +#define svreinterpret_s32_s16_x2(...) __builtin_sve_reinterpret_s32_s16_x2(__VA_ARGS__) +#define svreinterpret_s32_u16_x2(...) __builtin_sve_reinterpret_s32_u16_x2(__VA_ARGS__) +#define svreinterpret_s32_s32_x2(...) __builtin_sve_reinterpret_s32_s32_x2(__VA_ARGS__) +#define svreinterpret_s32_u32_x2(...) __builtin_sve_reinterpret_s32_u32_x2(__VA_ARGS__) +#define svreinterpret_s32_s64_x2(...) __builtin_sve_reinterpret_s32_s64_x2(__VA_ARGS__) +#define svreinterpret_s32_u64_x2(...) __builtin_sve_reinterpret_s32_u64_x2(__VA_ARGS__) +#define svreinterpret_s32_f16_x2(...) __builtin_sve_reinterpret_s32_f16_x2(__VA_ARGS__) +#define svreinterpret_s32_bf16_x2(...) __builtin_sve_reinterpret_s32_bf16_x2(__VA_ARGS__) +#define svreinterpret_s32_f32_x2(...) __builtin_sve_reinterpret_s32_f32_x2(__VA_ARGS__) +#define svreinterpret_s32_f64_x2(...) __builtin_sve_reinterpret_s32_f64_x2(__VA_ARGS__) +#define svreinterpret_u32_s8_x2(...) __builtin_sve_reinterpret_u32_s8_x2(__VA_ARGS__) +#define svreinterpret_u32_u8_x2(...) __builtin_sve_reinterpret_u32_u8_x2(__VA_ARGS__) +#define svreinterpret_u32_s16_x2(...) __builtin_sve_reinterpret_u32_s16_x2(__VA_ARGS__) +#define svreinterpret_u32_u16_x2(...) __builtin_sve_reinterpret_u32_u16_x2(__VA_ARGS__) +#define svreinterpret_u32_s32_x2(...) __builtin_sve_reinterpret_u32_s32_x2(__VA_ARGS__) +#define svreinterpret_u32_u32_x2(...) __builtin_sve_reinterpret_u32_u32_x2(__VA_ARGS__) +#define svreinterpret_u32_s64_x2(...) __builtin_sve_reinterpret_u32_s64_x2(__VA_ARGS__) +#define svreinterpret_u32_u64_x2(...) __builtin_sve_reinterpret_u32_u64_x2(__VA_ARGS__) +#define svreinterpret_u32_f16_x2(...) __builtin_sve_reinterpret_u32_f16_x2(__VA_ARGS__) +#define svreinterpret_u32_bf16_x2(...) __builtin_sve_reinterpret_u32_bf16_x2(__VA_ARGS__) +#define svreinterpret_u32_f32_x2(...) __builtin_sve_reinterpret_u32_f32_x2(__VA_ARGS__) +#define svreinterpret_u32_f64_x2(...) __builtin_sve_reinterpret_u32_f64_x2(__VA_ARGS__) +#define svreinterpret_s64_s8_x2(...) __builtin_sve_reinterpret_s64_s8_x2(__VA_ARGS__) +#define svreinterpret_s64_u8_x2(...) __builtin_sve_reinterpret_s64_u8_x2(__VA_ARGS__) +#define svreinterpret_s64_s16_x2(...) __builtin_sve_reinterpret_s64_s16_x2(__VA_ARGS__) +#define svreinterpret_s64_u16_x2(...) __builtin_sve_reinterpret_s64_u16_x2(__VA_ARGS__) +#define svreinterpret_s64_s32_x2(...) __builtin_sve_reinterpret_s64_s32_x2(__VA_ARGS__) +#define svreinterpret_s64_u32_x2(...) __builtin_sve_reinterpret_s64_u32_x2(__VA_ARGS__) +#define svreinterpret_s64_s64_x2(...) __builtin_sve_reinterpret_s64_s64_x2(__VA_ARGS__) +#define svreinterpret_s64_u64_x2(...) __builtin_sve_reinterpret_s64_u64_x2(__VA_ARGS__) +#define svreinterpret_s64_f16_x2(...) __builtin_sve_reinterpret_s64_f16_x2(__VA_ARGS__) +#define svreinterpret_s64_bf16_x2(...) __builtin_sve_reinterpret_s64_bf16_x2(__VA_ARGS__) +#define svreinterpret_s64_f32_x2(...) 
__builtin_sve_reinterpret_s64_f32_x2(__VA_ARGS__) +#define svreinterpret_s64_f64_x2(...) __builtin_sve_reinterpret_s64_f64_x2(__VA_ARGS__) +#define svreinterpret_u64_s8_x2(...) __builtin_sve_reinterpret_u64_s8_x2(__VA_ARGS__) +#define svreinterpret_u64_u8_x2(...) __builtin_sve_reinterpret_u64_u8_x2(__VA_ARGS__) +#define svreinterpret_u64_s16_x2(...) __builtin_sve_reinterpret_u64_s16_x2(__VA_ARGS__) +#define svreinterpret_u64_u16_x2(...) __builtin_sve_reinterpret_u64_u16_x2(__VA_ARGS__) +#define svreinterpret_u64_s32_x2(...) __builtin_sve_reinterpret_u64_s32_x2(__VA_ARGS__) +#define svreinterpret_u64_u32_x2(...) __builtin_sve_reinterpret_u64_u32_x2(__VA_ARGS__) +#define svreinterpret_u64_s64_x2(...) __builtin_sve_reinterpret_u64_s64_x2(__VA_ARGS__) +#define svreinterpret_u64_u64_x2(...) __builtin_sve_reinterpret_u64_u64_x2(__VA_ARGS__) +#define svreinterpret_u64_f16_x2(...) __builtin_sve_reinterpret_u64_f16_x2(__VA_ARGS__) +#define svreinterpret_u64_bf16_x2(...) __builtin_sve_reinterpret_u64_bf16_x2(__VA_ARGS__) +#define svreinterpret_u64_f32_x2(...) __builtin_sve_reinterpret_u64_f32_x2(__VA_ARGS__) +#define svreinterpret_u64_f64_x2(...) __builtin_sve_reinterpret_u64_f64_x2(__VA_ARGS__) +#define svreinterpret_f16_s8_x2(...) __builtin_sve_reinterpret_f16_s8_x2(__VA_ARGS__) +#define svreinterpret_f16_u8_x2(...) __builtin_sve_reinterpret_f16_u8_x2(__VA_ARGS__) +#define svreinterpret_f16_s16_x2(...) __builtin_sve_reinterpret_f16_s16_x2(__VA_ARGS__) +#define svreinterpret_f16_u16_x2(...) __builtin_sve_reinterpret_f16_u16_x2(__VA_ARGS__) +#define svreinterpret_f16_s32_x2(...) __builtin_sve_reinterpret_f16_s32_x2(__VA_ARGS__) +#define svreinterpret_f16_u32_x2(...) __builtin_sve_reinterpret_f16_u32_x2(__VA_ARGS__) +#define svreinterpret_f16_s64_x2(...) __builtin_sve_reinterpret_f16_s64_x2(__VA_ARGS__) +#define svreinterpret_f16_u64_x2(...) __builtin_sve_reinterpret_f16_u64_x2(__VA_ARGS__) +#define svreinterpret_f16_f16_x2(...) __builtin_sve_reinterpret_f16_f16_x2(__VA_ARGS__) +#define svreinterpret_f16_bf16_x2(...) __builtin_sve_reinterpret_f16_bf16_x2(__VA_ARGS__) +#define svreinterpret_f16_f32_x2(...) __builtin_sve_reinterpret_f16_f32_x2(__VA_ARGS__) +#define svreinterpret_f16_f64_x2(...) __builtin_sve_reinterpret_f16_f64_x2(__VA_ARGS__) +#define svreinterpret_bf16_s8_x2(...) __builtin_sve_reinterpret_bf16_s8_x2(__VA_ARGS__) +#define svreinterpret_bf16_u8_x2(...) __builtin_sve_reinterpret_bf16_u8_x2(__VA_ARGS__) +#define svreinterpret_bf16_s16_x2(...) __builtin_sve_reinterpret_bf16_s16_x2(__VA_ARGS__) +#define svreinterpret_bf16_u16_x2(...) __builtin_sve_reinterpret_bf16_u16_x2(__VA_ARGS__) +#define svreinterpret_bf16_s32_x2(...) __builtin_sve_reinterpret_bf16_s32_x2(__VA_ARGS__) +#define svreinterpret_bf16_u32_x2(...) __builtin_sve_reinterpret_bf16_u32_x2(__VA_ARGS__) +#define svreinterpret_bf16_s64_x2(...) __builtin_sve_reinterpret_bf16_s64_x2(__VA_ARGS__) +#define svreinterpret_bf16_u64_x2(...) __builtin_sve_reinterpret_bf16_u64_x2(__VA_ARGS__) +#define svreinterpret_bf16_f16_x2(...) __builtin_sve_reinterpret_bf16_f16_x2(__VA_ARGS__) +#define svreinterpret_bf16_bf16_x2(...) __builtin_sve_reinterpret_bf16_bf16_x2(__VA_ARGS__) +#define svreinterpret_bf16_f32_x2(...) __builtin_sve_reinterpret_bf16_f32_x2(__VA_ARGS__) +#define svreinterpret_bf16_f64_x2(...) __builtin_sve_reinterpret_bf16_f64_x2(__VA_ARGS__) +#define svreinterpret_f32_s8_x2(...) __builtin_sve_reinterpret_f32_s8_x2(__VA_ARGS__) +#define svreinterpret_f32_u8_x2(...) 
__builtin_sve_reinterpret_f32_u8_x2(__VA_ARGS__) +#define svreinterpret_f32_s16_x2(...) __builtin_sve_reinterpret_f32_s16_x2(__VA_ARGS__) +#define svreinterpret_f32_u16_x2(...) __builtin_sve_reinterpret_f32_u16_x2(__VA_ARGS__) +#define svreinterpret_f32_s32_x2(...) __builtin_sve_reinterpret_f32_s32_x2(__VA_ARGS__) +#define svreinterpret_f32_u32_x2(...) __builtin_sve_reinterpret_f32_u32_x2(__VA_ARGS__) +#define svreinterpret_f32_s64_x2(...) __builtin_sve_reinterpret_f32_s64_x2(__VA_ARGS__) +#define svreinterpret_f32_u64_x2(...) __builtin_sve_reinterpret_f32_u64_x2(__VA_ARGS__) +#define svreinterpret_f32_f16_x2(...) __builtin_sve_reinterpret_f32_f16_x2(__VA_ARGS__) +#define svreinterpret_f32_bf16_x2(...) __builtin_sve_reinterpret_f32_bf16_x2(__VA_ARGS__) +#define svreinterpret_f32_f32_x2(...) __builtin_sve_reinterpret_f32_f32_x2(__VA_ARGS__) +#define svreinterpret_f32_f64_x2(...) __builtin_sve_reinterpret_f32_f64_x2(__VA_ARGS__) +#define svreinterpret_f64_s8_x2(...) __builtin_sve_reinterpret_f64_s8_x2(__VA_ARGS__) +#define svreinterpret_f64_u8_x2(...) __builtin_sve_reinterpret_f64_u8_x2(__VA_ARGS__) +#define svreinterpret_f64_s16_x2(...) __builtin_sve_reinterpret_f64_s16_x2(__VA_ARGS__) +#define svreinterpret_f64_u16_x2(...) __builtin_sve_reinterpret_f64_u16_x2(__VA_ARGS__) +#define svreinterpret_f64_s32_x2(...) __builtin_sve_reinterpret_f64_s32_x2(__VA_ARGS__) +#define svreinterpret_f64_u32_x2(...) __builtin_sve_reinterpret_f64_u32_x2(__VA_ARGS__) +#define svreinterpret_f64_s64_x2(...) __builtin_sve_reinterpret_f64_s64_x2(__VA_ARGS__) +#define svreinterpret_f64_u64_x2(...) __builtin_sve_reinterpret_f64_u64_x2(__VA_ARGS__) +#define svreinterpret_f64_f16_x2(...) __builtin_sve_reinterpret_f64_f16_x2(__VA_ARGS__) +#define svreinterpret_f64_bf16_x2(...) __builtin_sve_reinterpret_f64_bf16_x2(__VA_ARGS__) +#define svreinterpret_f64_f32_x2(...) __builtin_sve_reinterpret_f64_f32_x2(__VA_ARGS__) +#define svreinterpret_f64_f64_x2(...) 
__builtin_sve_reinterpret_f64_f64_x2(__VA_ARGS__) +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u64_x2(op); +} + +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_f16_x2(op); +} + +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svint8x2_t svreinterpret_s8(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_f64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_f16_x2(op); +} 
+ +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint8x2_t svreinterpret_u8(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_f64_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u64_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f16_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svint16x2_t svreinterpret_s16(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svuint32x2_t op) __arm_streaming_compatible { + return 
__builtin_sve_reinterpret_u16_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_u64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_f16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint16x2_t svreinterpret_u16(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_f64_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u64_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f16_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svint32x2_t svreinterpret_s32(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint16x2_t op) 
__arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_f16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint32x2_t svreinterpret_u32(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_f64_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u64_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f16_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t svreinterpret_s64(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svint64x2_t 
svreinterpret_s64(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u64_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_f16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svuint64x2_t svreinterpret_u64(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_f64_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u64_x2(op); +} + +__aio 
__attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_f16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat16x2_t svreinterpret_f16(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_f64_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u64_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_f16_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x2_t svreinterpret_bf16(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_f64_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint32x2_t op) 
__arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u64_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_f16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat32x2_t svreinterpret_f32(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_f64_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s8_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint8x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u8_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s32_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u32_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s64_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svuint64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u64_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_f16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svbfloat16x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_bf16_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svfloat32x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_f32_x2(op); +} + +__aio __attribute__((target("sve"))) svfloat64x2_t svreinterpret_f64(svfloat64x2_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_f64_x2(op); +} + +#define svreinterpret_s8_s8_x3(...) __builtin_sve_reinterpret_s8_s8_x3(__VA_ARGS__) +#define svreinterpret_s8_u8_x3(...) 
__builtin_sve_reinterpret_s8_u8_x3(__VA_ARGS__) +#define svreinterpret_s8_s16_x3(...) __builtin_sve_reinterpret_s8_s16_x3(__VA_ARGS__) +#define svreinterpret_s8_u16_x3(...) __builtin_sve_reinterpret_s8_u16_x3(__VA_ARGS__) +#define svreinterpret_s8_s32_x3(...) __builtin_sve_reinterpret_s8_s32_x3(__VA_ARGS__) +#define svreinterpret_s8_u32_x3(...) __builtin_sve_reinterpret_s8_u32_x3(__VA_ARGS__) +#define svreinterpret_s8_s64_x3(...) __builtin_sve_reinterpret_s8_s64_x3(__VA_ARGS__) +#define svreinterpret_s8_u64_x3(...) __builtin_sve_reinterpret_s8_u64_x3(__VA_ARGS__) +#define svreinterpret_s8_f16_x3(...) __builtin_sve_reinterpret_s8_f16_x3(__VA_ARGS__) +#define svreinterpret_s8_bf16_x3(...) __builtin_sve_reinterpret_s8_bf16_x3(__VA_ARGS__) +#define svreinterpret_s8_f32_x3(...) __builtin_sve_reinterpret_s8_f32_x3(__VA_ARGS__) +#define svreinterpret_s8_f64_x3(...) __builtin_sve_reinterpret_s8_f64_x3(__VA_ARGS__) +#define svreinterpret_u8_s8_x3(...) __builtin_sve_reinterpret_u8_s8_x3(__VA_ARGS__) +#define svreinterpret_u8_u8_x3(...) __builtin_sve_reinterpret_u8_u8_x3(__VA_ARGS__) +#define svreinterpret_u8_s16_x3(...) __builtin_sve_reinterpret_u8_s16_x3(__VA_ARGS__) +#define svreinterpret_u8_u16_x3(...) __builtin_sve_reinterpret_u8_u16_x3(__VA_ARGS__) +#define svreinterpret_u8_s32_x3(...) __builtin_sve_reinterpret_u8_s32_x3(__VA_ARGS__) +#define svreinterpret_u8_u32_x3(...) __builtin_sve_reinterpret_u8_u32_x3(__VA_ARGS__) +#define svreinterpret_u8_s64_x3(...) __builtin_sve_reinterpret_u8_s64_x3(__VA_ARGS__) +#define svreinterpret_u8_u64_x3(...) __builtin_sve_reinterpret_u8_u64_x3(__VA_ARGS__) +#define svreinterpret_u8_f16_x3(...) __builtin_sve_reinterpret_u8_f16_x3(__VA_ARGS__) +#define svreinterpret_u8_bf16_x3(...) __builtin_sve_reinterpret_u8_bf16_x3(__VA_ARGS__) +#define svreinterpret_u8_f32_x3(...) __builtin_sve_reinterpret_u8_f32_x3(__VA_ARGS__) +#define svreinterpret_u8_f64_x3(...) __builtin_sve_reinterpret_u8_f64_x3(__VA_ARGS__) +#define svreinterpret_s16_s8_x3(...) __builtin_sve_reinterpret_s16_s8_x3(__VA_ARGS__) +#define svreinterpret_s16_u8_x3(...) __builtin_sve_reinterpret_s16_u8_x3(__VA_ARGS__) +#define svreinterpret_s16_s16_x3(...) __builtin_sve_reinterpret_s16_s16_x3(__VA_ARGS__) +#define svreinterpret_s16_u16_x3(...) __builtin_sve_reinterpret_s16_u16_x3(__VA_ARGS__) +#define svreinterpret_s16_s32_x3(...) __builtin_sve_reinterpret_s16_s32_x3(__VA_ARGS__) +#define svreinterpret_s16_u32_x3(...) __builtin_sve_reinterpret_s16_u32_x3(__VA_ARGS__) +#define svreinterpret_s16_s64_x3(...) __builtin_sve_reinterpret_s16_s64_x3(__VA_ARGS__) +#define svreinterpret_s16_u64_x3(...) __builtin_sve_reinterpret_s16_u64_x3(__VA_ARGS__) +#define svreinterpret_s16_f16_x3(...) __builtin_sve_reinterpret_s16_f16_x3(__VA_ARGS__) +#define svreinterpret_s16_bf16_x3(...) __builtin_sve_reinterpret_s16_bf16_x3(__VA_ARGS__) +#define svreinterpret_s16_f32_x3(...) __builtin_sve_reinterpret_s16_f32_x3(__VA_ARGS__) +#define svreinterpret_s16_f64_x3(...) __builtin_sve_reinterpret_s16_f64_x3(__VA_ARGS__) +#define svreinterpret_u16_s8_x3(...) __builtin_sve_reinterpret_u16_s8_x3(__VA_ARGS__) +#define svreinterpret_u16_u8_x3(...) __builtin_sve_reinterpret_u16_u8_x3(__VA_ARGS__) +#define svreinterpret_u16_s16_x3(...) __builtin_sve_reinterpret_u16_s16_x3(__VA_ARGS__) +#define svreinterpret_u16_u16_x3(...) __builtin_sve_reinterpret_u16_u16_x3(__VA_ARGS__) +#define svreinterpret_u16_s32_x3(...) __builtin_sve_reinterpret_u16_s32_x3(__VA_ARGS__) +#define svreinterpret_u16_u32_x3(...) 
__builtin_sve_reinterpret_u16_u32_x3(__VA_ARGS__) +#define svreinterpret_u16_s64_x3(...) __builtin_sve_reinterpret_u16_s64_x3(__VA_ARGS__) +#define svreinterpret_u16_u64_x3(...) __builtin_sve_reinterpret_u16_u64_x3(__VA_ARGS__) +#define svreinterpret_u16_f16_x3(...) __builtin_sve_reinterpret_u16_f16_x3(__VA_ARGS__) +#define svreinterpret_u16_bf16_x3(...) __builtin_sve_reinterpret_u16_bf16_x3(__VA_ARGS__) +#define svreinterpret_u16_f32_x3(...) __builtin_sve_reinterpret_u16_f32_x3(__VA_ARGS__) +#define svreinterpret_u16_f64_x3(...) __builtin_sve_reinterpret_u16_f64_x3(__VA_ARGS__) +#define svreinterpret_s32_s8_x3(...) __builtin_sve_reinterpret_s32_s8_x3(__VA_ARGS__) +#define svreinterpret_s32_u8_x3(...) __builtin_sve_reinterpret_s32_u8_x3(__VA_ARGS__) +#define svreinterpret_s32_s16_x3(...) __builtin_sve_reinterpret_s32_s16_x3(__VA_ARGS__) +#define svreinterpret_s32_u16_x3(...) __builtin_sve_reinterpret_s32_u16_x3(__VA_ARGS__) +#define svreinterpret_s32_s32_x3(...) __builtin_sve_reinterpret_s32_s32_x3(__VA_ARGS__) +#define svreinterpret_s32_u32_x3(...) __builtin_sve_reinterpret_s32_u32_x3(__VA_ARGS__) +#define svreinterpret_s32_s64_x3(...) __builtin_sve_reinterpret_s32_s64_x3(__VA_ARGS__) +#define svreinterpret_s32_u64_x3(...) __builtin_sve_reinterpret_s32_u64_x3(__VA_ARGS__) +#define svreinterpret_s32_f16_x3(...) __builtin_sve_reinterpret_s32_f16_x3(__VA_ARGS__) +#define svreinterpret_s32_bf16_x3(...) __builtin_sve_reinterpret_s32_bf16_x3(__VA_ARGS__) +#define svreinterpret_s32_f32_x3(...) __builtin_sve_reinterpret_s32_f32_x3(__VA_ARGS__) +#define svreinterpret_s32_f64_x3(...) __builtin_sve_reinterpret_s32_f64_x3(__VA_ARGS__) +#define svreinterpret_u32_s8_x3(...) __builtin_sve_reinterpret_u32_s8_x3(__VA_ARGS__) +#define svreinterpret_u32_u8_x3(...) __builtin_sve_reinterpret_u32_u8_x3(__VA_ARGS__) +#define svreinterpret_u32_s16_x3(...) __builtin_sve_reinterpret_u32_s16_x3(__VA_ARGS__) +#define svreinterpret_u32_u16_x3(...) __builtin_sve_reinterpret_u32_u16_x3(__VA_ARGS__) +#define svreinterpret_u32_s32_x3(...) __builtin_sve_reinterpret_u32_s32_x3(__VA_ARGS__) +#define svreinterpret_u32_u32_x3(...) __builtin_sve_reinterpret_u32_u32_x3(__VA_ARGS__) +#define svreinterpret_u32_s64_x3(...) __builtin_sve_reinterpret_u32_s64_x3(__VA_ARGS__) +#define svreinterpret_u32_u64_x3(...) __builtin_sve_reinterpret_u32_u64_x3(__VA_ARGS__) +#define svreinterpret_u32_f16_x3(...) __builtin_sve_reinterpret_u32_f16_x3(__VA_ARGS__) +#define svreinterpret_u32_bf16_x3(...) __builtin_sve_reinterpret_u32_bf16_x3(__VA_ARGS__) +#define svreinterpret_u32_f32_x3(...) __builtin_sve_reinterpret_u32_f32_x3(__VA_ARGS__) +#define svreinterpret_u32_f64_x3(...) __builtin_sve_reinterpret_u32_f64_x3(__VA_ARGS__) +#define svreinterpret_s64_s8_x3(...) __builtin_sve_reinterpret_s64_s8_x3(__VA_ARGS__) +#define svreinterpret_s64_u8_x3(...) __builtin_sve_reinterpret_s64_u8_x3(__VA_ARGS__) +#define svreinterpret_s64_s16_x3(...) __builtin_sve_reinterpret_s64_s16_x3(__VA_ARGS__) +#define svreinterpret_s64_u16_x3(...) __builtin_sve_reinterpret_s64_u16_x3(__VA_ARGS__) +#define svreinterpret_s64_s32_x3(...) __builtin_sve_reinterpret_s64_s32_x3(__VA_ARGS__) +#define svreinterpret_s64_u32_x3(...) __builtin_sve_reinterpret_s64_u32_x3(__VA_ARGS__) +#define svreinterpret_s64_s64_x3(...) __builtin_sve_reinterpret_s64_s64_x3(__VA_ARGS__) +#define svreinterpret_s64_u64_x3(...) __builtin_sve_reinterpret_s64_u64_x3(__VA_ARGS__) +#define svreinterpret_s64_f16_x3(...) __builtin_sve_reinterpret_s64_f16_x3(__VA_ARGS__) +#define svreinterpret_s64_bf16_x3(...) 
__builtin_sve_reinterpret_s64_bf16_x3(__VA_ARGS__) +#define svreinterpret_s64_f32_x3(...) __builtin_sve_reinterpret_s64_f32_x3(__VA_ARGS__) +#define svreinterpret_s64_f64_x3(...) __builtin_sve_reinterpret_s64_f64_x3(__VA_ARGS__) +#define svreinterpret_u64_s8_x3(...) __builtin_sve_reinterpret_u64_s8_x3(__VA_ARGS__) +#define svreinterpret_u64_u8_x3(...) __builtin_sve_reinterpret_u64_u8_x3(__VA_ARGS__) +#define svreinterpret_u64_s16_x3(...) __builtin_sve_reinterpret_u64_s16_x3(__VA_ARGS__) +#define svreinterpret_u64_u16_x3(...) __builtin_sve_reinterpret_u64_u16_x3(__VA_ARGS__) +#define svreinterpret_u64_s32_x3(...) __builtin_sve_reinterpret_u64_s32_x3(__VA_ARGS__) +#define svreinterpret_u64_u32_x3(...) __builtin_sve_reinterpret_u64_u32_x3(__VA_ARGS__) +#define svreinterpret_u64_s64_x3(...) __builtin_sve_reinterpret_u64_s64_x3(__VA_ARGS__) +#define svreinterpret_u64_u64_x3(...) __builtin_sve_reinterpret_u64_u64_x3(__VA_ARGS__) +#define svreinterpret_u64_f16_x3(...) __builtin_sve_reinterpret_u64_f16_x3(__VA_ARGS__) +#define svreinterpret_u64_bf16_x3(...) __builtin_sve_reinterpret_u64_bf16_x3(__VA_ARGS__) +#define svreinterpret_u64_f32_x3(...) __builtin_sve_reinterpret_u64_f32_x3(__VA_ARGS__) +#define svreinterpret_u64_f64_x3(...) __builtin_sve_reinterpret_u64_f64_x3(__VA_ARGS__) +#define svreinterpret_f16_s8_x3(...) __builtin_sve_reinterpret_f16_s8_x3(__VA_ARGS__) +#define svreinterpret_f16_u8_x3(...) __builtin_sve_reinterpret_f16_u8_x3(__VA_ARGS__) +#define svreinterpret_f16_s16_x3(...) __builtin_sve_reinterpret_f16_s16_x3(__VA_ARGS__) +#define svreinterpret_f16_u16_x3(...) __builtin_sve_reinterpret_f16_u16_x3(__VA_ARGS__) +#define svreinterpret_f16_s32_x3(...) __builtin_sve_reinterpret_f16_s32_x3(__VA_ARGS__) +#define svreinterpret_f16_u32_x3(...) __builtin_sve_reinterpret_f16_u32_x3(__VA_ARGS__) +#define svreinterpret_f16_s64_x3(...) __builtin_sve_reinterpret_f16_s64_x3(__VA_ARGS__) +#define svreinterpret_f16_u64_x3(...) __builtin_sve_reinterpret_f16_u64_x3(__VA_ARGS__) +#define svreinterpret_f16_f16_x3(...) __builtin_sve_reinterpret_f16_f16_x3(__VA_ARGS__) +#define svreinterpret_f16_bf16_x3(...) __builtin_sve_reinterpret_f16_bf16_x3(__VA_ARGS__) +#define svreinterpret_f16_f32_x3(...) __builtin_sve_reinterpret_f16_f32_x3(__VA_ARGS__) +#define svreinterpret_f16_f64_x3(...) __builtin_sve_reinterpret_f16_f64_x3(__VA_ARGS__) +#define svreinterpret_bf16_s8_x3(...) __builtin_sve_reinterpret_bf16_s8_x3(__VA_ARGS__) +#define svreinterpret_bf16_u8_x3(...) __builtin_sve_reinterpret_bf16_u8_x3(__VA_ARGS__) +#define svreinterpret_bf16_s16_x3(...) __builtin_sve_reinterpret_bf16_s16_x3(__VA_ARGS__) +#define svreinterpret_bf16_u16_x3(...) __builtin_sve_reinterpret_bf16_u16_x3(__VA_ARGS__) +#define svreinterpret_bf16_s32_x3(...) __builtin_sve_reinterpret_bf16_s32_x3(__VA_ARGS__) +#define svreinterpret_bf16_u32_x3(...) __builtin_sve_reinterpret_bf16_u32_x3(__VA_ARGS__) +#define svreinterpret_bf16_s64_x3(...) __builtin_sve_reinterpret_bf16_s64_x3(__VA_ARGS__) +#define svreinterpret_bf16_u64_x3(...) __builtin_sve_reinterpret_bf16_u64_x3(__VA_ARGS__) +#define svreinterpret_bf16_f16_x3(...) __builtin_sve_reinterpret_bf16_f16_x3(__VA_ARGS__) +#define svreinterpret_bf16_bf16_x3(...) __builtin_sve_reinterpret_bf16_bf16_x3(__VA_ARGS__) +#define svreinterpret_bf16_f32_x3(...) __builtin_sve_reinterpret_bf16_f32_x3(__VA_ARGS__) +#define svreinterpret_bf16_f64_x3(...) __builtin_sve_reinterpret_bf16_f64_x3(__VA_ARGS__) +#define svreinterpret_f32_s8_x3(...) 
__builtin_sve_reinterpret_f32_s8_x3(__VA_ARGS__) +#define svreinterpret_f32_u8_x3(...) __builtin_sve_reinterpret_f32_u8_x3(__VA_ARGS__) +#define svreinterpret_f32_s16_x3(...) __builtin_sve_reinterpret_f32_s16_x3(__VA_ARGS__) +#define svreinterpret_f32_u16_x3(...) __builtin_sve_reinterpret_f32_u16_x3(__VA_ARGS__) +#define svreinterpret_f32_s32_x3(...) __builtin_sve_reinterpret_f32_s32_x3(__VA_ARGS__) +#define svreinterpret_f32_u32_x3(...) __builtin_sve_reinterpret_f32_u32_x3(__VA_ARGS__) +#define svreinterpret_f32_s64_x3(...) __builtin_sve_reinterpret_f32_s64_x3(__VA_ARGS__) +#define svreinterpret_f32_u64_x3(...) __builtin_sve_reinterpret_f32_u64_x3(__VA_ARGS__) +#define svreinterpret_f32_f16_x3(...) __builtin_sve_reinterpret_f32_f16_x3(__VA_ARGS__) +#define svreinterpret_f32_bf16_x3(...) __builtin_sve_reinterpret_f32_bf16_x3(__VA_ARGS__) +#define svreinterpret_f32_f32_x3(...) __builtin_sve_reinterpret_f32_f32_x3(__VA_ARGS__) +#define svreinterpret_f32_f64_x3(...) __builtin_sve_reinterpret_f32_f64_x3(__VA_ARGS__) +#define svreinterpret_f64_s8_x3(...) __builtin_sve_reinterpret_f64_s8_x3(__VA_ARGS__) +#define svreinterpret_f64_u8_x3(...) __builtin_sve_reinterpret_f64_u8_x3(__VA_ARGS__) +#define svreinterpret_f64_s16_x3(...) __builtin_sve_reinterpret_f64_s16_x3(__VA_ARGS__) +#define svreinterpret_f64_u16_x3(...) __builtin_sve_reinterpret_f64_u16_x3(__VA_ARGS__) +#define svreinterpret_f64_s32_x3(...) __builtin_sve_reinterpret_f64_s32_x3(__VA_ARGS__) +#define svreinterpret_f64_u32_x3(...) __builtin_sve_reinterpret_f64_u32_x3(__VA_ARGS__) +#define svreinterpret_f64_s64_x3(...) __builtin_sve_reinterpret_f64_s64_x3(__VA_ARGS__) +#define svreinterpret_f64_u64_x3(...) __builtin_sve_reinterpret_f64_u64_x3(__VA_ARGS__) +#define svreinterpret_f64_f16_x3(...) __builtin_sve_reinterpret_f64_f16_x3(__VA_ARGS__) +#define svreinterpret_f64_bf16_x3(...) __builtin_sve_reinterpret_f64_bf16_x3(__VA_ARGS__) +#define svreinterpret_f64_f32_x3(...) __builtin_sve_reinterpret_f64_f32_x3(__VA_ARGS__) +#define svreinterpret_f64_f64_x3(...) 
__builtin_sve_reinterpret_f64_f64_x3(__VA_ARGS__) +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u64_x3(op); +} + +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_f16_x3(op); +} + +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svint8x3_t svreinterpret_s8(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_f64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svuint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_f16_x3(op); +} 
+ +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint8x3_t svreinterpret_u8(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_f64_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u64_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f16_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svint16x3_t svreinterpret_s16(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint32x3_t op) __arm_streaming_compatible { + return 
__builtin_sve_reinterpret_u16_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_u64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_f16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint16x3_t svreinterpret_u16(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_f64_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u64_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f16_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svint32x3_t svreinterpret_s32(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint16x3_t op) 
__arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_f16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint32x3_t svreinterpret_u32(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_f64_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u64_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f16_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t svreinterpret_s64(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svint64x3_t 
svreinterpret_s64(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u64_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_f16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svuint64x3_t svreinterpret_u64(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_f64_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u64_x3(op); +} + +__aio 
__attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_f16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat16x3_t svreinterpret_f16(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_f64_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u64_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_f16_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x3_t svreinterpret_bf16(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_f64_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint32x3_t op) 
__arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u64_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_f16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat32x3_t svreinterpret_f32(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_f64_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s8_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint8x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u8_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s32_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u32_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s64_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svuint64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u64_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_f16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svbfloat16x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_bf16_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svfloat32x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_f32_x3(op); +} + +__aio __attribute__((target("sve"))) svfloat64x3_t svreinterpret_f64(svfloat64x3_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_f64_x3(op); +} + +#define svreinterpret_s8_s8_x4(...) __builtin_sve_reinterpret_s8_s8_x4(__VA_ARGS__) +#define svreinterpret_s8_u8_x4(...) 
__builtin_sve_reinterpret_s8_u8_x4(__VA_ARGS__) +#define svreinterpret_s8_s16_x4(...) __builtin_sve_reinterpret_s8_s16_x4(__VA_ARGS__) +#define svreinterpret_s8_u16_x4(...) __builtin_sve_reinterpret_s8_u16_x4(__VA_ARGS__) +#define svreinterpret_s8_s32_x4(...) __builtin_sve_reinterpret_s8_s32_x4(__VA_ARGS__) +#define svreinterpret_s8_u32_x4(...) __builtin_sve_reinterpret_s8_u32_x4(__VA_ARGS__) +#define svreinterpret_s8_s64_x4(...) __builtin_sve_reinterpret_s8_s64_x4(__VA_ARGS__) +#define svreinterpret_s8_u64_x4(...) __builtin_sve_reinterpret_s8_u64_x4(__VA_ARGS__) +#define svreinterpret_s8_f16_x4(...) __builtin_sve_reinterpret_s8_f16_x4(__VA_ARGS__) +#define svreinterpret_s8_bf16_x4(...) __builtin_sve_reinterpret_s8_bf16_x4(__VA_ARGS__) +#define svreinterpret_s8_f32_x4(...) __builtin_sve_reinterpret_s8_f32_x4(__VA_ARGS__) +#define svreinterpret_s8_f64_x4(...) __builtin_sve_reinterpret_s8_f64_x4(__VA_ARGS__) +#define svreinterpret_u8_s8_x4(...) __builtin_sve_reinterpret_u8_s8_x4(__VA_ARGS__) +#define svreinterpret_u8_u8_x4(...) __builtin_sve_reinterpret_u8_u8_x4(__VA_ARGS__) +#define svreinterpret_u8_s16_x4(...) __builtin_sve_reinterpret_u8_s16_x4(__VA_ARGS__) +#define svreinterpret_u8_u16_x4(...) __builtin_sve_reinterpret_u8_u16_x4(__VA_ARGS__) +#define svreinterpret_u8_s32_x4(...) __builtin_sve_reinterpret_u8_s32_x4(__VA_ARGS__) +#define svreinterpret_u8_u32_x4(...) __builtin_sve_reinterpret_u8_u32_x4(__VA_ARGS__) +#define svreinterpret_u8_s64_x4(...) __builtin_sve_reinterpret_u8_s64_x4(__VA_ARGS__) +#define svreinterpret_u8_u64_x4(...) __builtin_sve_reinterpret_u8_u64_x4(__VA_ARGS__) +#define svreinterpret_u8_f16_x4(...) __builtin_sve_reinterpret_u8_f16_x4(__VA_ARGS__) +#define svreinterpret_u8_bf16_x4(...) __builtin_sve_reinterpret_u8_bf16_x4(__VA_ARGS__) +#define svreinterpret_u8_f32_x4(...) __builtin_sve_reinterpret_u8_f32_x4(__VA_ARGS__) +#define svreinterpret_u8_f64_x4(...) __builtin_sve_reinterpret_u8_f64_x4(__VA_ARGS__) +#define svreinterpret_s16_s8_x4(...) __builtin_sve_reinterpret_s16_s8_x4(__VA_ARGS__) +#define svreinterpret_s16_u8_x4(...) __builtin_sve_reinterpret_s16_u8_x4(__VA_ARGS__) +#define svreinterpret_s16_s16_x4(...) __builtin_sve_reinterpret_s16_s16_x4(__VA_ARGS__) +#define svreinterpret_s16_u16_x4(...) __builtin_sve_reinterpret_s16_u16_x4(__VA_ARGS__) +#define svreinterpret_s16_s32_x4(...) __builtin_sve_reinterpret_s16_s32_x4(__VA_ARGS__) +#define svreinterpret_s16_u32_x4(...) __builtin_sve_reinterpret_s16_u32_x4(__VA_ARGS__) +#define svreinterpret_s16_s64_x4(...) __builtin_sve_reinterpret_s16_s64_x4(__VA_ARGS__) +#define svreinterpret_s16_u64_x4(...) __builtin_sve_reinterpret_s16_u64_x4(__VA_ARGS__) +#define svreinterpret_s16_f16_x4(...) __builtin_sve_reinterpret_s16_f16_x4(__VA_ARGS__) +#define svreinterpret_s16_bf16_x4(...) __builtin_sve_reinterpret_s16_bf16_x4(__VA_ARGS__) +#define svreinterpret_s16_f32_x4(...) __builtin_sve_reinterpret_s16_f32_x4(__VA_ARGS__) +#define svreinterpret_s16_f64_x4(...) __builtin_sve_reinterpret_s16_f64_x4(__VA_ARGS__) +#define svreinterpret_u16_s8_x4(...) __builtin_sve_reinterpret_u16_s8_x4(__VA_ARGS__) +#define svreinterpret_u16_u8_x4(...) __builtin_sve_reinterpret_u16_u8_x4(__VA_ARGS__) +#define svreinterpret_u16_s16_x4(...) __builtin_sve_reinterpret_u16_s16_x4(__VA_ARGS__) +#define svreinterpret_u16_u16_x4(...) __builtin_sve_reinterpret_u16_u16_x4(__VA_ARGS__) +#define svreinterpret_u16_s32_x4(...) __builtin_sve_reinterpret_u16_s32_x4(__VA_ARGS__) +#define svreinterpret_u16_u32_x4(...) 
__builtin_sve_reinterpret_u16_u32_x4(__VA_ARGS__) +#define svreinterpret_u16_s64_x4(...) __builtin_sve_reinterpret_u16_s64_x4(__VA_ARGS__) +#define svreinterpret_u16_u64_x4(...) __builtin_sve_reinterpret_u16_u64_x4(__VA_ARGS__) +#define svreinterpret_u16_f16_x4(...) __builtin_sve_reinterpret_u16_f16_x4(__VA_ARGS__) +#define svreinterpret_u16_bf16_x4(...) __builtin_sve_reinterpret_u16_bf16_x4(__VA_ARGS__) +#define svreinterpret_u16_f32_x4(...) __builtin_sve_reinterpret_u16_f32_x4(__VA_ARGS__) +#define svreinterpret_u16_f64_x4(...) __builtin_sve_reinterpret_u16_f64_x4(__VA_ARGS__) +#define svreinterpret_s32_s8_x4(...) __builtin_sve_reinterpret_s32_s8_x4(__VA_ARGS__) +#define svreinterpret_s32_u8_x4(...) __builtin_sve_reinterpret_s32_u8_x4(__VA_ARGS__) +#define svreinterpret_s32_s16_x4(...) __builtin_sve_reinterpret_s32_s16_x4(__VA_ARGS__) +#define svreinterpret_s32_u16_x4(...) __builtin_sve_reinterpret_s32_u16_x4(__VA_ARGS__) +#define svreinterpret_s32_s32_x4(...) __builtin_sve_reinterpret_s32_s32_x4(__VA_ARGS__) +#define svreinterpret_s32_u32_x4(...) __builtin_sve_reinterpret_s32_u32_x4(__VA_ARGS__) +#define svreinterpret_s32_s64_x4(...) __builtin_sve_reinterpret_s32_s64_x4(__VA_ARGS__) +#define svreinterpret_s32_u64_x4(...) __builtin_sve_reinterpret_s32_u64_x4(__VA_ARGS__) +#define svreinterpret_s32_f16_x4(...) __builtin_sve_reinterpret_s32_f16_x4(__VA_ARGS__) +#define svreinterpret_s32_bf16_x4(...) __builtin_sve_reinterpret_s32_bf16_x4(__VA_ARGS__) +#define svreinterpret_s32_f32_x4(...) __builtin_sve_reinterpret_s32_f32_x4(__VA_ARGS__) +#define svreinterpret_s32_f64_x4(...) __builtin_sve_reinterpret_s32_f64_x4(__VA_ARGS__) +#define svreinterpret_u32_s8_x4(...) __builtin_sve_reinterpret_u32_s8_x4(__VA_ARGS__) +#define svreinterpret_u32_u8_x4(...) __builtin_sve_reinterpret_u32_u8_x4(__VA_ARGS__) +#define svreinterpret_u32_s16_x4(...) __builtin_sve_reinterpret_u32_s16_x4(__VA_ARGS__) +#define svreinterpret_u32_u16_x4(...) __builtin_sve_reinterpret_u32_u16_x4(__VA_ARGS__) +#define svreinterpret_u32_s32_x4(...) __builtin_sve_reinterpret_u32_s32_x4(__VA_ARGS__) +#define svreinterpret_u32_u32_x4(...) __builtin_sve_reinterpret_u32_u32_x4(__VA_ARGS__) +#define svreinterpret_u32_s64_x4(...) __builtin_sve_reinterpret_u32_s64_x4(__VA_ARGS__) +#define svreinterpret_u32_u64_x4(...) __builtin_sve_reinterpret_u32_u64_x4(__VA_ARGS__) +#define svreinterpret_u32_f16_x4(...) __builtin_sve_reinterpret_u32_f16_x4(__VA_ARGS__) +#define svreinterpret_u32_bf16_x4(...) __builtin_sve_reinterpret_u32_bf16_x4(__VA_ARGS__) +#define svreinterpret_u32_f32_x4(...) __builtin_sve_reinterpret_u32_f32_x4(__VA_ARGS__) +#define svreinterpret_u32_f64_x4(...) __builtin_sve_reinterpret_u32_f64_x4(__VA_ARGS__) +#define svreinterpret_s64_s8_x4(...) __builtin_sve_reinterpret_s64_s8_x4(__VA_ARGS__) +#define svreinterpret_s64_u8_x4(...) __builtin_sve_reinterpret_s64_u8_x4(__VA_ARGS__) +#define svreinterpret_s64_s16_x4(...) __builtin_sve_reinterpret_s64_s16_x4(__VA_ARGS__) +#define svreinterpret_s64_u16_x4(...) __builtin_sve_reinterpret_s64_u16_x4(__VA_ARGS__) +#define svreinterpret_s64_s32_x4(...) __builtin_sve_reinterpret_s64_s32_x4(__VA_ARGS__) +#define svreinterpret_s64_u32_x4(...) __builtin_sve_reinterpret_s64_u32_x4(__VA_ARGS__) +#define svreinterpret_s64_s64_x4(...) __builtin_sve_reinterpret_s64_s64_x4(__VA_ARGS__) +#define svreinterpret_s64_u64_x4(...) __builtin_sve_reinterpret_s64_u64_x4(__VA_ARGS__) +#define svreinterpret_s64_f16_x4(...) __builtin_sve_reinterpret_s64_f16_x4(__VA_ARGS__) +#define svreinterpret_s64_bf16_x4(...) 
__builtin_sve_reinterpret_s64_bf16_x4(__VA_ARGS__) +#define svreinterpret_s64_f32_x4(...) __builtin_sve_reinterpret_s64_f32_x4(__VA_ARGS__) +#define svreinterpret_s64_f64_x4(...) __builtin_sve_reinterpret_s64_f64_x4(__VA_ARGS__) +#define svreinterpret_u64_s8_x4(...) __builtin_sve_reinterpret_u64_s8_x4(__VA_ARGS__) +#define svreinterpret_u64_u8_x4(...) __builtin_sve_reinterpret_u64_u8_x4(__VA_ARGS__) +#define svreinterpret_u64_s16_x4(...) __builtin_sve_reinterpret_u64_s16_x4(__VA_ARGS__) +#define svreinterpret_u64_u16_x4(...) __builtin_sve_reinterpret_u64_u16_x4(__VA_ARGS__) +#define svreinterpret_u64_s32_x4(...) __builtin_sve_reinterpret_u64_s32_x4(__VA_ARGS__) +#define svreinterpret_u64_u32_x4(...) __builtin_sve_reinterpret_u64_u32_x4(__VA_ARGS__) +#define svreinterpret_u64_s64_x4(...) __builtin_sve_reinterpret_u64_s64_x4(__VA_ARGS__) +#define svreinterpret_u64_u64_x4(...) __builtin_sve_reinterpret_u64_u64_x4(__VA_ARGS__) +#define svreinterpret_u64_f16_x4(...) __builtin_sve_reinterpret_u64_f16_x4(__VA_ARGS__) +#define svreinterpret_u64_bf16_x4(...) __builtin_sve_reinterpret_u64_bf16_x4(__VA_ARGS__) +#define svreinterpret_u64_f32_x4(...) __builtin_sve_reinterpret_u64_f32_x4(__VA_ARGS__) +#define svreinterpret_u64_f64_x4(...) __builtin_sve_reinterpret_u64_f64_x4(__VA_ARGS__) +#define svreinterpret_f16_s8_x4(...) __builtin_sve_reinterpret_f16_s8_x4(__VA_ARGS__) +#define svreinterpret_f16_u8_x4(...) __builtin_sve_reinterpret_f16_u8_x4(__VA_ARGS__) +#define svreinterpret_f16_s16_x4(...) __builtin_sve_reinterpret_f16_s16_x4(__VA_ARGS__) +#define svreinterpret_f16_u16_x4(...) __builtin_sve_reinterpret_f16_u16_x4(__VA_ARGS__) +#define svreinterpret_f16_s32_x4(...) __builtin_sve_reinterpret_f16_s32_x4(__VA_ARGS__) +#define svreinterpret_f16_u32_x4(...) __builtin_sve_reinterpret_f16_u32_x4(__VA_ARGS__) +#define svreinterpret_f16_s64_x4(...) __builtin_sve_reinterpret_f16_s64_x4(__VA_ARGS__) +#define svreinterpret_f16_u64_x4(...) __builtin_sve_reinterpret_f16_u64_x4(__VA_ARGS__) +#define svreinterpret_f16_f16_x4(...) __builtin_sve_reinterpret_f16_f16_x4(__VA_ARGS__) +#define svreinterpret_f16_bf16_x4(...) __builtin_sve_reinterpret_f16_bf16_x4(__VA_ARGS__) +#define svreinterpret_f16_f32_x4(...) __builtin_sve_reinterpret_f16_f32_x4(__VA_ARGS__) +#define svreinterpret_f16_f64_x4(...) __builtin_sve_reinterpret_f16_f64_x4(__VA_ARGS__) +#define svreinterpret_bf16_s8_x4(...) __builtin_sve_reinterpret_bf16_s8_x4(__VA_ARGS__) +#define svreinterpret_bf16_u8_x4(...) __builtin_sve_reinterpret_bf16_u8_x4(__VA_ARGS__) +#define svreinterpret_bf16_s16_x4(...) __builtin_sve_reinterpret_bf16_s16_x4(__VA_ARGS__) +#define svreinterpret_bf16_u16_x4(...) __builtin_sve_reinterpret_bf16_u16_x4(__VA_ARGS__) +#define svreinterpret_bf16_s32_x4(...) __builtin_sve_reinterpret_bf16_s32_x4(__VA_ARGS__) +#define svreinterpret_bf16_u32_x4(...) __builtin_sve_reinterpret_bf16_u32_x4(__VA_ARGS__) +#define svreinterpret_bf16_s64_x4(...) __builtin_sve_reinterpret_bf16_s64_x4(__VA_ARGS__) +#define svreinterpret_bf16_u64_x4(...) __builtin_sve_reinterpret_bf16_u64_x4(__VA_ARGS__) +#define svreinterpret_bf16_f16_x4(...) __builtin_sve_reinterpret_bf16_f16_x4(__VA_ARGS__) +#define svreinterpret_bf16_bf16_x4(...) __builtin_sve_reinterpret_bf16_bf16_x4(__VA_ARGS__) +#define svreinterpret_bf16_f32_x4(...) __builtin_sve_reinterpret_bf16_f32_x4(__VA_ARGS__) +#define svreinterpret_bf16_f64_x4(...) __builtin_sve_reinterpret_bf16_f64_x4(__VA_ARGS__) +#define svreinterpret_f32_s8_x4(...) 
__builtin_sve_reinterpret_f32_s8_x4(__VA_ARGS__) +#define svreinterpret_f32_u8_x4(...) __builtin_sve_reinterpret_f32_u8_x4(__VA_ARGS__) +#define svreinterpret_f32_s16_x4(...) __builtin_sve_reinterpret_f32_s16_x4(__VA_ARGS__) +#define svreinterpret_f32_u16_x4(...) __builtin_sve_reinterpret_f32_u16_x4(__VA_ARGS__) +#define svreinterpret_f32_s32_x4(...) __builtin_sve_reinterpret_f32_s32_x4(__VA_ARGS__) +#define svreinterpret_f32_u32_x4(...) __builtin_sve_reinterpret_f32_u32_x4(__VA_ARGS__) +#define svreinterpret_f32_s64_x4(...) __builtin_sve_reinterpret_f32_s64_x4(__VA_ARGS__) +#define svreinterpret_f32_u64_x4(...) __builtin_sve_reinterpret_f32_u64_x4(__VA_ARGS__) +#define svreinterpret_f32_f16_x4(...) __builtin_sve_reinterpret_f32_f16_x4(__VA_ARGS__) +#define svreinterpret_f32_bf16_x4(...) __builtin_sve_reinterpret_f32_bf16_x4(__VA_ARGS__) +#define svreinterpret_f32_f32_x4(...) __builtin_sve_reinterpret_f32_f32_x4(__VA_ARGS__) +#define svreinterpret_f32_f64_x4(...) __builtin_sve_reinterpret_f32_f64_x4(__VA_ARGS__) +#define svreinterpret_f64_s8_x4(...) __builtin_sve_reinterpret_f64_s8_x4(__VA_ARGS__) +#define svreinterpret_f64_u8_x4(...) __builtin_sve_reinterpret_f64_u8_x4(__VA_ARGS__) +#define svreinterpret_f64_s16_x4(...) __builtin_sve_reinterpret_f64_s16_x4(__VA_ARGS__) +#define svreinterpret_f64_u16_x4(...) __builtin_sve_reinterpret_f64_u16_x4(__VA_ARGS__) +#define svreinterpret_f64_s32_x4(...) __builtin_sve_reinterpret_f64_s32_x4(__VA_ARGS__) +#define svreinterpret_f64_u32_x4(...) __builtin_sve_reinterpret_f64_u32_x4(__VA_ARGS__) +#define svreinterpret_f64_s64_x4(...) __builtin_sve_reinterpret_f64_s64_x4(__VA_ARGS__) +#define svreinterpret_f64_u64_x4(...) __builtin_sve_reinterpret_f64_u64_x4(__VA_ARGS__) +#define svreinterpret_f64_f16_x4(...) __builtin_sve_reinterpret_f64_f16_x4(__VA_ARGS__) +#define svreinterpret_f64_bf16_x4(...) __builtin_sve_reinterpret_f64_bf16_x4(__VA_ARGS__) +#define svreinterpret_f64_f32_x4(...) __builtin_sve_reinterpret_f64_f32_x4(__VA_ARGS__) +#define svreinterpret_f64_f64_x4(...) 
__builtin_sve_reinterpret_f64_f64_x4(__VA_ARGS__) +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_u64_x4(op); +} + +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_f16_x4(op); +} + +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svint8x4_t svreinterpret_s8(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s8_f64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_u64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_f16_x4(op); +} 
+ +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint8x4_t svreinterpret_u8(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u8_f64_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_u64_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f16_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svint16x4_t svreinterpret_s16(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s16_f64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svuint32x4_t op) __arm_streaming_compatible { + return 
__builtin_sve_reinterpret_u16_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_u64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_f16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint16x4_t svreinterpret_u16(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u16_f64_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_u64_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f16_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svint32x4_t svreinterpret_s32(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s32_f64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint16x4_t op) 
__arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_u64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_f16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint32x4_t svreinterpret_u32(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u32_f64_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_u64_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f16_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t svreinterpret_s64(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svint64x4_t 
svreinterpret_s64(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_s64_f64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_u64_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_f16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svuint64x4_t svreinterpret_u64(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_u64_f64_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_u64_x4(op); +} + +__aio 
__attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_f16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat16x4_t svreinterpret_f16(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f16_f64_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_u64_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_f16_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svbfloat16x4_t svreinterpret_bf16(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_bf16_f64_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint32x4_t op) 
__arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_u64_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_f16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat32x4_t svreinterpret_f32(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f32_f64_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s8_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint8x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u8_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s32_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u32_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_s64_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svuint64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_u64_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_f16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svbfloat16x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_bf16_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svfloat32x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_f32_x4(op); +} + +__aio __attribute__((target("sve"))) svfloat64x4_t svreinterpret_f64(svfloat64x4_t op) __arm_streaming_compatible { + return __builtin_sve_reinterpret_f64_f64_x4(op); +} + +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m))) +svbfloat16_t svadd_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x))) +svbfloat16_t svadd_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z))) +svbfloat16_t svadd_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m))) +svbfloat16_t svadd_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x))) +svbfloat16_t svadd_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z))) +svbfloat16_t svadd_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16))) +svbfloat16_t svclamp_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m))) +svbfloat16_t svmax_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x))) +svbfloat16_t svmax_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z))) +svbfloat16_t svmax_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m))) +svbfloat16_t svmax_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x))) +svbfloat16_t svmax_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z))) +svbfloat16_t svmax_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m))) +svbfloat16_t svmaxnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x))) +svbfloat16_t svmaxnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z))) +svbfloat16_t svmaxnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m))) +svbfloat16_t svmaxnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x))) +svbfloat16_t svmaxnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z))) +svbfloat16_t svmaxnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m))) +svbfloat16_t svmin_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x))) +svbfloat16_t svmin_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z))) +svbfloat16_t svmin_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m))) +svbfloat16_t svmin_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x))) +svbfloat16_t svmin_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z))) +svbfloat16_t svmin_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m))) +svbfloat16_t svminnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x))) +svbfloat16_t svminnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z))) +svbfloat16_t svminnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m))) +svbfloat16_t svminnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x))) +svbfloat16_t svminnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z))) +svbfloat16_t svminnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m))) +svbfloat16_t svmla_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x))) +svbfloat16_t svmla_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z))) +svbfloat16_t svmla_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m))) +svbfloat16_t svmla_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x))) +svbfloat16_t svmla_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z))) +svbfloat16_t svmla_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16))) +svbfloat16_t svmla_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m))) +svbfloat16_t svmls_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x))) +svbfloat16_t svmls_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z))) +svbfloat16_t svmls_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m))) +svbfloat16_t svmls_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x))) +svbfloat16_t svmls_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z))) +svbfloat16_t svmls_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16))) +svbfloat16_t svmls_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m))) +svbfloat16_t svmul_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x))) +svbfloat16_t svmul_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z))) +svbfloat16_t svmul_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m))) +svbfloat16_t svmul_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x))) +svbfloat16_t svmul_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z))) +svbfloat16_t svmul_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16))) +svbfloat16_t svmul_lane_bf16(svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m))) +svbfloat16_t svsub_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x))) +svbfloat16_t svsub_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z))) +svbfloat16_t svsub_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m))) +svbfloat16_t svsub_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x))) +svbfloat16_t svsub_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z))) +svbfloat16_t svsub_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m))) +svbfloat16_t svadd_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x))) +svbfloat16_t svadd_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z))) +svbfloat16_t svadd_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m))) +svbfloat16_t svadd_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x))) +svbfloat16_t svadd_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z))) +svbfloat16_t svadd_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16))) +svbfloat16_t svclamp(svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m))) +svbfloat16_t svmax_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x))) +svbfloat16_t svmax_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z))) +svbfloat16_t svmax_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m))) +svbfloat16_t svmax_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x))) +svbfloat16_t svmax_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z))) +svbfloat16_t svmax_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m))) +svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x))) +svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z))) +svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m))) +svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x))) +svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z))) +svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m))) +svbfloat16_t svmin_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x))) +svbfloat16_t svmin_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z))) +svbfloat16_t svmin_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m))) +svbfloat16_t svmin_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x))) +svbfloat16_t svmin_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z))) +svbfloat16_t svmin_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m))) +svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x))) +svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z))) +svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m))) +svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x))) +svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z))) +svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m))) +svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x))) +svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z))) +svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m))) +svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x))) +svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z))) +svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16))) +svbfloat16_t svmla_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m))) +svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x))) +svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z))) +svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m))) +svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x))) +svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z))) +svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16))) +svbfloat16_t svmls_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m))) +svbfloat16_t svmul_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x))) +svbfloat16_t svmul_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z))) +svbfloat16_t svmul_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m))) +svbfloat16_t svmul_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x))) +svbfloat16_t svmul_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z))) +svbfloat16_t svmul_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16))) +svbfloat16_t svmul_lane(svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m))) +svbfloat16_t svsub_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x))) +svbfloat16_t svsub_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z))) +svbfloat16_t svsub_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m))) +svbfloat16_t svsub_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x))) +svbfloat16_t svsub_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z))) +svbfloat16_t svsub_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x2))) +svuint8x2_t svadd_single_u8_x2(svuint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x2))) +svuint32x2_t svadd_single_u32_x2(svuint32x2_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x2))) +svuint64x2_t svadd_single_u64_x2(svuint64x2_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x2))) +svuint16x2_t svadd_single_u16_x2(svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x2))) +svint8x2_t svadd_single_s8_x2(svint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x2))) +svint32x2_t 
svadd_single_s32_x2(svint32x2_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x2))) +svint64x2_t svadd_single_s64_x2(svint64x2_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x2))) +svint16x2_t svadd_single_s16_x2(svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x4))) +svuint8x4_t svadd_single_u8_x4(svuint8x4_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x4))) +svuint32x4_t svadd_single_u32_x4(svuint32x4_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x4))) +svuint64x4_t svadd_single_u64_x4(svuint64x4_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x4))) +svuint16x4_t svadd_single_u16_x4(svuint16x4_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x4))) +svint8x4_t svadd_single_s8_x4(svint8x4_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x4))) +svint32x4_t svadd_single_s32_x4(svint32x4_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x4))) +svint64x4_t svadd_single_s64_x4(svint64x4_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x4))) +svint16x4_t svadd_single_s16_x4(svint16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x2))) +svfloat64x2_t svclamp_single_f64_x2(svfloat64x2_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x2))) +svfloat32x2_t svclamp_single_f32_x2(svfloat32x2_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x2))) +svfloat16x2_t svclamp_single_f16_x2(svfloat16x2_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x2))) +svint8x2_t svclamp_single_s8_x2(svint8x2_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x2))) +svint32x2_t svclamp_single_s32_x2(svint32x2_t, svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x2))) +svint64x2_t svclamp_single_s64_x2(svint64x2_t, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x2))) +svint16x2_t svclamp_single_s16_x2(svint16x2_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x2))) +svuint8x2_t svclamp_single_u8_x2(svuint8x2_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x2))) +svuint32x2_t svclamp_single_u32_x2(svuint32x2_t, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x2))) +svuint64x2_t svclamp_single_u64_x2(svuint64x2_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x2))) +svuint16x2_t svclamp_single_u16_x2(svuint16x2_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x4))) +svfloat64x4_t svclamp_single_f64_x4(svfloat64x4_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x4))) +svfloat32x4_t svclamp_single_f32_x4(svfloat32x4_t, svfloat32_t, svfloat32_t); 
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x4))) +svfloat16x4_t svclamp_single_f16_x4(svfloat16x4_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x4))) +svint8x4_t svclamp_single_s8_x4(svint8x4_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x4))) +svint32x4_t svclamp_single_s32_x4(svint32x4_t, svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x4))) +svint64x4_t svclamp_single_s64_x4(svint64x4_t, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x4))) +svint16x4_t svclamp_single_s16_x4(svint16x4_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x4))) +svuint8x4_t svclamp_single_u8_x4(svuint8x4_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x4))) +svuint32x4_t svclamp_single_u32_x4(svuint32x4_t, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x4))) +svuint64x4_t svclamp_single_u64_x4(svuint64x4_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x4))) +svuint16x4_t svclamp_single_u16_x4(svuint16x4_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x2))) +svbfloat16_t svcvt_bf16_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_x2))) +svfloat16_t svcvt_f16_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x2))) +svint32x2_t svcvt_s32_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x2))) +svuint32x2_t svcvt_u32_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x4))) +svint32x4_t svcvt_s32_f32_x4(svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x4))) +svuint32x4_t svcvt_u32_f32_x4(svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x2))) +svfloat32x2_t svcvt_f32_s32_x2(svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x4))) +svfloat32x4_t svcvt_f32_s32_x4(svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x2))) +svfloat32x2_t svcvt_f32_u32_x2(svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x4))) +svfloat32x4_t svcvt_f32_u32_x4(svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_bf16_f32_x2))) +svbfloat16_t svcvtn_bf16_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_f16_f32_x2))) +svfloat16_t svcvtn_f16_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x2))) +svfloat64x2_t svmax_single_f64_x2(svfloat64x2_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x2))) +svfloat32x2_t svmax_single_f32_x2(svfloat32x2_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x2))) +svfloat16x2_t svmax_single_f16_x2(svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x2))) +svint8x2_t svmax_single_s8_x2(svint8x2_t, svint8_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x2))) +svint32x2_t svmax_single_s32_x2(svint32x2_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x2))) +svint64x2_t svmax_single_s64_x2(svint64x2_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x2))) +svint16x2_t svmax_single_s16_x2(svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x2))) +svuint8x2_t svmax_single_u8_x2(svuint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x2))) +svuint32x2_t svmax_single_u32_x2(svuint32x2_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x2))) +svuint64x2_t svmax_single_u64_x2(svuint64x2_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x2))) +svuint16x2_t svmax_single_u16_x2(svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x4))) +svfloat64x4_t svmax_single_f64_x4(svfloat64x4_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x4))) +svfloat32x4_t svmax_single_f32_x4(svfloat32x4_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x4))) +svfloat16x4_t svmax_single_f16_x4(svfloat16x4_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x4))) +svint8x4_t svmax_single_s8_x4(svint8x4_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x4))) +svint32x4_t svmax_single_s32_x4(svint32x4_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x4))) +svint64x4_t svmax_single_s64_x4(svint64x4_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x4))) +svint16x4_t svmax_single_s16_x4(svint16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x4))) +svuint8x4_t svmax_single_u8_x4(svuint8x4_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x4))) +svuint32x4_t svmax_single_u32_x4(svuint32x4_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x4))) +svuint64x4_t svmax_single_u64_x4(svuint64x4_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x4))) +svuint16x4_t svmax_single_u16_x4(svuint16x4_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x2))) +svfloat64x2_t svmax_f64_x2(svfloat64x2_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x2))) +svfloat32x2_t svmax_f32_x2(svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x2))) +svfloat16x2_t svmax_f16_x2(svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x2))) +svint8x2_t svmax_s8_x2(svint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x2))) +svint32x2_t svmax_s32_x2(svint32x2_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x2))) +svint64x2_t svmax_s64_x2(svint64x2_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x2))) +svint16x2_t svmax_s16_x2(svint16x2_t, svint16x2_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x2))) +svuint8x2_t svmax_u8_x2(svuint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x2))) +svuint32x2_t svmax_u32_x2(svuint32x2_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x2))) +svuint64x2_t svmax_u64_x2(svuint64x2_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x2))) +svuint16x2_t svmax_u16_x2(svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x4))) +svfloat64x4_t svmax_f64_x4(svfloat64x4_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x4))) +svfloat32x4_t svmax_f32_x4(svfloat32x4_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x4))) +svfloat16x4_t svmax_f16_x4(svfloat16x4_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x4))) +svint8x4_t svmax_s8_x4(svint8x4_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x4))) +svint32x4_t svmax_s32_x4(svint32x4_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x4))) +svint64x4_t svmax_s64_x4(svint64x4_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x4))) +svint16x4_t svmax_s16_x4(svint16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x4))) +svuint8x4_t svmax_u8_x4(svuint8x4_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x4))) +svuint32x4_t svmax_u32_x4(svuint32x4_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x4))) +svuint64x4_t svmax_u64_x4(svuint64x4_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x4))) +svuint16x4_t svmax_u16_x4(svuint16x4_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x2))) +svfloat64x2_t svmaxnm_single_f64_x2(svfloat64x2_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x2))) +svfloat32x2_t svmaxnm_single_f32_x2(svfloat32x2_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x2))) +svfloat16x2_t svmaxnm_single_f16_x2(svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x4))) +svfloat64x4_t svmaxnm_single_f64_x4(svfloat64x4_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x4))) +svfloat32x4_t svmaxnm_single_f32_x4(svfloat32x4_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x4))) +svfloat16x4_t svmaxnm_single_f16_x4(svfloat16x4_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x2))) +svfloat64x2_t svmaxnm_f64_x2(svfloat64x2_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x2))) +svfloat32x2_t svmaxnm_f32_x2(svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x2))) +svfloat16x2_t svmaxnm_f16_x2(svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x4))) +svfloat64x4_t svmaxnm_f64_x4(svfloat64x4_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x4))) +svfloat32x4_t 
svmaxnm_f32_x4(svfloat32x4_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x4))) +svfloat16x4_t svmaxnm_f16_x4(svfloat16x4_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x2))) +svfloat64x2_t svmin_single_f64_x2(svfloat64x2_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x2))) +svfloat32x2_t svmin_single_f32_x2(svfloat32x2_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x2))) +svfloat16x2_t svmin_single_f16_x2(svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x2))) +svint8x2_t svmin_single_s8_x2(svint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x2))) +svint32x2_t svmin_single_s32_x2(svint32x2_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x2))) +svint64x2_t svmin_single_s64_x2(svint64x2_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x2))) +svint16x2_t svmin_single_s16_x2(svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x2))) +svuint8x2_t svmin_single_u8_x2(svuint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x2))) +svuint32x2_t svmin_single_u32_x2(svuint32x2_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x2))) +svuint64x2_t svmin_single_u64_x2(svuint64x2_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x2))) +svuint16x2_t svmin_single_u16_x2(svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x4))) +svfloat64x4_t svmin_single_f64_x4(svfloat64x4_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x4))) +svfloat32x4_t svmin_single_f32_x4(svfloat32x4_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x4))) +svfloat16x4_t svmin_single_f16_x4(svfloat16x4_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x4))) +svint8x4_t svmin_single_s8_x4(svint8x4_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x4))) +svint32x4_t svmin_single_s32_x4(svint32x4_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x4))) +svint64x4_t svmin_single_s64_x4(svint64x4_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x4))) +svint16x4_t svmin_single_s16_x4(svint16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x4))) +svuint8x4_t svmin_single_u8_x4(svuint8x4_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x4))) +svuint32x4_t svmin_single_u32_x4(svuint32x4_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x4))) +svuint64x4_t svmin_single_u64_x4(svuint64x4_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x4))) +svuint16x4_t svmin_single_u16_x4(svuint16x4_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x2))) +svfloat64x2_t svmin_f64_x2(svfloat64x2_t, svfloat64x2_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x2))) +svfloat32x2_t svmin_f32_x2(svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x2))) +svfloat16x2_t svmin_f16_x2(svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x2))) +svint8x2_t svmin_s8_x2(svint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x2))) +svint32x2_t svmin_s32_x2(svint32x2_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x2))) +svint64x2_t svmin_s64_x2(svint64x2_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x2))) +svint16x2_t svmin_s16_x2(svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x2))) +svuint8x2_t svmin_u8_x2(svuint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x2))) +svuint32x2_t svmin_u32_x2(svuint32x2_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x2))) +svuint64x2_t svmin_u64_x2(svuint64x2_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x2))) +svuint16x2_t svmin_u16_x2(svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x4))) +svfloat64x4_t svmin_f64_x4(svfloat64x4_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x4))) +svfloat32x4_t svmin_f32_x4(svfloat32x4_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x4))) +svfloat16x4_t svmin_f16_x4(svfloat16x4_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x4))) +svint8x4_t svmin_s8_x4(svint8x4_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x4))) +svint32x4_t svmin_s32_x4(svint32x4_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x4))) +svint64x4_t svmin_s64_x4(svint64x4_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x4))) +svint16x4_t svmin_s16_x4(svint16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x4))) +svuint8x4_t svmin_u8_x4(svuint8x4_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x4))) +svuint32x4_t svmin_u32_x4(svuint32x4_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x4))) +svuint64x4_t svmin_u64_x4(svuint64x4_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x4))) +svuint16x4_t svmin_u16_x4(svuint16x4_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x2))) +svfloat64x2_t svminnm_single_f64_x2(svfloat64x2_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x2))) +svfloat32x2_t svminnm_single_f32_x2(svfloat32x2_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x2))) +svfloat16x2_t svminnm_single_f16_x2(svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x4))) +svfloat64x4_t svminnm_single_f64_x4(svfloat64x4_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x4))) +svfloat32x4_t svminnm_single_f32_x4(svfloat32x4_t, svfloat32_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x4))) +svfloat16x4_t svminnm_single_f16_x4(svfloat16x4_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x2))) +svfloat64x2_t svminnm_f64_x2(svfloat64x2_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x2))) +svfloat32x2_t svminnm_f32_x2(svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x2))) +svfloat16x2_t svminnm_f16_x2(svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x4))) +svfloat64x4_t svminnm_f64_x4(svfloat64x4_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x4))) +svfloat32x4_t svminnm_f32_x4(svfloat32x4_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x4))) +svfloat16x4_t svminnm_f16_x4(svfloat16x4_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s32_x2))) +svint16_t svqcvt_s16_s32_x2(svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s64_x4))) +svint16_t svqcvt_s16_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s8_s32_x4))) +svint8_t svqcvt_s8_s32_x4(svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s32_x2))) +svuint16_t svqcvt_u16_s32_x2(svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u32_x2))) +svuint16_t svqcvt_u16_u32_x2(svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s64_x4))) +svuint16_t svqcvt_u16_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u64_x4))) +svuint16_t svqcvt_u16_u64_x4(svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_s32_x4))) +svuint8_t svqcvt_u8_s32_x4(svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_u32_x4))) +svuint8_t svqcvt_u8_u32_x4(svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s64_x4))) +svint16_t svqcvtn_s16_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s8_s32_x4))) +svint8_t svqcvtn_s8_s32_x4(svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s64_x4))) +svuint16_t svqcvtn_u16_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u64_x4))) +svuint16_t svqcvtn_u16_u64_x4(svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_s32_x4))) +svuint8_t svqcvtn_u8_s32_x4(svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_u32_x4))) +svuint8_t svqcvtn_u8_u32_x4(svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x2))) +svint8x2_t svqdmulh_single_s8_x2(svint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x2))) +svint32x2_t svqdmulh_single_s32_x2(svint32x2_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x2))) +svint64x2_t svqdmulh_single_s64_x2(svint64x2_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x2))) +svint16x2_t svqdmulh_single_s16_x2(svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x4))) +svint8x4_t 
svqdmulh_single_s8_x4(svint8x4_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x4))) +svint32x4_t svqdmulh_single_s32_x4(svint32x4_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x4))) +svint64x4_t svqdmulh_single_s64_x4(svint64x4_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x4))) +svint16x4_t svqdmulh_single_s16_x4(svint16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x2))) +svint8x2_t svqdmulh_s8_x2(svint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x2))) +svint32x2_t svqdmulh_s32_x2(svint32x2_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x2))) +svint64x2_t svqdmulh_s64_x2(svint64x2_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x2))) +svint16x2_t svqdmulh_s16_x2(svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x4))) +svint8x4_t svqdmulh_s8_x4(svint8x4_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x4))) +svint32x4_t svqdmulh_s32_x4(svint32x4_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x4))) +svint64x4_t svqdmulh_s64_x4(svint64x4_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x4))) +svint16x4_t svqdmulh_s16_x4(svint16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s32_x2))) +svint16_t svqrshr_n_s16_s32_x2(svint32x2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u32_x2))) +svuint16_t svqrshr_n_u16_u32_x2(svuint32x2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s8_s32_x4))) +svint8_t svqrshr_n_s8_s32_x4(svint32x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s64_x4))) +svint16_t svqrshr_n_s16_s64_x4(svint64x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u8_u32_x4))) +svuint8_t svqrshr_n_u8_u32_x4(svuint32x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u64_x4))) +svuint16_t svqrshr_n_u16_u64_x4(svuint64x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s8_s32_x4))) +svint8_t svqrshrn_n_s8_s32_x4(svint32x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s64_x4))) +svint16_t svqrshrn_n_s16_s64_x4(svint64x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u8_u32_x4))) +svuint8_t svqrshrn_n_u8_u32_x4(svuint32x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u64_x4))) +svuint16_t svqrshrn_n_u16_u64_x4(svuint64x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s32_x2))) +svuint16_t svqrshru_n_u16_s32_x2(svint32x2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u8_s32_x4))) +svuint8_t svqrshru_n_u8_s32_x4(svint32x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s64_x4))) +svuint16_t svqrshru_n_u16_s64_x4(svint64x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u8_s32_x4))) +svuint8_t svqrshrun_n_u8_s32_x4(svint32x4_t, uint64_t); 
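A minimal usage sketch of the multi-vector tuple intrinsics declared above (the `_single_` saturating doubling-multiply-high forms and the `_n_` saturating rounding shift-narrow forms). This is an illustrative example, not part of the patch: it assumes an SME2-capable clang (e.g. -march=armv9-a+sme2), that these declarations are reached via <arm_sme.h>, and that the caller executes in streaming mode; the function name scale_and_narrow and the shift amount 15 are hypothetical.

/* Hedged sketch, not from the patch: scale a pair of 32-bit accumulators and
   narrow them to one 16-bit vector using the x2 tuple intrinsics declared
   above. Assumes an SME2 toolchain and a streaming-mode caller. */
#include <arm_sme.h>

svint16_t scale_and_narrow(svint32x2_t acc, svint32_t gain) __arm_streaming {
  /* Saturating doubling multiply-high of both vectors in the tuple against
     the single "gain" vector (svqdmulh_single_s32_x2). */
  svint32x2_t scaled = svqdmulh_single_s32_x2(acc, gain);
  /* Saturating rounding shift right by an immediate, narrowing the pair of
     s32 vectors into a single s16 vector (svqrshr_n_s16_s32_x2). */
  return svqrshr_n_s16_s32_x2(scaled, 15);
}

The same operations can also be spelled with the type-overloaded names (svqdmulh, svqrshr_s16) declared later in this hunk; the explicit _x2-suffixed names are used here only to make the tuple arity visible.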
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s64_x4))) +svuint16_t svqrshrun_n_u16_s64_x4(svint64x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b))) +svbool_t svreinterpret_b(svcount_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c))) +svcount_t svreinterpret_c(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x2))) +svfloat32x2_t svrinta_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x4))) +svfloat32x4_t svrinta_f32_x4(svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x2))) +svfloat32x2_t svrintm_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x4))) +svfloat32x4_t svrintm_f32_x4(svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x2))) +svfloat32x2_t svrintn_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x4))) +svfloat32x4_t svrintn_f32_x4(svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x2))) +svfloat32x2_t svrintp_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x4))) +svfloat32x4_t svrintp_f32_x4(svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x2))) +svint8x2_t svrshl_single_s8_x2(svint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x2))) +svint32x2_t svrshl_single_s32_x2(svint32x2_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x2))) +svint64x2_t svrshl_single_s64_x2(svint64x2_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x2))) +svint16x2_t svrshl_single_s16_x2(svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x2))) +svuint8x2_t svrshl_single_u8_x2(svuint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x2))) +svuint32x2_t svrshl_single_u32_x2(svuint32x2_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x2))) +svuint64x2_t svrshl_single_u64_x2(svuint64x2_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x2))) +svuint16x2_t svrshl_single_u16_x2(svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x4))) +svint8x4_t svrshl_single_s8_x4(svint8x4_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x4))) +svint32x4_t svrshl_single_s32_x4(svint32x4_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x4))) +svint64x4_t svrshl_single_s64_x4(svint64x4_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x4))) +svint16x4_t svrshl_single_s16_x4(svint16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x4))) +svuint8x4_t svrshl_single_u8_x4(svuint8x4_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x4))) +svuint32x4_t svrshl_single_u32_x4(svuint32x4_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x4))) +svuint64x4_t svrshl_single_u64_x4(svuint64x4_t, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x4))) +svuint16x4_t svrshl_single_u16_x4(svuint16x4_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x2))) +svint8x2_t svrshl_s8_x2(svint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x2))) +svint32x2_t svrshl_s32_x2(svint32x2_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x2))) +svint64x2_t svrshl_s64_x2(svint64x2_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x2))) +svint16x2_t svrshl_s16_x2(svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x2))) +svuint8x2_t svrshl_u8_x2(svuint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x2))) +svuint32x2_t svrshl_u32_x2(svuint32x2_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x2))) +svuint64x2_t svrshl_u64_x2(svuint64x2_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x2))) +svuint16x2_t svrshl_u16_x2(svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x4))) +svint8x4_t svrshl_s8_x4(svint8x4_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x4))) +svint32x4_t svrshl_s32_x4(svint32x4_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x4))) +svint64x4_t svrshl_s64_x4(svint64x4_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x4))) +svint16x4_t svrshl_s16_x4(svint16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x4))) +svuint8x4_t svrshl_u8_x4(svuint8x4_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x4))) +svuint32x4_t svrshl_u32_x4(svuint32x4_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x4))) +svuint64x4_t svrshl_u64_x4(svuint64x4_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x4))) +svuint16x4_t svrshl_u16_x4(svuint16x4_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x2))) +svuint8x2_t svsel_u8_x2(svcount_t, svuint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x2))) +svuint32x2_t svsel_u32_x2(svcount_t, svuint32x2_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x2))) +svuint64x2_t svsel_u64_x2(svcount_t, svuint64x2_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x2))) +svuint16x2_t svsel_u16_x2(svcount_t, svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x2))) +svbfloat16x2_t svsel_bf16_x2(svcount_t, svbfloat16x2_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x2))) +svint8x2_t svsel_s8_x2(svcount_t, svint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x2))) +svfloat64x2_t svsel_f64_x2(svcount_t, svfloat64x2_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x2))) +svfloat32x2_t svsel_f32_x2(svcount_t, svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x2))) +svfloat16x2_t svsel_f16_x2(svcount_t, svfloat16x2_t, 
svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x2))) +svint32x2_t svsel_s32_x2(svcount_t, svint32x2_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x2))) +svint64x2_t svsel_s64_x2(svcount_t, svint64x2_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x2))) +svint16x2_t svsel_s16_x2(svcount_t, svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x4))) +svuint8x4_t svsel_u8_x4(svcount_t, svuint8x4_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x4))) +svuint32x4_t svsel_u32_x4(svcount_t, svuint32x4_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x4))) +svuint64x4_t svsel_u64_x4(svcount_t, svuint64x4_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x4))) +svuint16x4_t svsel_u16_x4(svcount_t, svuint16x4_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x4))) +svbfloat16x4_t svsel_bf16_x4(svcount_t, svbfloat16x4_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x4))) +svint8x4_t svsel_s8_x4(svcount_t, svint8x4_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x4))) +svfloat64x4_t svsel_f64_x4(svcount_t, svfloat64x4_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x4))) +svfloat32x4_t svsel_f32_x4(svcount_t, svfloat32x4_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x4))) +svfloat16x4_t svsel_f16_x4(svcount_t, svfloat16x4_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x4))) +svint32x4_t svsel_s32_x4(svcount_t, svint32x4_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x4))) +svint64x4_t svsel_s64_x4(svcount_t, svint64x4_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x4))) +svint16x4_t svsel_s16_x4(svcount_t, svint16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x2))) +svint32x2_t svunpk_s32_s16_x2(svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x2))) +svint64x2_t svunpk_s64_s32_x2(svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x2))) +svint16x2_t svunpk_s16_s8_x2(svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x2))) +svuint32x2_t svunpk_u32_u16_x2(svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x2))) +svuint64x2_t svunpk_u64_u32_x2(svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x2))) +svuint16x2_t svunpk_u16_u8_x2(svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x4))) +svint32x4_t svunpk_s32_s16_x4(svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x4))) +svint64x4_t svunpk_s64_s32_x4(svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x4))) +svint16x4_t svunpk_s16_s8_x4(svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x4))) +svuint32x4_t svunpk_u32_u16_x4(svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x4))) +svuint64x4_t svunpk_u64_u32_x4(svuint32x2_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x4))) +svuint16x4_t svunpk_u16_u8_x4(svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x2))) +svuint8x2_t svuzp_u8_x2(svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x2))) +svuint32x2_t svuzp_u32_x2(svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x2))) +svuint64x2_t svuzp_u64_x2(svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x2))) +svuint16x2_t svuzp_u16_x2(svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x2))) +svbfloat16x2_t svuzp_bf16_x2(svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x2))) +svint8x2_t svuzp_s8_x2(svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x2))) +svfloat64x2_t svuzp_f64_x2(svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x2))) +svfloat32x2_t svuzp_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x2))) +svfloat16x2_t svuzp_f16_x2(svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x2))) +svint32x2_t svuzp_s32_x2(svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x2))) +svint64x2_t svuzp_s64_x2(svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x2))) +svint16x2_t svuzp_s16_x2(svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x4))) +svuint8x4_t svuzp_u8_x4(svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x4))) +svuint32x4_t svuzp_u32_x4(svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x4))) +svuint64x4_t svuzp_u64_x4(svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x4))) +svuint16x4_t svuzp_u16_x4(svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x4))) +svbfloat16x4_t svuzp_bf16_x4(svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x4))) +svint8x4_t svuzp_s8_x4(svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x4))) +svfloat64x4_t svuzp_f64_x4(svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x4))) +svfloat32x4_t svuzp_f32_x4(svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x4))) +svfloat16x4_t svuzp_f16_x4(svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x4))) +svint32x4_t svuzp_s32_x4(svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x4))) +svint64x4_t svuzp_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x4))) +svint16x4_t svuzp_s16_x4(svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x2))) +svuint8x2_t svuzpq_u8_x2(svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x2))) +svuint32x2_t svuzpq_u32_x2(svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x2))) +svuint64x2_t svuzpq_u64_x2(svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x2))) +svuint16x2_t svuzpq_u16_x2(svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x2))) +svbfloat16x2_t 
svuzpq_bf16_x2(svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x2))) +svint8x2_t svuzpq_s8_x2(svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x2))) +svfloat64x2_t svuzpq_f64_x2(svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x2))) +svfloat32x2_t svuzpq_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x2))) +svfloat16x2_t svuzpq_f16_x2(svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x2))) +svint32x2_t svuzpq_s32_x2(svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x2))) +svint64x2_t svuzpq_s64_x2(svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x2))) +svint16x2_t svuzpq_s16_x2(svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x4))) +svuint8x4_t svuzpq_u8_x4(svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x4))) +svuint32x4_t svuzpq_u32_x4(svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x4))) +svuint64x4_t svuzpq_u64_x4(svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x4))) +svuint16x4_t svuzpq_u16_x4(svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x4))) +svbfloat16x4_t svuzpq_bf16_x4(svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x4))) +svint8x4_t svuzpq_s8_x4(svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x4))) +svfloat64x4_t svuzpq_f64_x4(svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x4))) +svfloat32x4_t svuzpq_f32_x4(svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x4))) +svfloat16x4_t svuzpq_f16_x4(svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x4))) +svint32x4_t svuzpq_s32_x4(svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x4))) +svint64x4_t svuzpq_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x4))) +svint16x4_t svuzpq_s16_x4(svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x2))) +svuint8x2_t svzip_u8_x2(svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x2))) +svuint32x2_t svzip_u32_x2(svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x2))) +svuint64x2_t svzip_u64_x2(svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x2))) +svuint16x2_t svzip_u16_x2(svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x2))) +svbfloat16x2_t svzip_bf16_x2(svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x2))) +svint8x2_t svzip_s8_x2(svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x2))) +svfloat64x2_t svzip_f64_x2(svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x2))) +svfloat32x2_t svzip_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x2))) +svfloat16x2_t svzip_f16_x2(svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x2))) +svint32x2_t svzip_s32_x2(svint32x2_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x2))) +svint64x2_t svzip_s64_x2(svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x2))) +svint16x2_t svzip_s16_x2(svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x4))) +svuint8x4_t svzip_u8_x4(svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x4))) +svuint32x4_t svzip_u32_x4(svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x4))) +svuint64x4_t svzip_u64_x4(svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x4))) +svuint16x4_t svzip_u16_x4(svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x4))) +svbfloat16x4_t svzip_bf16_x4(svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x4))) +svint8x4_t svzip_s8_x4(svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x4))) +svfloat64x4_t svzip_f64_x4(svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x4))) +svfloat32x4_t svzip_f32_x4(svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x4))) +svfloat16x4_t svzip_f16_x4(svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x4))) +svint32x4_t svzip_s32_x4(svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x4))) +svint64x4_t svzip_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x4))) +svint16x4_t svzip_s16_x4(svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x2))) +svuint8x2_t svzipq_u8_x2(svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x2))) +svuint32x2_t svzipq_u32_x2(svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x2))) +svuint64x2_t svzipq_u64_x2(svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x2))) +svuint16x2_t svzipq_u16_x2(svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x2))) +svbfloat16x2_t svzipq_bf16_x2(svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x2))) +svint8x2_t svzipq_s8_x2(svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x2))) +svfloat64x2_t svzipq_f64_x2(svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x2))) +svfloat32x2_t svzipq_f32_x2(svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x2))) +svfloat16x2_t svzipq_f16_x2(svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x2))) +svint32x2_t svzipq_s32_x2(svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x2))) +svint64x2_t svzipq_s64_x2(svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x2))) +svint16x2_t svzipq_s16_x2(svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x4))) +svuint8x4_t svzipq_u8_x4(svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x4))) +svuint32x4_t svzipq_u32_x4(svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x4))) +svuint64x4_t svzipq_u64_x4(svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x4))) +svuint16x4_t 
svzipq_u16_x4(svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x4))) +svbfloat16x4_t svzipq_bf16_x4(svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x4))) +svint8x4_t svzipq_s8_x4(svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x4))) +svfloat64x4_t svzipq_f64_x4(svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x4))) +svfloat32x4_t svzipq_f32_x4(svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x4))) +svfloat16x4_t svzipq_f16_x4(svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x4))) +svint32x4_t svzipq_s32_x4(svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x4))) +svint64x4_t svzipq_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x4))) +svint16x4_t svzipq_s16_x4(svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x2))) +svuint8x2_t svadd(svuint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x2))) +svuint32x2_t svadd(svuint32x2_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x2))) +svuint64x2_t svadd(svuint64x2_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x2))) +svuint16x2_t svadd(svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x2))) +svint8x2_t svadd(svint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x2))) +svint32x2_t svadd(svint32x2_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x2))) +svint64x2_t svadd(svint64x2_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x2))) +svint16x2_t svadd(svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x4))) +svuint8x4_t svadd(svuint8x4_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u32_x4))) +svuint32x4_t svadd(svuint32x4_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u64_x4))) +svuint64x4_t svadd(svuint64x4_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u16_x4))) +svuint16x4_t svadd(svuint16x4_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s8_x4))) +svint8x4_t svadd(svint8x4_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s32_x4))) +svint32x4_t svadd(svint32x4_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s64_x4))) +svint64x4_t svadd(svint64x4_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_s16_x4))) +svint16x4_t svadd(svint16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x2))) +svfloat64x2_t svclamp(svfloat64x2_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x2))) +svfloat32x2_t svclamp(svfloat32x2_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x2))) +svfloat16x2_t svclamp(svfloat16x2_t, svfloat16_t, svfloat16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x2))) +svint8x2_t svclamp(svint8x2_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x2))) +svint32x2_t svclamp(svint32x2_t, svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x2))) +svint64x2_t svclamp(svint64x2_t, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x2))) +svint16x2_t svclamp(svint16x2_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x2))) +svuint8x2_t svclamp(svuint8x2_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x2))) +svuint32x2_t svclamp(svuint32x2_t, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x2))) +svuint64x2_t svclamp(svuint64x2_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x2))) +svuint16x2_t svclamp(svuint16x2_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f64_x4))) +svfloat64x4_t svclamp(svfloat64x4_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f32_x4))) +svfloat32x4_t svclamp(svfloat32x4_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_f16_x4))) +svfloat16x4_t svclamp(svfloat16x4_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s8_x4))) +svint8x4_t svclamp(svint8x4_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s32_x4))) +svint32x4_t svclamp(svint32x4_t, svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s64_x4))) +svint64x4_t svclamp(svint64x4_t, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_s16_x4))) +svint16x4_t svclamp(svint16x4_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u8_x4))) +svuint8x4_t svclamp(svuint8x4_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u32_x4))) +svuint32x4_t svclamp(svuint32x4_t, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u64_x4))) +svuint64x4_t svclamp(svuint64x4_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_u16_x4))) +svuint16x4_t svclamp(svuint16x4_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x2))) +svbfloat16_t svcvt_bf16(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_x2))) +svfloat16_t svcvt_f16(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x2))) +svint32x2_t svcvt_s32(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x2))) +svuint32x2_t svcvt_u32(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x4))) +svint32x4_t svcvt_s32(svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x4))) +svuint32x4_t svcvt_u32(svfloat32x4_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x2))) +svfloat32x2_t svcvt_f32(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x4))) +svfloat32x4_t svcvt_f32(svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x2))) +svfloat32x2_t svcvt_f32(svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x4))) +svfloat32x4_t svcvt_f32(svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_bf16_f32_x2))) +svbfloat16_t svcvtn_bf16(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_f16_f32_x2))) +svfloat16_t svcvtn_f16(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x2))) +svfloat64x2_t svmax(svfloat64x2_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x2))) +svfloat32x2_t svmax(svfloat32x2_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x2))) +svfloat16x2_t svmax(svfloat16x2_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x2))) +svint8x2_t svmax(svint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x2))) +svint32x2_t svmax(svint32x2_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x2))) +svint64x2_t svmax(svint64x2_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x2))) +svint16x2_t svmax(svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x2))) +svuint8x2_t svmax(svuint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x2))) +svuint32x2_t svmax(svuint32x2_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x2))) +svuint64x2_t svmax(svuint64x2_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x2))) +svuint16x2_t svmax(svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f64_x4))) +svfloat64x4_t svmax(svfloat64x4_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f32_x4))) +svfloat32x4_t svmax(svfloat32x4_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_f16_x4))) +svfloat16x4_t svmax(svfloat16x4_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s8_x4))) +svint8x4_t svmax(svint8x4_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s32_x4))) +svint32x4_t svmax(svint32x4_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s64_x4))) +svint64x4_t svmax(svint64x4_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_s16_x4))) +svint16x4_t svmax(svint16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u8_x4))) +svuint8x4_t svmax(svuint8x4_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u32_x4))) +svuint32x4_t svmax(svuint32x4_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u64_x4))) +svuint64x4_t svmax(svuint64x4_t, svuint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_single_u16_x4))) +svuint16x4_t svmax(svuint16x4_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x2))) +svfloat64x2_t svmax(svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x2))) +svfloat32x2_t svmax(svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x2))) +svfloat16x2_t svmax(svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x2))) +svint8x2_t svmax(svint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x2))) +svint32x2_t svmax(svint32x2_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x2))) +svint64x2_t svmax(svint64x2_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x2))) +svint16x2_t svmax(svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x2))) +svuint8x2_t svmax(svuint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x2))) +svuint32x2_t svmax(svuint32x2_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x2))) +svuint64x2_t svmax(svuint64x2_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x2))) +svuint16x2_t svmax(svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x4))) +svfloat64x4_t svmax(svfloat64x4_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x4))) +svfloat32x4_t svmax(svfloat32x4_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x4))) +svfloat16x4_t svmax(svfloat16x4_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x4))) +svint8x4_t svmax(svint8x4_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x4))) +svint32x4_t svmax(svint32x4_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x4))) +svint64x4_t svmax(svint64x4_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x4))) +svint16x4_t svmax(svint16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x4))) +svuint8x4_t svmax(svuint8x4_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x4))) +svuint32x4_t svmax(svuint32x4_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x4))) +svuint64x4_t svmax(svuint64x4_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x4))) +svuint16x4_t svmax(svuint16x4_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x2))) +svfloat64x2_t svmaxnm(svfloat64x2_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x2))) +svfloat32x2_t svmaxnm(svfloat32x2_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x2))) +svfloat16x2_t svmaxnm(svfloat16x2_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f64_x4))) +svfloat64x4_t svmaxnm(svfloat64x4_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f32_x4))) +svfloat32x4_t 
svmaxnm(svfloat32x4_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_single_f16_x4))) +svfloat16x4_t svmaxnm(svfloat16x4_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x2))) +svfloat64x2_t svmaxnm(svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x2))) +svfloat32x2_t svmaxnm(svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x2))) +svfloat16x2_t svmaxnm(svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x4))) +svfloat64x4_t svmaxnm(svfloat64x4_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x4))) +svfloat32x4_t svmaxnm(svfloat32x4_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x4))) +svfloat16x4_t svmaxnm(svfloat16x4_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x2))) +svfloat64x2_t svmin(svfloat64x2_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x2))) +svfloat32x2_t svmin(svfloat32x2_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x2))) +svfloat16x2_t svmin(svfloat16x2_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x2))) +svint8x2_t svmin(svint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x2))) +svint32x2_t svmin(svint32x2_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x2))) +svint64x2_t svmin(svint64x2_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x2))) +svint16x2_t svmin(svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x2))) +svuint8x2_t svmin(svuint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x2))) +svuint32x2_t svmin(svuint32x2_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x2))) +svuint64x2_t svmin(svuint64x2_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x2))) +svuint16x2_t svmin(svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f64_x4))) +svfloat64x4_t svmin(svfloat64x4_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f32_x4))) +svfloat32x4_t svmin(svfloat32x4_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_f16_x4))) +svfloat16x4_t svmin(svfloat16x4_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s8_x4))) +svint8x4_t svmin(svint8x4_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s32_x4))) +svint32x4_t svmin(svint32x4_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s64_x4))) +svint64x4_t svmin(svint64x4_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_s16_x4))) +svint16x4_t svmin(svint16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u8_x4))) +svuint8x4_t svmin(svuint8x4_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u32_x4))) 
+svuint32x4_t svmin(svuint32x4_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u64_x4))) +svuint64x4_t svmin(svuint64x4_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_single_u16_x4))) +svuint16x4_t svmin(svuint16x4_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x2))) +svfloat64x2_t svmin(svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x2))) +svfloat32x2_t svmin(svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x2))) +svfloat16x2_t svmin(svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x2))) +svint8x2_t svmin(svint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x2))) +svint32x2_t svmin(svint32x2_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x2))) +svint64x2_t svmin(svint64x2_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x2))) +svint16x2_t svmin(svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x2))) +svuint8x2_t svmin(svuint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x2))) +svuint32x2_t svmin(svuint32x2_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x2))) +svuint64x2_t svmin(svuint64x2_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x2))) +svuint16x2_t svmin(svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x4))) +svfloat64x4_t svmin(svfloat64x4_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x4))) +svfloat32x4_t svmin(svfloat32x4_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x4))) +svfloat16x4_t svmin(svfloat16x4_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x4))) +svint8x4_t svmin(svint8x4_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x4))) +svint32x4_t svmin(svint32x4_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x4))) +svint64x4_t svmin(svint64x4_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x4))) +svint16x4_t svmin(svint16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x4))) +svuint8x4_t svmin(svuint8x4_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x4))) +svuint32x4_t svmin(svuint32x4_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x4))) +svuint64x4_t svmin(svuint64x4_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x4))) +svuint16x4_t svmin(svuint16x4_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x2))) +svfloat64x2_t svminnm(svfloat64x2_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x2))) +svfloat32x2_t svminnm(svfloat32x2_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x2))) +svfloat16x2_t svminnm(svfloat16x2_t, svfloat16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f64_x4))) +svfloat64x4_t svminnm(svfloat64x4_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f32_x4))) +svfloat32x4_t svminnm(svfloat32x4_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_single_f16_x4))) +svfloat16x4_t svminnm(svfloat16x4_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x2))) +svfloat64x2_t svminnm(svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x2))) +svfloat32x2_t svminnm(svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x2))) +svfloat16x2_t svminnm(svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x4))) +svfloat64x4_t svminnm(svfloat64x4_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x4))) +svfloat32x4_t svminnm(svfloat32x4_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x4))) +svfloat16x4_t svminnm(svfloat16x4_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s32_x2))) +svint16_t svqcvt_s16(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s16_s64_x4))) +svint16_t svqcvt_s16(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_s8_s32_x4))) +svint8_t svqcvt_s8(svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s32_x2))) +svuint16_t svqcvt_u16(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u32_x2))) +svuint16_t svqcvt_u16(svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_s64_x4))) +svuint16_t svqcvt_u16(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u16_u64_x4))) +svuint16_t svqcvt_u16(svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_s32_x4))) +svuint8_t svqcvt_u8(svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvt_u8_u32_x4))) +svuint8_t svqcvt_u8(svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s64_x4))) +svint16_t svqcvtn_s16(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s8_s32_x4))) +svint8_t svqcvtn_s8(svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s64_x4))) +svuint16_t svqcvtn_u16(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u64_x4))) +svuint16_t svqcvtn_u16(svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_s32_x4))) +svuint8_t svqcvtn_u8(svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u8_u32_x4))) +svuint8_t svqcvtn_u8(svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x2))) +svint8x2_t svqdmulh(svint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x2))) +svint32x2_t svqdmulh(svint32x2_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x2))) +svint64x2_t svqdmulh(svint64x2_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x2))) +svint16x2_t svqdmulh(svint16x2_t, svint16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s8_x4))) +svint8x4_t svqdmulh(svint8x4_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s32_x4))) +svint32x4_t svqdmulh(svint32x4_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s64_x4))) +svint64x4_t svqdmulh(svint64x4_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_single_s16_x4))) +svint16x4_t svqdmulh(svint16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x2))) +svint8x2_t svqdmulh(svint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x2))) +svint32x2_t svqdmulh(svint32x2_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x2))) +svint64x2_t svqdmulh(svint64x2_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x2))) +svint16x2_t svqdmulh(svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8_x4))) +svint8x4_t svqdmulh(svint8x4_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32_x4))) +svint32x4_t svqdmulh(svint32x4_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64_x4))) +svint64x4_t svqdmulh(svint64x4_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16_x4))) +svint16x4_t svqdmulh(svint16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s32_x2))) +svint16_t svqrshr_s16(svint32x2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u32_x2))) +svuint16_t svqrshr_u16(svuint32x2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s8_s32_x4))) +svint8_t svqrshr_s8(svint32x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_s16_s64_x4))) +svint16_t svqrshr_s16(svint64x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u8_u32_x4))) +svuint8_t svqrshr_u8(svuint32x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshr_n_u16_u64_x4))) +svuint16_t svqrshr_u16(svuint64x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s8_s32_x4))) +svint8_t svqrshrn_s8(svint32x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s64_x4))) +svint16_t svqrshrn_s16(svint64x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u8_u32_x4))) +svuint8_t svqrshrn_u8(svuint32x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u64_x4))) +svuint16_t svqrshrn_u16(svuint64x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s32_x2))) +svuint16_t svqrshru_u16(svint32x2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u8_s32_x4))) +svuint8_t svqrshru_u8(svint32x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshru_n_u16_s64_x4))) +svuint16_t svqrshru_u16(svint64x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u8_s32_x4))) +svuint8_t svqrshrun_u8(svint32x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s64_x4))) +svuint16_t 
svqrshrun_u16(svint64x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b))) +svbool_t svreinterpret(svcount_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c))) +svcount_t svreinterpret(svbool_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x2))) +svfloat32x2_t svrinta(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x4))) +svfloat32x4_t svrinta(svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x2))) +svfloat32x2_t svrintm(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x4))) +svfloat32x4_t svrintm(svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x2))) +svfloat32x2_t svrintn(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x4))) +svfloat32x4_t svrintn(svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x2))) +svfloat32x2_t svrintp(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x4))) +svfloat32x4_t svrintp(svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x2))) +svint8x2_t svrshl(svint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x2))) +svint32x2_t svrshl(svint32x2_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x2))) +svint64x2_t svrshl(svint64x2_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x2))) +svint16x2_t svrshl(svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x2))) +svuint8x2_t svrshl(svuint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x2))) +svuint32x2_t svrshl(svuint32x2_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x2))) +svuint64x2_t svrshl(svuint64x2_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x2))) +svuint16x2_t svrshl(svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s8_x4))) +svint8x4_t svrshl(svint8x4_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s32_x4))) +svint32x4_t svrshl(svint32x4_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s64_x4))) +svint64x4_t svrshl(svint64x4_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_s16_x4))) +svint16x4_t svrshl(svint16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u8_x4))) +svuint8x4_t svrshl(svuint8x4_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u32_x4))) +svuint32x4_t svrshl(svuint32x4_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u64_x4))) +svuint64x4_t svrshl(svuint64x4_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_single_u16_x4))) +svuint16x4_t svrshl(svuint16x4_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x2))) +svint8x2_t svrshl(svint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x2))) +svint32x2_t 
svrshl(svint32x2_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x2))) +svint64x2_t svrshl(svint64x2_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x2))) +svint16x2_t svrshl(svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x2))) +svuint8x2_t svrshl(svuint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x2))) +svuint32x2_t svrshl(svuint32x2_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x2))) +svuint64x2_t svrshl(svuint64x2_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x2))) +svuint16x2_t svrshl(svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x4))) +svint8x4_t svrshl(svint8x4_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x4))) +svint32x4_t svrshl(svint32x4_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x4))) +svint64x4_t svrshl(svint64x4_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x4))) +svint16x4_t svrshl(svint16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x4))) +svuint8x4_t svrshl(svuint8x4_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x4))) +svuint32x4_t svrshl(svuint32x4_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x4))) +svuint64x4_t svrshl(svuint64x4_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x4))) +svuint16x4_t svrshl(svuint16x4_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x2))) +svuint8x2_t svsel(svcount_t, svuint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x2))) +svuint32x2_t svsel(svcount_t, svuint32x2_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x2))) +svuint64x2_t svsel(svcount_t, svuint64x2_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x2))) +svuint16x2_t svsel(svcount_t, svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x2))) +svbfloat16x2_t svsel(svcount_t, svbfloat16x2_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x2))) +svint8x2_t svsel(svcount_t, svint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x2))) +svfloat64x2_t svsel(svcount_t, svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x2))) +svfloat32x2_t svsel(svcount_t, svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x2))) +svfloat16x2_t svsel(svcount_t, svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x2))) +svint32x2_t svsel(svcount_t, svint32x2_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x2))) +svint64x2_t svsel(svcount_t, svint64x2_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x2))) +svint16x2_t svsel(svcount_t, svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x4))) +svuint8x4_t 
svsel(svcount_t, svuint8x4_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32_x4))) +svuint32x4_t svsel(svcount_t, svuint32x4_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64_x4))) +svuint64x4_t svsel(svcount_t, svuint64x4_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16_x4))) +svuint16x4_t svsel(svcount_t, svuint16x4_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16_x4))) +svbfloat16x4_t svsel(svcount_t, svbfloat16x4_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8_x4))) +svint8x4_t svsel(svcount_t, svint8x4_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64_x4))) +svfloat64x4_t svsel(svcount_t, svfloat64x4_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32_x4))) +svfloat32x4_t svsel(svcount_t, svfloat32x4_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16_x4))) +svfloat16x4_t svsel(svcount_t, svfloat16x4_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x4))) +svint32x4_t svsel(svcount_t, svint32x4_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x4))) +svint64x4_t svsel(svcount_t, svint64x4_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x4))) +svint16x4_t svsel(svcount_t, svint16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x2))) +svint32x2_t svunpk_s32(svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x2))) +svint64x2_t svunpk_s64(svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x2))) +svint16x2_t svunpk_s16(svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x2))) +svuint32x2_t svunpk_u32(svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x2))) +svuint64x2_t svunpk_u64(svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x2))) +svuint16x2_t svunpk_u16(svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x4))) +svint32x4_t svunpk_s32(svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s64_s32_x4))) +svint64x4_t svunpk_s64(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s16_s8_x4))) +svint16x4_t svunpk_s16(svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u32_u16_x4))) +svuint32x4_t svunpk_u32(svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u64_u32_x4))) +svuint64x4_t svunpk_u64(svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_u16_u8_x4))) +svuint16x4_t svunpk_u16(svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x2))) +svuint8x2_t svuzp(svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x2))) +svuint32x2_t svuzp(svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x2))) +svuint64x2_t svuzp(svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x2))) +svuint16x2_t svuzp(svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x2))) +svbfloat16x2_t 
svuzp(svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x2))) +svint8x2_t svuzp(svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x2))) +svfloat64x2_t svuzp(svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x2))) +svfloat32x2_t svuzp(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x2))) +svfloat16x2_t svuzp(svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x2))) +svint32x2_t svuzp(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x2))) +svint64x2_t svuzp(svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x2))) +svint16x2_t svuzp(svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x4))) +svuint8x4_t svuzp(svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u32_x4))) +svuint32x4_t svuzp(svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u64_x4))) +svuint64x4_t svuzp(svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u16_x4))) +svuint16x4_t svuzp(svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_bf16_x4))) +svbfloat16x4_t svuzp(svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s8_x4))) +svint8x4_t svuzp(svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f64_x4))) +svfloat64x4_t svuzp(svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f32_x4))) +svfloat32x4_t svuzp(svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_f16_x4))) +svfloat16x4_t svuzp(svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x4))) +svint32x4_t svuzp(svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x4))) +svint64x4_t svuzp(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x4))) +svint16x4_t svuzp(svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x2))) +svuint8x2_t svuzpq(svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x2))) +svuint32x2_t svuzpq(svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x2))) +svuint64x2_t svuzpq(svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x2))) +svuint16x2_t svuzpq(svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x2))) +svbfloat16x2_t svuzpq(svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x2))) +svint8x2_t svuzpq(svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x2))) +svfloat64x2_t svuzpq(svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x2))) +svfloat32x2_t svuzpq(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x2))) +svfloat16x2_t svuzpq(svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x2))) +svint32x2_t svuzpq(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x2))) +svint64x2_t svuzpq(svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x2))) +svint16x2_t svuzpq(svint16x2_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x4))) +svuint8x4_t svuzpq(svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u32_x4))) +svuint32x4_t svuzpq(svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u64_x4))) +svuint64x4_t svuzpq(svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u16_x4))) +svuint16x4_t svuzpq(svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_bf16_x4))) +svbfloat16x4_t svuzpq(svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s8_x4))) +svint8x4_t svuzpq(svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f64_x4))) +svfloat64x4_t svuzpq(svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f32_x4))) +svfloat32x4_t svuzpq(svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_f16_x4))) +svfloat16x4_t svuzpq(svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x4))) +svint32x4_t svuzpq(svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x4))) +svint64x4_t svuzpq(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x4))) +svint16x4_t svuzpq(svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x2))) +svuint8x2_t svzip(svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x2))) +svuint32x2_t svzip(svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x2))) +svuint64x2_t svzip(svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x2))) +svuint16x2_t svzip(svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x2))) +svbfloat16x2_t svzip(svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x2))) +svint8x2_t svzip(svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x2))) +svfloat64x2_t svzip(svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x2))) +svfloat32x2_t svzip(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x2))) +svfloat16x2_t svzip(svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x2))) +svint32x2_t svzip(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x2))) +svint64x2_t svzip(svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x2))) +svint16x2_t svzip(svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x4))) +svuint8x4_t svzip(svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u32_x4))) +svuint32x4_t svzip(svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u64_x4))) +svuint64x4_t svzip(svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u16_x4))) +svuint16x4_t svzip(svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_bf16_x4))) +svbfloat16x4_t svzip(svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s8_x4))) +svint8x4_t svzip(svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f64_x4))) +svfloat64x4_t svzip(svfloat64x4_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f32_x4))) +svfloat32x4_t svzip(svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_f16_x4))) +svfloat16x4_t svzip(svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x4))) +svint32x4_t svzip(svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x4))) +svint64x4_t svzip(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x4))) +svint16x4_t svzip(svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x2))) +svuint8x2_t svzipq(svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x2))) +svuint32x2_t svzipq(svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x2))) +svuint64x2_t svzipq(svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x2))) +svuint16x2_t svzipq(svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x2))) +svbfloat16x2_t svzipq(svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x2))) +svint8x2_t svzipq(svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x2))) +svfloat64x2_t svzipq(svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x2))) +svfloat32x2_t svzipq(svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x2))) +svfloat16x2_t svzipq(svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x2))) +svint32x2_t svzipq(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x2))) +svint64x2_t svzipq(svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x2))) +svint16x2_t svzipq(svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x4))) +svuint8x4_t svzipq(svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u32_x4))) +svuint32x4_t svzipq(svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u64_x4))) +svuint64x4_t svzipq(svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u16_x4))) +svuint16x4_t svzipq(svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_bf16_x4))) +svbfloat16x4_t svzipq(svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s8_x4))) +svint8x4_t svzipq(svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f64_x4))) +svfloat64x4_t svzipq(svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f32_x4))) +svfloat32x4_t svzipq(svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_f16_x4))) +svfloat16x4_t svzipq(svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x4))) +svint32x4_t svzipq(svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x4))) +svint64x4_t svzipq(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x4))) +svint16x4_t svzipq(svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s32_x2))) +svint16_t svqcvtn_s16_s32_x2(svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s32_x2))) +svuint16_t 
svqcvtn_u16_s32_x2(svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u32_x2))) +svuint16_t svqcvtn_u16_u32_x2(svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s32_x2))) +svint16_t svqcvtn_s16(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s32_x2))) +svuint16_t svqcvtn_u16(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u32_x2))) +svuint16_t svqcvtn_u16(svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) svfloat64_t svabd_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_m))) @@ -23877,6 +27810,1834 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32))) svuint32_t svsm4e(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32))) svuint32_t svsm4ekey(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u8))) +uint8x16_t svaddqv_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u32))) +uint32x4_t svaddqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u64))) +uint64x2_t svaddqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u16))) +uint16x8_t svaddqv_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s8))) +int8x16_t svaddqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f64))) +float64x2_t svaddqv_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f32))) +float32x4_t svaddqv_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f16))) +float16x8_t svaddqv_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s32))) +int32x4_t svaddqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s64))) +int64x2_t svaddqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s16))) +int16x8_t svaddqv_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u8))) +uint8x16_t svandqv_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u32))) +uint32x4_t svandqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u64))) +uint64x2_t svandqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u16))) +uint16x8_t svandqv_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s8))) +int8x16_t svandqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s32))) +int32x4_t svandqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s64))) +int64x2_t svandqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s16))) +int16x8_t svandqv_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u8))) +uint8x16_t sveorqv_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u32))) +uint32x4_t 
sveorqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u64))) +uint64x2_t sveorqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u16))) +uint16x8_t sveorqv_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s8))) +int8x16_t sveorqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s32))) +int32x4_t sveorqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s64))) +int64x2_t sveorqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s16))) +int16x8_t sveorqv_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u8))) +svuint8_t svextq_u8(svuint8_t, svuint8_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u32))) +svuint32_t svextq_u32(svuint32_t, svuint32_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u64))) +svuint64_t svextq_u64(svuint64_t, svuint64_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u16))) +svuint16_t svextq_u16(svuint16_t, svuint16_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_bf16))) +svbfloat16_t svextq_bf16(svbfloat16_t, svbfloat16_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s8))) +svint8_t svextq_s8(svint8_t, svint8_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f64))) +svfloat64_t svextq_f64(svfloat64_t, svfloat64_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f32))) +svfloat32_t svextq_f32(svfloat32_t, svfloat32_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f16))) +svfloat16_t svextq_f16(svfloat16_t, svfloat16_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32))) +svint32_t svextq_s32(svint32_t, svint32_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64))) +svint64_t svextq_s64(svint64_t, svint64_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16))) +svint16_t svextq_s16(svint16_t, svint16_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) +svuint32_t svld1q_gather_u64base_index_u32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64))) +svuint64_t svld1q_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16))) +svuint16_t svld1q_gather_u64base_index_u16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16))) +svbfloat16_t svld1q_gather_u64base_index_bf16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64))) +svfloat64_t svld1q_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32))) +svfloat32_t svld1q_gather_u64base_index_f32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16))) +svfloat16_t svld1q_gather_u64base_index_f16(svbool_t, svuint64_t, int64_t); 
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32))) +svint32_t svld1q_gather_u64base_index_s32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64))) +svint64_t svld1q_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16))) +svint16_t svld1q_gather_u64base_index_s16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8))) +svuint8_t svld1q_gather_u64base_offset_u8(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32))) +svuint32_t svld1q_gather_u64base_offset_u32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64))) +svuint64_t svld1q_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16))) +svuint16_t svld1q_gather_u64base_offset_u16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16))) +svbfloat16_t svld1q_gather_u64base_offset_bf16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8))) +svint8_t svld1q_gather_u64base_offset_s8(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64))) +svfloat64_t svld1q_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32))) +svfloat32_t svld1q_gather_u64base_offset_f32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16))) +svfloat16_t svld1q_gather_u64base_offset_f16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32))) +svint32_t svld1q_gather_u64base_offset_s32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64))) +svint64_t svld1q_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16))) +svint16_t svld1q_gather_u64base_offset_s16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8))) +svuint8_t svld1q_gather_u64base_u8(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32))) +svuint32_t svld1q_gather_u64base_u32(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64))) +svuint64_t svld1q_gather_u64base_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16))) +svuint16_t svld1q_gather_u64base_u16(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16))) +svbfloat16_t svld1q_gather_u64base_bf16(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8))) +svint8_t svld1q_gather_u64base_s8(svbool_t, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64))) +svfloat64_t svld1q_gather_u64base_f64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32))) +svfloat32_t svld1q_gather_u64base_f32(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16))) +svfloat16_t svld1q_gather_u64base_f16(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32))) +svint32_t svld1q_gather_u64base_s32(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64))) +svint64_t svld1q_gather_u64base_s64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16))) +svint16_t svld1q_gather_u64base_s16(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32))) +svuint32_t svld1q_gather_u64index_u32(svbool_t, uint32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64))) +svuint64_t svld1q_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16))) +svuint16_t svld1q_gather_u64index_u16(svbool_t, uint16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16))) +svbfloat16_t svld1q_gather_u64index_bf16(svbool_t, bfloat16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64))) +svfloat64_t svld1q_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32))) +svfloat32_t svld1q_gather_u64index_f32(svbool_t, float32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16))) +svfloat16_t svld1q_gather_u64index_f16(svbool_t, float16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32))) +svint32_t svld1q_gather_u64index_s32(svbool_t, int32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64))) +svint64_t svld1q_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16))) +svint16_t svld1q_gather_u64index_s16(svbool_t, int16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8))) +svuint8_t svld1q_gather_u64offset_u8(svbool_t, uint8_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32))) +svuint32_t svld1q_gather_u64offset_u32(svbool_t, uint32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64))) +svuint64_t svld1q_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16))) +svuint16_t svld1q_gather_u64offset_u16(svbool_t, uint16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16))) +svbfloat16_t svld1q_gather_u64offset_bf16(svbool_t, bfloat16_t const *, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8))) +svint8_t svld1q_gather_u64offset_s8(svbool_t, int8_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64))) +svfloat64_t svld1q_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32))) +svfloat32_t svld1q_gather_u64offset_f32(svbool_t, float32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16))) +svfloat16_t svld1q_gather_u64offset_f16(svbool_t, float16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32))) +svint32_t svld1q_gather_u64offset_s32(svbool_t, int32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64))) +svint64_t svld1q_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16))) +svint16_t svld1q_gather_u64offset_s16(svbool_t, int16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64))) +svuint64_t svld1udq_u64(svbool_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64))) +svfloat64_t svld1udq_f64(svbool_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64))) +svint64_t svld1udq_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64))) +svuint64_t svld1udq_vnum_u64(svbool_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64))) +svfloat64_t svld1udq_vnum_f64(svbool_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64))) +svint64_t svld1udq_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32))) +svuint32_t svld1uwq_u32(svbool_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32))) +svfloat32_t svld1uwq_f32(svbool_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32))) +svint32_t svld1uwq_s32(svbool_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32))) +svuint32_t svld1uwq_vnum_u32(svbool_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32))) +svfloat32_t svld1uwq_vnum_f32(svbool_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32))) +svint32_t svld1uwq_vnum_s32(svbool_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u8))) +svuint8x2_t svld2q_u8(svbool_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u32))) +svuint32x2_t svld2q_u32(svbool_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u64))) +svuint64x2_t svld2q_u64(svbool_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u16))) +svuint16x2_t svld2q_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s8))) +svint8x2_t svld2q_s8(svbool_t, int8_t const *); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f64))) +svfloat64x2_t svld2q_f64(svbool_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f32))) +svfloat32x2_t svld2q_f32(svbool_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f16))) +svfloat16x2_t svld2q_f16(svbool_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s32))) +svint32x2_t svld2q_s32(svbool_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s64))) +svint64x2_t svld2q_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s16))) +svint16x2_t svld2q_s16(svbool_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_bf16))) +svbfloat16x2_t svld2q_bf16(svbool_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u8))) +svuint8x2_t svld2q_vnum_u8(svbool_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u32))) +svuint32x2_t svld2q_vnum_u32(svbool_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u64))) +svuint64x2_t svld2q_vnum_u64(svbool_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u16))) +svuint16x2_t svld2q_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s8))) +svint8x2_t svld2q_vnum_s8(svbool_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f64))) +svfloat64x2_t svld2q_vnum_f64(svbool_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f32))) +svfloat32x2_t svld2q_vnum_f32(svbool_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f16))) +svfloat16x2_t svld2q_vnum_f16(svbool_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s32))) +svint32x2_t svld2q_vnum_s32(svbool_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s64))) +svint64x2_t svld2q_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s16))) +svint16x2_t svld2q_vnum_s16(svbool_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_bf16))) +svbfloat16x2_t svld2q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u8))) +svuint8x3_t svld3q_u8(svbool_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u32))) +svuint32x3_t svld3q_u32(svbool_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u64))) +svuint64x3_t svld3q_u64(svbool_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u16))) +svuint16x3_t svld3q_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s8))) +svint8x3_t svld3q_s8(svbool_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f64))) +svfloat64x3_t svld3q_f64(svbool_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f32))) +svfloat32x3_t svld3q_f32(svbool_t, 
float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f16))) +svfloat16x3_t svld3q_f16(svbool_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s32))) +svint32x3_t svld3q_s32(svbool_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s64))) +svint64x3_t svld3q_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s16))) +svint16x3_t svld3q_s16(svbool_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_bf16))) +svbfloat16x3_t svld3q_bf16(svbool_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u8))) +svuint8x3_t svld3q_vnum_u8(svbool_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u32))) +svuint32x3_t svld3q_vnum_u32(svbool_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u64))) +svuint64x3_t svld3q_vnum_u64(svbool_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u16))) +svuint16x3_t svld3q_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s8))) +svint8x3_t svld3q_vnum_s8(svbool_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f64))) +svfloat64x3_t svld3q_vnum_f64(svbool_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f32))) +svfloat32x3_t svld3q_vnum_f32(svbool_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f16))) +svfloat16x3_t svld3q_vnum_f16(svbool_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s32))) +svint32x3_t svld3q_vnum_s32(svbool_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s64))) +svint64x3_t svld3q_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s16))) +svint16x3_t svld3q_vnum_s16(svbool_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_bf16))) +svbfloat16x3_t svld3q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u8))) +svuint8x4_t svld4q_u8(svbool_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u32))) +svuint32x4_t svld4q_u32(svbool_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u64))) +svuint64x4_t svld4q_u64(svbool_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u16))) +svuint16x4_t svld4q_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s8))) +svint8x4_t svld4q_s8(svbool_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f64))) +svfloat64x4_t svld4q_f64(svbool_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f32))) +svfloat32x4_t svld4q_f32(svbool_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f16))) +svfloat16x4_t svld4q_f16(svbool_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s32))) +svint32x4_t 
svld4q_s32(svbool_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s64))) +svint64x4_t svld4q_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s16))) +svint16x4_t svld4q_s16(svbool_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_bf16))) +svbfloat16x4_t svld4q_bf16(svbool_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u8))) +svuint8x4_t svld4q_vnum_u8(svbool_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u32))) +svuint32x4_t svld4q_vnum_u32(svbool_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u64))) +svuint64x4_t svld4q_vnum_u64(svbool_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u16))) +svuint16x4_t svld4q_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s8))) +svint8x4_t svld4q_vnum_s8(svbool_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f64))) +svfloat64x4_t svld4q_vnum_f64(svbool_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f32))) +svfloat32x4_t svld4q_vnum_f32(svbool_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f16))) +svfloat16x4_t svld4q_vnum_f16(svbool_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s32))) +svint32x4_t svld4q_vnum_s32(svbool_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s64))) +svint64x4_t svld4q_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s16))) +svint16x4_t svld4q_vnum_s16(svbool_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_bf16))) +svbfloat16x4_t svld4q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f64))) +float64x2_t svmaxnmqv_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f32))) +float32x4_t svmaxnmqv_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f16))) +float16x8_t svmaxnmqv_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f64))) +float64x2_t svmaxqv_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f32))) +float32x4_t svmaxqv_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f16))) +float16x8_t svmaxqv_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s8))) +int8x16_t svmaxqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s32))) +int32x4_t svmaxqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s64))) +int64x2_t svmaxqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s16))) +int16x8_t svmaxqv_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u8))) +uint8x16_t svmaxqv_u8(svbool_t, 
svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u32))) +uint32x4_t svmaxqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u64))) +uint64x2_t svmaxqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u16))) +uint16x8_t svmaxqv_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f64))) +float64x2_t svminnmqv_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f32))) +float32x4_t svminnmqv_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f16))) +float16x8_t svminnmqv_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f64))) +float64x2_t svminqv_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f32))) +float32x4_t svminqv_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f16))) +float16x8_t svminqv_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s8))) +int8x16_t svminqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s32))) +int32x4_t svminqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s64))) +int64x2_t svminqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s16))) +int16x8_t svminqv_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u8))) +uint8x16_t svminqv_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u32))) +uint32x4_t svminqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u64))) +uint64x2_t svminqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u16))) +uint16x8_t svminqv_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u8))) +uint8x16_t svorqv_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u32))) +uint32x4_t svorqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u64))) +uint64x2_t svorqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u16))) +uint16x8_t svorqv_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s8))) +int8x16_t svorqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s32))) +int32x4_t svorqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s64))) +int64x2_t svorqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s16))) +int16x8_t svorqv_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8))) +svbool_t svpmov_u8(svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8))) +svbool_t svpmov_s8(svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64))) +svbool_t svpmov_u64(svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64))) +svbool_t svpmov_s64(svint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16))) +svbool_t svpmov_u16(svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16))) +svbool_t svpmov_s16(svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32))) +svbool_t svpmov_u32(svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32))) +svbool_t svpmov_s32(svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u8))) +svbool_t svpmov_lane_u8(svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s8))) +svbool_t svpmov_lane_s8(svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64))) +svbool_t svpmov_lane_u64(svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64))) +svbool_t svpmov_lane_s64(svint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16))) +svbool_t svpmov_lane_u16(svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16))) +svbool_t svpmov_lane_s16(svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32))) +svbool_t svpmov_lane_u32(svuint32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32))) +svbool_t svpmov_lane_s32(svint32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64_m))) +svuint64_t svpmov_lane_u64_m(svuint64_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64_m))) +svint64_t svpmov_lane_s64_m(svint64_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16_m))) +svuint16_t svpmov_lane_u16_m(svuint16_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16_m))) +svint16_t svpmov_lane_s16_m(svint16_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32_m))) +svuint32_t svpmov_lane_u32_m(svuint32_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32_m))) +svint32_t svpmov_lane_s32_m(svint32_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8_z))) +svuint8_t svpmov_u8_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8_z))) +svint8_t svpmov_s8_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64_z))) +svuint64_t svpmov_u64_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64_z))) +svint64_t svpmov_s64_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16_z))) +svuint16_t svpmov_u16_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16_z))) +svint16_t svpmov_s16_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32_z))) +svuint32_t svpmov_u32_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32_z))) +svint32_t svpmov_s32_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) +void svst1dq_u64(svbool_t, uint64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) +void svst1dq_f64(svbool_t, float64_t const *, svfloat64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64))) +void svst1dq_s64(svbool_t, int64_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64))) +void svst1dq_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64))) +void svst1dq_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64))) +void svst1dq_vnum_s64(svbool_t, int64_t const *, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8))) +void svst1q_scatter_u64base_u8(svbool_t, svuint64_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32))) +void svst1q_scatter_u64base_u32(svbool_t, svuint64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64))) +void svst1q_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16))) +void svst1q_scatter_u64base_u16(svbool_t, svuint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16))) +void svst1q_scatter_u64base_bf16(svbool_t, svuint64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8))) +void svst1q_scatter_u64base_s8(svbool_t, svuint64_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64))) +void svst1q_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32))) +void svst1q_scatter_u64base_f32(svbool_t, svuint64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16))) +void svst1q_scatter_u64base_f16(svbool_t, svuint64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32))) +void svst1q_scatter_u64base_s32(svbool_t, svuint64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64))) +void svst1q_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16))) +void svst1q_scatter_u64base_s16(svbool_t, svuint64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32))) +void svst1q_scatter_u64base_index_u32(svbool_t, svuint64_t, int64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64))) +void svst1q_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16))) +void svst1q_scatter_u64base_index_u16(svbool_t, svuint64_t, int64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16))) +void svst1q_scatter_u64base_index_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64))) +void svst1q_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32))) +void svst1q_scatter_u64base_index_f32(svbool_t, 
svuint64_t, int64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16))) +void svst1q_scatter_u64base_index_f16(svbool_t, svuint64_t, int64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32))) +void svst1q_scatter_u64base_index_s32(svbool_t, svuint64_t, int64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64))) +void svst1q_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16))) +void svst1q_scatter_u64base_index_s16(svbool_t, svuint64_t, int64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8))) +void svst1q_scatter_u64base_offset_u8(svbool_t, svuint64_t, int64_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32))) +void svst1q_scatter_u64base_offset_u32(svbool_t, svuint64_t, int64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64))) +void svst1q_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16))) +void svst1q_scatter_u64base_offset_u16(svbool_t, svuint64_t, int64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16))) +void svst1q_scatter_u64base_offset_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8))) +void svst1q_scatter_u64base_offset_s8(svbool_t, svuint64_t, int64_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64))) +void svst1q_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32))) +void svst1q_scatter_u64base_offset_f32(svbool_t, svuint64_t, int64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16))) +void svst1q_scatter_u64base_offset_f16(svbool_t, svuint64_t, int64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32))) +void svst1q_scatter_u64base_offset_s32(svbool_t, svuint64_t, int64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64))) +void svst1q_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) +void svst1q_scatter_u64base_offset_s16(svbool_t, svuint64_t, int64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32))) +void svst1q_scatter_u64index_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64))) +void svst1q_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16))) +void svst1q_scatter_u64index_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16))) +void 
svst1q_scatter_u64index_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64))) +void svst1q_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32))) +void svst1q_scatter_u64index_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16))) +void svst1q_scatter_u64index_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32))) +void svst1q_scatter_u64index_s32(svbool_t, int32_t *, svuint64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64))) +void svst1q_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16))) +void svst1q_scatter_u64index_s16(svbool_t, int16_t *, svuint64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) +void svst1q_scatter_u64offset_u8(svbool_t, uint8_t *, svuint64_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32))) +void svst1q_scatter_u64offset_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64))) +void svst1q_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16))) +void svst1q_scatter_u64offset_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16))) +void svst1q_scatter_u64offset_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8))) +void svst1q_scatter_u64offset_s8(svbool_t, int8_t *, svuint64_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64))) +void svst1q_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32))) +void svst1q_scatter_u64offset_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16))) +void svst1q_scatter_u64offset_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32))) +void svst1q_scatter_u64offset_s32(svbool_t, int32_t *, svuint64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64))) +void svst1q_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) +void svst1q_scatter_u64offset_s16(svbool_t, int16_t *, svuint64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) +void svst1wq_u32(svbool_t, uint32_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32))) +void svst1wq_f32(svbool_t, float32_t const *, svfloat32_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32))) +void svst1wq_s32(svbool_t, int32_t const *, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32))) +void svst1wq_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) +void svst1wq_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) +void svst1wq_vnum_s32(svbool_t, int32_t const *, int64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8))) +void svst2q_u8(svbool_t, uint8_t const *, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32))) +void svst2q_u32(svbool_t, uint32_t const *, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u64))) +void svst2q_u64(svbool_t, uint64_t const *, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u16))) +void svst2q_u16(svbool_t, uint16_t const *, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s8))) +void svst2q_s8(svbool_t, int8_t const *, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f64))) +void svst2q_f64(svbool_t, float64_t const *, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f32))) +void svst2q_f32(svbool_t, float32_t const *, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f16))) +void svst2q_f16(svbool_t, float16_t const *, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s32))) +void svst2q_s32(svbool_t, int32_t const *, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s64))) +void svst2q_s64(svbool_t, int64_t const *, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s16))) +void svst2q_s16(svbool_t, int16_t const *, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_bf16))) +void svst2q_bf16(svbool_t, bfloat16_t const *, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u8))) +void svst2q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u32))) +void svst2q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u64))) +void svst2q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u16))) +void svst2q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s8))) +void svst2q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f64))) +void svst2q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f32))) +void svst2q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f16))) +void svst2q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s32))) +void svst2q_vnum_s32(svbool_t, int32_t 
const *, int64_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s64))) +void svst2q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s16))) +void svst2q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_bf16))) +void svst2q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u8))) +void svst3q_u8(svbool_t, uint8_t const *, svuint8x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u32))) +void svst3q_u32(svbool_t, uint32_t const *, svuint32x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u64))) +void svst3q_u64(svbool_t, uint64_t const *, svuint64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u16))) +void svst3q_u16(svbool_t, uint16_t const *, svuint16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s8))) +void svst3q_s8(svbool_t, int8_t const *, svint8x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f64))) +void svst3q_f64(svbool_t, float64_t const *, svfloat64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f32))) +void svst3q_f32(svbool_t, float32_t const *, svfloat32x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f16))) +void svst3q_f16(svbool_t, float16_t const *, svfloat16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s32))) +void svst3q_s32(svbool_t, int32_t const *, svint32x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s64))) +void svst3q_s64(svbool_t, int64_t const *, svint64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s16))) +void svst3q_s16(svbool_t, int16_t const *, svint16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_bf16))) +void svst3q_bf16(svbool_t, bfloat16_t const *, svbfloat16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u8))) +void svst3q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u32))) +void svst3q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u64))) +void svst3q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u16))) +void svst3q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s8))) +void svst3q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f64))) +void svst3q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f32))) +void svst3q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f16))) +void svst3q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s32))) +void svst3q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x3_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s64))) +void svst3q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s16))) +void svst3q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_bf16))) +void svst3q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u8))) +void svst4q_u8(svbool_t, uint8_t const *, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u32))) +void svst4q_u32(svbool_t, uint32_t const *, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u64))) +void svst4q_u64(svbool_t, uint64_t const *, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u16))) +void svst4q_u16(svbool_t, uint16_t const *, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s8))) +void svst4q_s8(svbool_t, int8_t const *, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f64))) +void svst4q_f64(svbool_t, float64_t const *, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f32))) +void svst4q_f32(svbool_t, float32_t const *, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f16))) +void svst4q_f16(svbool_t, float16_t const *, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s32))) +void svst4q_s32(svbool_t, int32_t const *, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s64))) +void svst4q_s64(svbool_t, int64_t const *, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s16))) +void svst4q_s16(svbool_t, int16_t const *, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_bf16))) +void svst4q_bf16(svbool_t, bfloat16_t const *, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u8))) +void svst4q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u32))) +void svst4q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u64))) +void svst4q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u16))) +void svst4q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s8))) +void svst4q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f64))) +void svst4q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f32))) +void svst4q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f16))) +void svst4q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s32))) +void svst4q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s64))) +void 
svst4q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s16))) +void svst4q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_bf16))) +void svst4q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u8))) +svuint8_t svtblq_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u32))) +svuint32_t svtblq_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u64))) +svuint64_t svtblq_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u16))) +svuint16_t svtblq_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_bf16))) +svbfloat16_t svtblq_bf16(svbfloat16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s8))) +svint8_t svtblq_s8(svint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f64))) +svfloat64_t svtblq_f64(svfloat64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f32))) +svfloat32_t svtblq_f32(svfloat32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f16))) +svfloat16_t svtblq_f16(svfloat16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32))) +svint32_t svtblq_s32(svint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64))) +svint64_t svtblq_s64(svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16))) +svint16_t svtblq_s16(svint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8))) +svuint8_t svtbxq_u8(svuint8_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u32))) +svuint32_t svtbxq_u32(svuint32_t, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u64))) +svuint64_t svtbxq_u64(svuint64_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u16))) +svuint16_t svtbxq_u16(svuint16_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_bf16))) +svbfloat16_t svtbxq_bf16(svbfloat16_t, svbfloat16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s8))) +svint8_t svtbxq_s8(svint8_t, svint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f64))) +svfloat64_t svtbxq_f64(svfloat64_t, svfloat64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f32))) +svfloat32_t svtbxq_f32(svfloat32_t, svfloat32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f16))) +svfloat16_t svtbxq_f16(svfloat16_t, svfloat16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32))) +svint32_t svtbxq_s32(svint32_t, svint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64))) +svint64_t svtbxq_s64(svint64_t, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16))) +svint16_t svtbxq_s16(svint16_t, svint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8))) +svuint8_t 
svuzpq1_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u32))) +svuint32_t svuzpq1_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u64))) +svuint64_t svuzpq1_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u16))) +svuint16_t svuzpq1_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_bf16))) +svbfloat16_t svuzpq1_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s8))) +svint8_t svuzpq1_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f64))) +svfloat64_t svuzpq1_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f32))) +svfloat32_t svuzpq1_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f16))) +svfloat16_t svuzpq1_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32))) +svint32_t svuzpq1_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64))) +svint64_t svuzpq1_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16))) +svint16_t svuzpq1_s16(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8))) +svuint8_t svuzpq2_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u32))) +svuint32_t svuzpq2_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u64))) +svuint64_t svuzpq2_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u16))) +svuint16_t svuzpq2_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_bf16))) +svbfloat16_t svuzpq2_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s8))) +svint8_t svuzpq2_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f64))) +svfloat64_t svuzpq2_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f32))) +svfloat32_t svuzpq2_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f16))) +svfloat16_t svuzpq2_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32))) +svint32_t svuzpq2_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64))) +svint64_t svuzpq2_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16))) +svint16_t svuzpq2_s16(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8))) +svuint8_t svzipq1_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u32))) +svuint32_t svzipq1_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u64))) +svuint64_t svzipq1_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u16))) +svuint16_t svzipq1_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_bf16))) +svbfloat16_t svzipq1_bf16(svbfloat16_t, svbfloat16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s8))) +svint8_t svzipq1_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f64))) +svfloat64_t svzipq1_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f32))) +svfloat32_t svzipq1_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f16))) +svfloat16_t svzipq1_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32))) +svint32_t svzipq1_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64))) +svint64_t svzipq1_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16))) +svint16_t svzipq1_s16(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8))) +svuint8_t svzipq2_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u32))) +svuint32_t svzipq2_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u64))) +svuint64_t svzipq2_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u16))) +svuint16_t svzipq2_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_bf16))) +svbfloat16_t svzipq2_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s8))) +svint8_t svzipq2_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f64))) +svfloat64_t svzipq2_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f32))) +svfloat32_t svzipq2_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f16))) +svfloat16_t svzipq2_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32))) +svint32_t svzipq2_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64))) +svint64_t svzipq2_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16))) +svint16_t svzipq2_s16(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u8))) +uint8x16_t svaddqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u32))) +uint32x4_t svaddqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u64))) +uint64x2_t svaddqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u16))) +uint16x8_t svaddqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s8))) +int8x16_t svaddqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f64))) +float64x2_t svaddqv(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f32))) +float32x4_t svaddqv(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f16))) +float16x8_t svaddqv(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s32))) +int32x4_t svaddqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s64))) +int64x2_t svaddqv(svbool_t, svint64_t); 
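/* --------------------------------------------------------------------------
 * Editor's illustrative sketch -- NOT part of the upstream patch or of the
 * generated arm_sve.h above. It only shows how a few of the SVE2.1 quadword
 * intrinsics declared in this hunk might be used from a separate translation
 * unit. Assumptions: the TU is compiled with an SVE2.1-enabled target such as
 * -march=armv9-a+sve2p1, and the function names below are hypothetical.
 * -------------------------------------------------------------------------- */
#if 0 /* illustrative only; keep disabled inside the header context */
#include <arm_sve.h>

/* svaddqv (aliased to __builtin_sve_svaddqv_u32 above) reduces across the
 * 128-bit segments of an SVE vector: roughly, lane i of the returned
 * uint32x4_t is the sum of lane i of every active quadword segment of v,
 * so the result width is fixed regardless of the runtime vector length. */
uint32x4_t sum_per_lane_across_segments(svbool_t pg, svuint32_t v) {
  return svaddqv(pg, v);
}

/* svtblq_u8 (declared above) is a per-segment table lookup: each result
 * byte is selected from the corresponding 128-bit segment of `table` using
 * the index in `indices`, i.e. a NEON-style TBL applied independently to
 * every quadword segment, which is the "q" distinction of this family. */
svuint8_t shuffle_within_segments(svuint8_t table, svuint8_t indices) {
  return svtblq_u8(table, indices);
}
#endif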
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s16))) +int16x8_t svaddqv(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u8))) +uint8x16_t svandqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u32))) +uint32x4_t svandqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u64))) +uint64x2_t svandqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u16))) +uint16x8_t svandqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s8))) +int8x16_t svandqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s32))) +int32x4_t svandqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s64))) +int64x2_t svandqv(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s16))) +int16x8_t svandqv(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u8))) +uint8x16_t sveorqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u32))) +uint32x4_t sveorqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u64))) +uint64x2_t sveorqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u16))) +uint16x8_t sveorqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s8))) +int8x16_t sveorqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s32))) +int32x4_t sveorqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s64))) +int64x2_t sveorqv(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s16))) +int16x8_t sveorqv(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u8))) +svuint8_t svextq(svuint8_t, svuint8_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u32))) +svuint32_t svextq(svuint32_t, svuint32_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u64))) +svuint64_t svextq(svuint64_t, svuint64_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u16))) +svuint16_t svextq(svuint16_t, svuint16_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_bf16))) +svbfloat16_t svextq(svbfloat16_t, svbfloat16_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s8))) +svint8_t svextq(svint8_t, svint8_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f64))) +svfloat64_t svextq(svfloat64_t, svfloat64_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f32))) +svfloat32_t svextq(svfloat32_t, svfloat32_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f16))) +svfloat16_t svextq(svfloat16_t, svfloat16_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32))) +svint32_t svextq(svint32_t, svint32_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64))) +svint64_t svextq(svint64_t, svint64_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16))) +svint16_t svextq(svint16_t, svint16_t, 
int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) +svuint32_t svld1q_gather_index_u32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64))) +svuint64_t svld1q_gather_index_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16))) +svuint16_t svld1q_gather_index_u16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16))) +svbfloat16_t svld1q_gather_index_bf16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64))) +svfloat64_t svld1q_gather_index_f64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32))) +svfloat32_t svld1q_gather_index_f32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16))) +svfloat16_t svld1q_gather_index_f16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32))) +svint32_t svld1q_gather_index_s32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64))) +svint64_t svld1q_gather_index_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16))) +svint16_t svld1q_gather_index_s16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8))) +svuint8_t svld1q_gather_offset_u8(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32))) +svuint32_t svld1q_gather_offset_u32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64))) +svuint64_t svld1q_gather_offset_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16))) +svuint16_t svld1q_gather_offset_u16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16))) +svbfloat16_t svld1q_gather_offset_bf16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8))) +svint8_t svld1q_gather_offset_s8(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64))) +svfloat64_t svld1q_gather_offset_f64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32))) +svfloat32_t svld1q_gather_offset_f32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16))) +svfloat16_t svld1q_gather_offset_f16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32))) +svint32_t svld1q_gather_offset_s32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64))) +svint64_t svld1q_gather_offset_s64(svbool_t, svuint64_t, int64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16))) +svint16_t svld1q_gather_offset_s16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8))) +svuint8_t svld1q_gather_u8(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32))) +svuint32_t svld1q_gather_u32(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64))) +svuint64_t svld1q_gather_u64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16))) +svuint16_t svld1q_gather_u16(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16))) +svbfloat16_t svld1q_gather_bf16(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8))) +svint8_t svld1q_gather_s8(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64))) +svfloat64_t svld1q_gather_f64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32))) +svfloat32_t svld1q_gather_f32(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16))) +svfloat16_t svld1q_gather_f16(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32))) +svint32_t svld1q_gather_s32(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64))) +svint64_t svld1q_gather_s64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16))) +svint16_t svld1q_gather_s16(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32))) +svuint32_t svld1q_gather_index(svbool_t, uint32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64))) +svuint64_t svld1q_gather_index(svbool_t, uint64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16))) +svuint16_t svld1q_gather_index(svbool_t, uint16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16))) +svbfloat16_t svld1q_gather_index(svbool_t, bfloat16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64))) +svfloat64_t svld1q_gather_index(svbool_t, float64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32))) +svfloat32_t svld1q_gather_index(svbool_t, float32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16))) +svfloat16_t svld1q_gather_index(svbool_t, float16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32))) +svint32_t svld1q_gather_index(svbool_t, int32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64))) +svint64_t svld1q_gather_index(svbool_t, int64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16))) +svint16_t svld1q_gather_index(svbool_t, int16_t const *, svuint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8))) +svuint8_t svld1q_gather_offset(svbool_t, uint8_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32))) +svuint32_t svld1q_gather_offset(svbool_t, uint32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64))) +svuint64_t svld1q_gather_offset(svbool_t, uint64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16))) +svuint16_t svld1q_gather_offset(svbool_t, uint16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16))) +svbfloat16_t svld1q_gather_offset(svbool_t, bfloat16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8))) +svint8_t svld1q_gather_offset(svbool_t, int8_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64))) +svfloat64_t svld1q_gather_offset(svbool_t, float64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32))) +svfloat32_t svld1q_gather_offset(svbool_t, float32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16))) +svfloat16_t svld1q_gather_offset(svbool_t, float16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32))) +svint32_t svld1q_gather_offset(svbool_t, int32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64))) +svint64_t svld1q_gather_offset(svbool_t, int64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16))) +svint16_t svld1q_gather_offset(svbool_t, int16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64))) +svuint64_t svld1udq(svbool_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64))) +svfloat64_t svld1udq(svbool_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64))) +svint64_t svld1udq(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64))) +svuint64_t svld1udq_vnum(svbool_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64))) +svfloat64_t svld1udq_vnum(svbool_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64))) +svint64_t svld1udq_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32))) +svuint32_t svld1uwq(svbool_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32))) +svfloat32_t svld1uwq(svbool_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32))) +svint32_t svld1uwq(svbool_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32))) +svuint32_t svld1uwq_vnum(svbool_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32))) +svfloat32_t svld1uwq_vnum(svbool_t, float32_t const *, int64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32))) +svint32_t svld1uwq_vnum(svbool_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u8))) +svuint8x2_t svld2q(svbool_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u32))) +svuint32x2_t svld2q(svbool_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u64))) +svuint64x2_t svld2q(svbool_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u16))) +svuint16x2_t svld2q(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s8))) +svint8x2_t svld2q(svbool_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f64))) +svfloat64x2_t svld2q(svbool_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f32))) +svfloat32x2_t svld2q(svbool_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f16))) +svfloat16x2_t svld2q(svbool_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s32))) +svint32x2_t svld2q(svbool_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s64))) +svint64x2_t svld2q(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s16))) +svint16x2_t svld2q(svbool_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_bf16))) +svbfloat16x2_t svld2q(svbool_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u8))) +svuint8x2_t svld2q_vnum(svbool_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u32))) +svuint32x2_t svld2q_vnum(svbool_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u64))) +svuint64x2_t svld2q_vnum(svbool_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u16))) +svuint16x2_t svld2q_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s8))) +svint8x2_t svld2q_vnum(svbool_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f64))) +svfloat64x2_t svld2q_vnum(svbool_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f32))) +svfloat32x2_t svld2q_vnum(svbool_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f16))) +svfloat16x2_t svld2q_vnum(svbool_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s32))) +svint32x2_t svld2q_vnum(svbool_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s64))) +svint64x2_t svld2q_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s16))) +svint16x2_t svld2q_vnum(svbool_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_bf16))) +svbfloat16x2_t svld2q_vnum(svbool_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u8))) +svuint8x3_t svld3q(svbool_t, uint8_t const *); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u32))) +svuint32x3_t svld3q(svbool_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u64))) +svuint64x3_t svld3q(svbool_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u16))) +svuint16x3_t svld3q(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s8))) +svint8x3_t svld3q(svbool_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f64))) +svfloat64x3_t svld3q(svbool_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f32))) +svfloat32x3_t svld3q(svbool_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f16))) +svfloat16x3_t svld3q(svbool_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s32))) +svint32x3_t svld3q(svbool_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s64))) +svint64x3_t svld3q(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s16))) +svint16x3_t svld3q(svbool_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_bf16))) +svbfloat16x3_t svld3q(svbool_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u8))) +svuint8x3_t svld3q_vnum(svbool_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u32))) +svuint32x3_t svld3q_vnum(svbool_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u64))) +svuint64x3_t svld3q_vnum(svbool_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u16))) +svuint16x3_t svld3q_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s8))) +svint8x3_t svld3q_vnum(svbool_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f64))) +svfloat64x3_t svld3q_vnum(svbool_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f32))) +svfloat32x3_t svld3q_vnum(svbool_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f16))) +svfloat16x3_t svld3q_vnum(svbool_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s32))) +svint32x3_t svld3q_vnum(svbool_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s64))) +svint64x3_t svld3q_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s16))) +svint16x3_t svld3q_vnum(svbool_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_bf16))) +svbfloat16x3_t svld3q_vnum(svbool_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u8))) +svuint8x4_t svld4q(svbool_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u32))) +svuint32x4_t svld4q(svbool_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u64))) +svuint64x4_t svld4q(svbool_t, uint64_t const *); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u16))) +svuint16x4_t svld4q(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s8))) +svint8x4_t svld4q(svbool_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f64))) +svfloat64x4_t svld4q(svbool_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f32))) +svfloat32x4_t svld4q(svbool_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f16))) +svfloat16x4_t svld4q(svbool_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s32))) +svint32x4_t svld4q(svbool_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s64))) +svint64x4_t svld4q(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s16))) +svint16x4_t svld4q(svbool_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_bf16))) +svbfloat16x4_t svld4q(svbool_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u8))) +svuint8x4_t svld4q_vnum(svbool_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u32))) +svuint32x4_t svld4q_vnum(svbool_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u64))) +svuint64x4_t svld4q_vnum(svbool_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u16))) +svuint16x4_t svld4q_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s8))) +svint8x4_t svld4q_vnum(svbool_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f64))) +svfloat64x4_t svld4q_vnum(svbool_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f32))) +svfloat32x4_t svld4q_vnum(svbool_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f16))) +svfloat16x4_t svld4q_vnum(svbool_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s32))) +svint32x4_t svld4q_vnum(svbool_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s64))) +svint64x4_t svld4q_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s16))) +svint16x4_t svld4q_vnum(svbool_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_bf16))) +svbfloat16x4_t svld4q_vnum(svbool_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f64))) +float64x2_t svmaxnmqv(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f32))) +float32x4_t svmaxnmqv(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f16))) +float16x8_t svmaxnmqv(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f64))) +float64x2_t svmaxqv(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f32))) +float32x4_t svmaxqv(svbool_t, svfloat32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f16))) +float16x8_t svmaxqv(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s8))) +int8x16_t svmaxqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s32))) +int32x4_t svmaxqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s64))) +int64x2_t svmaxqv(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s16))) +int16x8_t svmaxqv(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u8))) +uint8x16_t svmaxqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u32))) +uint32x4_t svmaxqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u64))) +uint64x2_t svmaxqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u16))) +uint16x8_t svmaxqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f64))) +float64x2_t svminnmqv(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f32))) +float32x4_t svminnmqv(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f16))) +float16x8_t svminnmqv(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f64))) +float64x2_t svminqv(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f32))) +float32x4_t svminqv(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f16))) +float16x8_t svminqv(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s8))) +int8x16_t svminqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s32))) +int32x4_t svminqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s64))) +int64x2_t svminqv(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s16))) +int16x8_t svminqv(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u8))) +uint8x16_t svminqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u32))) +uint32x4_t svminqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u64))) +uint64x2_t svminqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u16))) +uint16x8_t svminqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u8))) +uint8x16_t svorqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u32))) +uint32x4_t svorqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u64))) +uint64x2_t svorqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u16))) +uint16x8_t svorqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s8))) +int8x16_t svorqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s32))) +int32x4_t svorqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s64))) +int64x2_t 
svorqv(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s16))) +int16x8_t svorqv(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8))) +svbool_t svpmov(svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8))) +svbool_t svpmov(svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64))) +svbool_t svpmov(svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64))) +svbool_t svpmov(svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16))) +svbool_t svpmov(svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16))) +svbool_t svpmov(svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32))) +svbool_t svpmov(svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32))) +svbool_t svpmov(svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u8))) +svbool_t svpmov_lane(svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s8))) +svbool_t svpmov_lane(svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64))) +svbool_t svpmov_lane(svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64))) +svbool_t svpmov_lane(svint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16))) +svbool_t svpmov_lane(svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16))) +svbool_t svpmov_lane(svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32))) +svbool_t svpmov_lane(svuint32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32))) +svbool_t svpmov_lane(svint32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64_m))) +svuint64_t svpmov_lane_m(svuint64_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64_m))) +svint64_t svpmov_lane_m(svint64_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16_m))) +svuint16_t svpmov_lane_m(svuint16_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16_m))) +svint16_t svpmov_lane_m(svint16_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32_m))) +svuint32_t svpmov_lane_m(svuint32_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32_m))) +svint32_t svpmov_lane_m(svint32_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) +void svst1dq(svbool_t, uint64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) +void svst1dq(svbool_t, float64_t const *, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64))) +void svst1dq(svbool_t, int64_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64))) +void svst1dq_vnum(svbool_t, uint64_t const *, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64))) +void svst1dq_vnum(svbool_t, float64_t const *, int64_t, svfloat64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64))) +void svst1dq_vnum(svbool_t, int64_t const *, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8))) +void svst1q_scatter(svbool_t, svuint64_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32))) +void svst1q_scatter(svbool_t, svuint64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64))) +void svst1q_scatter(svbool_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16))) +void svst1q_scatter(svbool_t, svuint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16))) +void svst1q_scatter(svbool_t, svuint64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8))) +void svst1q_scatter(svbool_t, svuint64_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64))) +void svst1q_scatter(svbool_t, svuint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32))) +void svst1q_scatter(svbool_t, svuint64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16))) +void svst1q_scatter(svbool_t, svuint64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32))) +void svst1q_scatter(svbool_t, svuint64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64))) +void svst1q_scatter(svbool_t, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16))) +void svst1q_scatter(svbool_t, svuint64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16))) +void 
svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32))) +void svst1q_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64))) +void svst1q_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16))) +void svst1q_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16))) +void svst1q_scatter_index(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64))) +void svst1q_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32))) +void svst1q_scatter_index(svbool_t, float32_t *, svuint64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16))) +void svst1q_scatter_index(svbool_t, float16_t *, svuint64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32))) +void svst1q_scatter_index(svbool_t, int32_t *, svuint64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64))) 
+void svst1q_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16))) +void svst1q_scatter_index(svbool_t, int16_t *, svuint64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) +void svst1q_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32))) +void svst1q_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64))) +void svst1q_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16))) +void svst1q_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16))) +void svst1q_scatter_offset(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8))) +void svst1q_scatter_offset(svbool_t, int8_t *, svuint64_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64))) +void svst1q_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32))) +void svst1q_scatter_offset(svbool_t, float32_t *, svuint64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16))) +void svst1q_scatter_offset(svbool_t, float16_t *, svuint64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32))) +void svst1q_scatter_offset(svbool_t, int32_t *, svuint64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64))) +void svst1q_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) +void svst1q_scatter_offset(svbool_t, int16_t *, svuint64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) +void svst1wq(svbool_t, uint32_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32))) +void svst1wq(svbool_t, float32_t const *, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32))) +void svst1wq(svbool_t, int32_t const *, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32))) +void svst1wq_vnum(svbool_t, uint32_t const *, int64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) +void svst1wq_vnum(svbool_t, float32_t const *, int64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) +void svst1wq_vnum(svbool_t, int32_t const *, int64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8))) +void svst2q(svbool_t, uint8_t const *, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32))) +void svst2q(svbool_t, uint32_t const *, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u64))) +void svst2q(svbool_t, uint64_t const *, svuint64x2_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u16))) +void svst2q(svbool_t, uint16_t const *, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s8))) +void svst2q(svbool_t, int8_t const *, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f64))) +void svst2q(svbool_t, float64_t const *, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f32))) +void svst2q(svbool_t, float32_t const *, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f16))) +void svst2q(svbool_t, float16_t const *, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s32))) +void svst2q(svbool_t, int32_t const *, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s64))) +void svst2q(svbool_t, int64_t const *, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s16))) +void svst2q(svbool_t, int16_t const *, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_bf16))) +void svst2q(svbool_t, bfloat16_t const *, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u8))) +void svst2q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u32))) +void svst2q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u64))) +void svst2q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u16))) +void svst2q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s8))) +void svst2q_vnum(svbool_t, int8_t const *, int64_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f64))) +void svst2q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f32))) +void svst2q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f16))) +void svst2q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s32))) +void svst2q_vnum(svbool_t, int32_t const *, int64_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s64))) +void svst2q_vnum(svbool_t, int64_t const *, int64_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s16))) +void svst2q_vnum(svbool_t, int16_t const *, int64_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_bf16))) +void svst2q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u8))) +void svst3q(svbool_t, uint8_t const *, svuint8x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u32))) +void svst3q(svbool_t, uint32_t const *, svuint32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u64))) +void svst3q(svbool_t, uint64_t const *, svuint64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u16))) +void svst3q(svbool_t, uint16_t const *, svuint16x3_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s8))) +void svst3q(svbool_t, int8_t const *, svint8x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f64))) +void svst3q(svbool_t, float64_t const *, svfloat64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f32))) +void svst3q(svbool_t, float32_t const *, svfloat32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f16))) +void svst3q(svbool_t, float16_t const *, svfloat16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s32))) +void svst3q(svbool_t, int32_t const *, svint32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s64))) +void svst3q(svbool_t, int64_t const *, svint64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s16))) +void svst3q(svbool_t, int16_t const *, svint16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_bf16))) +void svst3q(svbool_t, bfloat16_t const *, svbfloat16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u8))) +void svst3q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u32))) +void svst3q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u64))) +void svst3q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u16))) +void svst3q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s8))) +void svst3q_vnum(svbool_t, int8_t const *, int64_t, svint8x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f64))) +void svst3q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f32))) +void svst3q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f16))) +void svst3q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s32))) +void svst3q_vnum(svbool_t, int32_t const *, int64_t, svint32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s64))) +void svst3q_vnum(svbool_t, int64_t const *, int64_t, svint64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s16))) +void svst3q_vnum(svbool_t, int16_t const *, int64_t, svint16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_bf16))) +void svst3q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u8))) +void svst4q(svbool_t, uint8_t const *, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u32))) +void svst4q(svbool_t, uint32_t const *, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u64))) +void svst4q(svbool_t, uint64_t const *, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u16))) +void svst4q(svbool_t, uint16_t const *, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s8))) +void svst4q(svbool_t, int8_t const *, svint8x4_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f64))) +void svst4q(svbool_t, float64_t const *, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f32))) +void svst4q(svbool_t, float32_t const *, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f16))) +void svst4q(svbool_t, float16_t const *, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s32))) +void svst4q(svbool_t, int32_t const *, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s64))) +void svst4q(svbool_t, int64_t const *, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s16))) +void svst4q(svbool_t, int16_t const *, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_bf16))) +void svst4q(svbool_t, bfloat16_t const *, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u8))) +void svst4q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u32))) +void svst4q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u64))) +void svst4q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u16))) +void svst4q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s8))) +void svst4q_vnum(svbool_t, int8_t const *, int64_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f64))) +void svst4q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f32))) +void svst4q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f16))) +void svst4q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s32))) +void svst4q_vnum(svbool_t, int32_t const *, int64_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s64))) +void svst4q_vnum(svbool_t, int64_t const *, int64_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s16))) +void svst4q_vnum(svbool_t, int16_t const *, int64_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_bf16))) +void svst4q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u8))) +svuint8_t svtblq(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u32))) +svuint32_t svtblq(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u64))) +svuint64_t svtblq(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u16))) +svuint16_t svtblq(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_bf16))) +svbfloat16_t svtblq(svbfloat16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s8))) +svint8_t svtblq(svint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f64))) +svfloat64_t svtblq(svfloat64_t, 
svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f32))) +svfloat32_t svtblq(svfloat32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f16))) +svfloat16_t svtblq(svfloat16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32))) +svint32_t svtblq(svint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64))) +svint64_t svtblq(svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16))) +svint16_t svtblq(svint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8))) +svuint8_t svtbxq(svuint8_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u32))) +svuint32_t svtbxq(svuint32_t, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u64))) +svuint64_t svtbxq(svuint64_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u16))) +svuint16_t svtbxq(svuint16_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_bf16))) +svbfloat16_t svtbxq(svbfloat16_t, svbfloat16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s8))) +svint8_t svtbxq(svint8_t, svint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f64))) +svfloat64_t svtbxq(svfloat64_t, svfloat64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f32))) +svfloat32_t svtbxq(svfloat32_t, svfloat32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f16))) +svfloat16_t svtbxq(svfloat16_t, svfloat16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32))) +svint32_t svtbxq(svint32_t, svint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64))) +svint64_t svtbxq(svint64_t, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16))) +svint16_t svtbxq(svint16_t, svint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8))) +svuint8_t svuzpq1(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u32))) +svuint32_t svuzpq1(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u64))) +svuint64_t svuzpq1(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u16))) +svuint16_t svuzpq1(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_bf16))) +svbfloat16_t svuzpq1(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s8))) +svint8_t svuzpq1(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f64))) +svfloat64_t svuzpq1(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f32))) +svfloat32_t svuzpq1(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f16))) +svfloat16_t svuzpq1(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32))) +svint32_t svuzpq1(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64))) +svint64_t svuzpq1(svint64_t, svint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16))) +svint16_t svuzpq1(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8))) +svuint8_t svuzpq2(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u32))) +svuint32_t svuzpq2(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u64))) +svuint64_t svuzpq2(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u16))) +svuint16_t svuzpq2(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_bf16))) +svbfloat16_t svuzpq2(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s8))) +svint8_t svuzpq2(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f64))) +svfloat64_t svuzpq2(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f32))) +svfloat32_t svuzpq2(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f16))) +svfloat16_t svuzpq2(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32))) +svint32_t svuzpq2(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64))) +svint64_t svuzpq2(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16))) +svint16_t svuzpq2(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8))) +svuint8_t svzipq1(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u32))) +svuint32_t svzipq1(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u64))) +svuint64_t svzipq1(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u16))) +svuint16_t svzipq1(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_bf16))) +svbfloat16_t svzipq1(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s8))) +svint8_t svzipq1(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f64))) +svfloat64_t svzipq1(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f32))) +svfloat32_t svzipq1(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f16))) +svfloat16_t svzipq1(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32))) +svint32_t svzipq1(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64))) +svint64_t svzipq1(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16))) +svint16_t svzipq1(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8))) +svuint8_t svzipq2(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u32))) +svuint32_t svzipq2(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u64))) +svuint64_t svzipq2(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u16))) +svuint16_t svzipq2(svuint16_t, svuint16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_bf16))) +svbfloat16_t svzipq2(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s8))) +svint8_t svzipq2(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f64))) +svfloat64_t svzipq2(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f32))) +svfloat32_t svzipq2(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f16))) +svfloat16_t svzipq2(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32))) +svint32_t svzipq2(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64))) +svint64_t svzipq2(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16))) +svint16_t svzipq2(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b16))) +svbool_t svpsel_lane_b16(svbool_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b32))) +svbool_t svpsel_lane_b32(svbool_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b64))) +svbool_t svpsel_lane_b64(svbool_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b8))) +svbool_t svpsel_lane_b8(svbool_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_f32))) +svfloat32_t svbfmlslb_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_lane_f32))) +svfloat32_t svbfmlslb_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_f32))) +svfloat32_t svbfmlslt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_lane_f32))) +svfloat32_t svbfmlslt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64))) svfloat64_t svclamp_f64(svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32))) @@ -23899,6 +29660,252 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64))) svuint64_t svclamp_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16))) svuint16_t svclamp_u16(svuint16_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c8))) +uint64_t svcntp_c8(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c32))) +uint64_t svcntp_c32(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c64))) +uint64_t svcntp_c64(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c16))) +uint64_t svcntp_c16(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_b))) +svboolx2_t svcreate2_b(svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_b))) +svboolx4_t svcreate4_b(svbool_t, svbool_t, svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_f16))) +svfloat32_t svdot_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32_s16))) +svint32_t svdot_s32_s16(svint32_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32_u16))) +svuint32_t svdot_u32_u16(svuint32_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_f16))) +svfloat32_t svdot_lane_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32_s16))) +svint32_t svdot_lane_s32_s16(svint32_t, svint16_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32_u16))) +svuint32_t svdot_lane_u32_u16(svuint32_t, svuint16_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_b))) +svbool_t svget2_b(svboolx2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_b))) +svbool_t svget4_b(svboolx4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2))) +svuint8x2_t svld1_u8_x2(svcount_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2))) +svint8x2_t svld1_s8_x2(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2))) +svuint64x2_t svld1_u64_x2(svcount_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2))) +svfloat64x2_t svld1_f64_x2(svcount_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2))) +svint64x2_t svld1_s64_x2(svcount_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2))) +svuint16x2_t svld1_u16_x2(svcount_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2))) +svbfloat16x2_t svld1_bf16_x2(svcount_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2))) +svfloat16x2_t svld1_f16_x2(svcount_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2))) +svint16x2_t svld1_s16_x2(svcount_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2))) +svuint32x2_t svld1_u32_x2(svcount_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2))) +svfloat32x2_t svld1_f32_x2(svcount_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2))) +svint32x2_t svld1_s32_x2(svcount_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4))) +svuint8x4_t svld1_u8_x4(svcount_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4))) +svint8x4_t svld1_s8_x4(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4))) +svuint64x4_t svld1_u64_x4(svcount_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4))) +svfloat64x4_t svld1_f64_x4(svcount_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4))) +svint64x4_t svld1_s64_x4(svcount_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4))) +svuint16x4_t svld1_u16_x4(svcount_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4))) +svbfloat16x4_t svld1_bf16_x4(svcount_t, bfloat16_t const *); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4))) +svfloat16x4_t svld1_f16_x4(svcount_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4))) +svint16x4_t svld1_s16_x4(svcount_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4))) +svuint32x4_t svld1_u32_x4(svcount_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4))) +svfloat32x4_t svld1_f32_x4(svcount_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4))) +svint32x4_t svld1_s32_x4(svcount_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2))) +svuint8x2_t svld1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2))) +svint8x2_t svld1_vnum_s8_x2(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2))) +svuint64x2_t svld1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2))) +svfloat64x2_t svld1_vnum_f64_x2(svcount_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2))) +svint64x2_t svld1_vnum_s64_x2(svcount_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2))) +svuint16x2_t svld1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2))) +svbfloat16x2_t svld1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2))) +svfloat16x2_t svld1_vnum_f16_x2(svcount_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2))) +svint16x2_t svld1_vnum_s16_x2(svcount_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2))) +svuint32x2_t svld1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2))) +svfloat32x2_t svld1_vnum_f32_x2(svcount_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2))) +svint32x2_t svld1_vnum_s32_x2(svcount_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4))) +svuint8x4_t svld1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4))) +svint8x4_t svld1_vnum_s8_x4(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4))) +svuint64x4_t svld1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4))) +svfloat64x4_t svld1_vnum_f64_x4(svcount_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4))) +svint64x4_t svld1_vnum_s64_x4(svcount_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4))) +svuint16x4_t svld1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4))) +svbfloat16x4_t svld1_vnum_bf16_x4(svcount_t, bfloat16_t const *, 
int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4))) +svfloat16x4_t svld1_vnum_f16_x4(svcount_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4))) +svint16x4_t svld1_vnum_s16_x4(svcount_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4))) +svuint32x4_t svld1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4))) +svfloat32x4_t svld1_vnum_f32_x4(svcount_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4))) +svint32x4_t svld1_vnum_s32_x4(svcount_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2))) +svuint8x2_t svldnt1_u8_x2(svcount_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2))) +svint8x2_t svldnt1_s8_x2(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2))) +svuint64x2_t svldnt1_u64_x2(svcount_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2))) +svfloat64x2_t svldnt1_f64_x2(svcount_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2))) +svint64x2_t svldnt1_s64_x2(svcount_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2))) +svuint16x2_t svldnt1_u16_x2(svcount_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2))) +svbfloat16x2_t svldnt1_bf16_x2(svcount_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2))) +svfloat16x2_t svldnt1_f16_x2(svcount_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2))) +svint16x2_t svldnt1_s16_x2(svcount_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2))) +svuint32x2_t svldnt1_u32_x2(svcount_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2))) +svfloat32x2_t svldnt1_f32_x2(svcount_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2))) +svint32x2_t svldnt1_s32_x2(svcount_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4))) +svuint8x4_t svldnt1_u8_x4(svcount_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4))) +svint8x4_t svldnt1_s8_x4(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4))) +svuint64x4_t svldnt1_u64_x4(svcount_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4))) +svfloat64x4_t svldnt1_f64_x4(svcount_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4))) +svint64x4_t svldnt1_s64_x4(svcount_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4))) +svuint16x4_t svldnt1_u16_x4(svcount_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4))) +svbfloat16x4_t svldnt1_bf16_x4(svcount_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4))) +svfloat16x4_t svldnt1_f16_x4(svcount_t, float16_t const *); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4))) +svint16x4_t svldnt1_s16_x4(svcount_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4))) +svuint32x4_t svldnt1_u32_x4(svcount_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4))) +svfloat32x4_t svldnt1_f32_x4(svcount_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4))) +svint32x4_t svldnt1_s32_x4(svcount_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2))) +svuint8x2_t svldnt1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2))) +svint8x2_t svldnt1_vnum_s8_x2(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2))) +svuint64x2_t svldnt1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2))) +svfloat64x2_t svldnt1_vnum_f64_x2(svcount_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2))) +svint64x2_t svldnt1_vnum_s64_x2(svcount_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2))) +svuint16x2_t svldnt1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2))) +svbfloat16x2_t svldnt1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2))) +svfloat16x2_t svldnt1_vnum_f16_x2(svcount_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2))) +svint16x2_t svldnt1_vnum_s16_x2(svcount_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2))) +svuint32x2_t svldnt1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2))) +svfloat32x2_t svldnt1_vnum_f32_x2(svcount_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2))) +svint32x2_t svldnt1_vnum_s32_x2(svcount_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4))) +svuint8x4_t svldnt1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4))) +svint8x4_t svldnt1_vnum_s8_x4(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4))) +svuint64x4_t svldnt1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4))) +svfloat64x4_t svldnt1_vnum_f64_x4(svcount_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4))) +svint64x4_t svldnt1_vnum_s64_x4(svcount_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4))) +svuint16x4_t svldnt1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4))) +svbfloat16x4_t svldnt1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4))) +svfloat16x4_t svldnt1_vnum_f16_x4(svcount_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4))) +svint16x4_t svldnt1_vnum_s16_x4(svcount_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4))) +svuint32x4_t svldnt1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4))) +svfloat32x4_t svldnt1_vnum_f32_x4(svcount_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4))) +svint32x4_t svldnt1_vnum_s32_x4(svcount_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8))) +svbool_t svpext_lane_c8(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32))) +svbool_t svpext_lane_c32(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64))) +svbool_t svpext_lane_c64(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16))) +svbool_t svpext_lane_c16(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8_x2))) +svboolx2_t svpext_lane_c8_x2(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32_x2))) +svboolx2_t svpext_lane_c32_x2(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64_x2))) +svboolx2_t svpext_lane_c64_x2(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16_x2))) +svboolx2_t svpext_lane_c16_x2(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfalse_c))) +svcount_t svpfalse_c(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c16))) +svcount_t svpsel_lane_c16(svcount_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c32))) +svcount_t svpsel_lane_c32(svcount_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c64))) +svcount_t svpsel_lane_c64(svcount_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c8))) +svcount_t svpsel_lane_c8(svcount_t, svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c8))) svcount_t svptrue_c8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c32))) @@ -23907,6 +29914,420 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c64))) svcount_t svptrue_c64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c16))) svcount_t svptrue_c16(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s32_x2))) +svint16_t svqrshrn_n_s16_s32_x2(svint32x2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u32_x2))) +svuint16_t svqrshrn_n_u16_u32_x2(svuint32x2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s32_x2))) +svuint16_t svqrshrun_n_u16_s32_x2(svint32x2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_m))) +svuint8_t svrevd_u8_m(svuint8_t, svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_m))) +svuint32_t 
svrevd_u32_m(svuint32_t, svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_m))) +svuint64_t svrevd_u64_m(svuint64_t, svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_m))) +svuint16_t svrevd_u16_m(svuint16_t, svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_m))) +svbfloat16_t svrevd_bf16_m(svbfloat16_t, svbool_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_m))) +svint8_t svrevd_s8_m(svint8_t, svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_m))) +svfloat64_t svrevd_f64_m(svfloat64_t, svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_m))) +svfloat32_t svrevd_f32_m(svfloat32_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_m))) +svfloat16_t svrevd_f16_m(svfloat16_t, svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_m))) +svint32_t svrevd_s32_m(svint32_t, svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_m))) +svint64_t svrevd_s64_m(svint64_t, svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_m))) +svint16_t svrevd_s16_m(svint16_t, svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_x))) +svuint8_t svrevd_u8_x(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_x))) +svuint32_t svrevd_u32_x(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_x))) +svuint64_t svrevd_u64_x(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_x))) +svuint16_t svrevd_u16_x(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_x))) +svbfloat16_t svrevd_bf16_x(svbool_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_x))) +svint8_t svrevd_s8_x(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_x))) +svfloat64_t svrevd_f64_x(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_x))) +svfloat32_t svrevd_f32_x(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_x))) +svfloat16_t svrevd_f16_x(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_x))) +svint32_t svrevd_s32_x(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_x))) +svint64_t svrevd_s64_x(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_x))) +svint16_t svrevd_s16_x(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_z))) +svuint8_t svrevd_u8_z(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_z))) +svuint32_t svrevd_u32_z(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_z))) +svuint64_t svrevd_u64_z(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_z))) +svuint16_t svrevd_u16_z(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_z))) +svbfloat16_t svrevd_bf16_z(svbool_t, svbfloat16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_z))) +svint8_t svrevd_s8_z(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_z))) +svfloat64_t svrevd_f64_z(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_z))) +svfloat32_t svrevd_f32_z(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_z))) +svfloat16_t svrevd_f16_z(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_z))) +svint32_t svrevd_s32_z(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_z))) +svint64_t svrevd_s64_z(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_z))) +svint16_t svrevd_s16_z(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_b))) +svboolx2_t svset2_b(svboolx2_t, uint64_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_b))) +svboolx4_t svset4_b(svboolx4_t, uint64_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2))) +void svst1_u8_x2(svcount_t, uint8_t *, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2))) +void svst1_s8_x2(svcount_t, int8_t *, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2))) +void svst1_u64_x2(svcount_t, uint64_t *, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2))) +void svst1_f64_x2(svcount_t, float64_t *, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2))) +void svst1_s64_x2(svcount_t, int64_t *, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2))) +void svst1_u16_x2(svcount_t, uint16_t *, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2))) +void svst1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2))) +void svst1_f16_x2(svcount_t, float16_t *, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2))) +void svst1_s16_x2(svcount_t, int16_t *, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2))) +void svst1_u32_x2(svcount_t, uint32_t *, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2))) +void svst1_f32_x2(svcount_t, float32_t *, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2))) +void svst1_s32_x2(svcount_t, int32_t *, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4))) +void svst1_u8_x4(svcount_t, uint8_t *, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4))) +void svst1_s8_x4(svcount_t, int8_t *, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4))) +void svst1_u64_x4(svcount_t, uint64_t *, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4))) +void svst1_f64_x4(svcount_t, float64_t *, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4))) +void svst1_s64_x4(svcount_t, int64_t *, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4))) +void svst1_u16_x4(svcount_t, uint16_t *, svuint16x4_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4))) +void svst1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4))) +void svst1_f16_x4(svcount_t, float16_t *, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4))) +void svst1_s16_x4(svcount_t, int16_t *, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4))) +void svst1_u32_x4(svcount_t, uint32_t *, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4))) +void svst1_f32_x4(svcount_t, float32_t *, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4))) +void svst1_s32_x4(svcount_t, int32_t *, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2))) +void svst1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2))) +void svst1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2))) +void svst1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2))) +void svst1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2))) +void svst1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2))) +void svst1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2))) +void svst1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2))) +void svst1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2))) +void svst1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2))) +void svst1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2))) +void svst1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2))) +void svst1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4))) +void svst1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4))) +void svst1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4))) +void svst1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4))) +void svst1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4))) +void svst1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4))) +void svst1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4))) +void svst1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4))) +void svst1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4))) +void svst1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4))) +void svst1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4))) +void svst1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4))) +void svst1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2))) +void svstnt1_u8_x2(svcount_t, uint8_t *, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2))) +void svstnt1_s8_x2(svcount_t, int8_t *, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2))) +void svstnt1_u64_x2(svcount_t, uint64_t *, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2))) +void svstnt1_f64_x2(svcount_t, float64_t *, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2))) +void svstnt1_s64_x2(svcount_t, int64_t *, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2))) +void svstnt1_u16_x2(svcount_t, uint16_t *, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2))) +void svstnt1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2))) +void svstnt1_f16_x2(svcount_t, float16_t *, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2))) +void svstnt1_s16_x2(svcount_t, int16_t *, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2))) +void svstnt1_u32_x2(svcount_t, uint32_t *, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2))) +void svstnt1_f32_x2(svcount_t, float32_t *, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2))) +void svstnt1_s32_x2(svcount_t, int32_t *, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4))) +void svstnt1_u8_x4(svcount_t, uint8_t *, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4))) +void svstnt1_s8_x4(svcount_t, int8_t *, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4))) +void svstnt1_u64_x4(svcount_t, uint64_t *, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4))) +void svstnt1_f64_x4(svcount_t, float64_t *, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4))) +void svstnt1_s64_x4(svcount_t, int64_t *, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4))) +void svstnt1_u16_x4(svcount_t, uint16_t *, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4))) +void svstnt1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4))) +void svstnt1_f16_x4(svcount_t, float16_t *, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4))) +void svstnt1_s16_x4(svcount_t, int16_t *, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4))) +void svstnt1_u32_x4(svcount_t, uint32_t *, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4))) +void svstnt1_f32_x4(svcount_t, float32_t *, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4))) +void svstnt1_s32_x4(svcount_t, int32_t *, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2))) +void svstnt1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2))) +void svstnt1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2))) +void svstnt1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2))) +void svstnt1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2))) +void svstnt1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2))) +void svstnt1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2))) +void svstnt1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2))) +void svstnt1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2))) +void svstnt1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2))) +void svstnt1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2))) +void svstnt1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2))) +void svstnt1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4))) +void svstnt1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4))) +void svstnt1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4))) +void svstnt1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4))) +void svstnt1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4))) +void svstnt1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4))) +void svstnt1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4))) +void svstnt1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4))) +void svstnt1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4))) +void svstnt1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4))) +void svstnt1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4))) +void svstnt1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4))) +void svstnt1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_b))) +svboolx2_t svundef2_b(); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_b))) +svboolx4_t svundef4_b(); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64))) +svcount_t svwhilege_c8_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64))) +svcount_t svwhilege_c32_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64))) +svcount_t svwhilege_c64_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64))) +svcount_t svwhilege_c16_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64))) +svcount_t svwhilege_c8_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64))) +svcount_t svwhilege_c32_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64))) +svcount_t svwhilege_c64_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64))) +svcount_t svwhilege_c16_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64_x2))) +svboolx2_t svwhilege_b8_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64_x2))) +svboolx2_t svwhilege_b32_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64_x2))) +svboolx2_t svwhilege_b64_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64_x2))) +svboolx2_t svwhilege_b16_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64_x2))) +svboolx2_t svwhilege_b8_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64_x2))) +svboolx2_t svwhilege_b32_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64_x2))) +svboolx2_t svwhilege_b64_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64_x2))) +svboolx2_t svwhilege_b16_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64))) +svcount_t svwhilegt_c8_s64(int64_t, int64_t, uint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64))) +svcount_t svwhilegt_c32_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64))) +svcount_t svwhilegt_c64_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64))) +svcount_t svwhilegt_c16_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64))) +svcount_t svwhilegt_c8_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64))) +svcount_t svwhilegt_c32_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64))) +svcount_t svwhilegt_c64_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64))) +svcount_t svwhilegt_c16_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64_x2))) +svboolx2_t svwhilegt_b8_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64_x2))) +svboolx2_t svwhilegt_b32_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64_x2))) +svboolx2_t svwhilegt_b64_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64_x2))) +svboolx2_t svwhilegt_b16_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64_x2))) +svboolx2_t svwhilegt_b8_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64_x2))) +svboolx2_t svwhilegt_b32_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64_x2))) +svboolx2_t svwhilegt_b64_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64_x2))) +svboolx2_t svwhilegt_b16_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64))) +svcount_t svwhilele_c8_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64))) +svcount_t svwhilele_c32_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64))) +svcount_t svwhilele_c64_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64))) +svcount_t svwhilele_c16_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64))) +svcount_t svwhilele_c8_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64))) +svcount_t svwhilele_c32_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64))) +svcount_t svwhilele_c64_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64))) +svcount_t svwhilele_c16_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64_x2))) +svboolx2_t svwhilele_b8_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64_x2))) +svboolx2_t svwhilele_b32_s64_x2(int64_t, int64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64_x2))) +svboolx2_t svwhilele_b64_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64_x2))) +svboolx2_t svwhilele_b16_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64_x2))) +svboolx2_t svwhilele_b8_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64_x2))) +svboolx2_t svwhilele_b32_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64_x2))) +svboolx2_t svwhilele_b64_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64_x2))) +svboolx2_t svwhilele_b16_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64))) +svcount_t svwhilelt_c8_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64))) +svcount_t svwhilelt_c32_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64))) +svcount_t svwhilelt_c64_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64))) +svcount_t svwhilelt_c16_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64))) +svcount_t svwhilelt_c8_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64))) +svcount_t svwhilelt_c32_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64))) +svcount_t svwhilelt_c64_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64))) +svcount_t svwhilelt_c16_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64_x2))) +svboolx2_t svwhilelt_b8_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64_x2))) +svboolx2_t svwhilelt_b32_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64_x2))) +svboolx2_t svwhilelt_b64_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64_x2))) +svboolx2_t svwhilelt_b16_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64_x2))) +svboolx2_t svwhilelt_b8_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64_x2))) +svboolx2_t svwhilelt_b32_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64_x2))) +svboolx2_t svwhilelt_b64_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64_x2))) +svboolx2_t svwhilelt_b16_s64_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_f32))) +svfloat32_t svbfmlslb(svfloat32_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_lane_f32))) +svfloat32_t svbfmlslb_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_f32))) +svfloat32_t svbfmlslt(svfloat32_t, svbfloat16_t, svbfloat16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_lane_f32))) +svfloat32_t svbfmlslt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64))) svfloat64_t svclamp(svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32))) @@ -23929,6 +30350,620 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64))) svuint64_t svclamp(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16))) svuint16_t svclamp(svuint16_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_b))) +svboolx2_t svcreate2(svbool_t, svbool_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_b))) +svboolx4_t svcreate4(svbool_t, svbool_t, svbool_t, svbool_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_f16))) +svfloat32_t svdot(svfloat32_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32_s16))) +svint32_t svdot(svint32_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32_u16))) +svuint32_t svdot(svuint32_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_f16))) +svfloat32_t svdot_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32_s16))) +svint32_t svdot_lane(svint32_t, svint16_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32_u16))) +svuint32_t svdot_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_b))) +svbool_t svget2(svboolx2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_b))) +svbool_t svget4(svboolx4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2))) +svuint8x2_t svld1_x2(svcount_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2))) +svint8x2_t svld1_x2(svcount_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2))) +svuint64x2_t svld1_x2(svcount_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2))) +svfloat64x2_t svld1_x2(svcount_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2))) +svint64x2_t svld1_x2(svcount_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2))) +svuint16x2_t svld1_x2(svcount_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2))) +svbfloat16x2_t svld1_x2(svcount_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2))) +svfloat16x2_t svld1_x2(svcount_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2))) +svint16x2_t svld1_x2(svcount_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2))) +svuint32x2_t svld1_x2(svcount_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2))) +svfloat32x2_t svld1_x2(svcount_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2))) +svint32x2_t 
svld1_x2(svcount_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4))) +svuint8x4_t svld1_x4(svcount_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4))) +svint8x4_t svld1_x4(svcount_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4))) +svuint64x4_t svld1_x4(svcount_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4))) +svfloat64x4_t svld1_x4(svcount_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4))) +svint64x4_t svld1_x4(svcount_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4))) +svuint16x4_t svld1_x4(svcount_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4))) +svbfloat16x4_t svld1_x4(svcount_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4))) +svfloat16x4_t svld1_x4(svcount_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4))) +svint16x4_t svld1_x4(svcount_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4))) +svuint32x4_t svld1_x4(svcount_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4))) +svfloat32x4_t svld1_x4(svcount_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4))) +svint32x4_t svld1_x4(svcount_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2))) +svuint8x2_t svld1_vnum_x2(svcount_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2))) +svint8x2_t svld1_vnum_x2(svcount_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2))) +svuint64x2_t svld1_vnum_x2(svcount_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2))) +svfloat64x2_t svld1_vnum_x2(svcount_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2))) +svint64x2_t svld1_vnum_x2(svcount_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2))) +svuint16x2_t svld1_vnum_x2(svcount_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2))) +svbfloat16x2_t svld1_vnum_x2(svcount_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2))) +svfloat16x2_t svld1_vnum_x2(svcount_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2))) +svint16x2_t svld1_vnum_x2(svcount_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2))) +svuint32x2_t svld1_vnum_x2(svcount_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2))) +svfloat32x2_t svld1_vnum_x2(svcount_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2))) +svint32x2_t svld1_vnum_x2(svcount_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4))) +svuint8x4_t svld1_vnum_x4(svcount_t, uint8_t const *, int64_t); 
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4))) +svint8x4_t svld1_vnum_x4(svcount_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4))) +svuint64x4_t svld1_vnum_x4(svcount_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4))) +svfloat64x4_t svld1_vnum_x4(svcount_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4))) +svint64x4_t svld1_vnum_x4(svcount_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4))) +svuint16x4_t svld1_vnum_x4(svcount_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4))) +svbfloat16x4_t svld1_vnum_x4(svcount_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4))) +svfloat16x4_t svld1_vnum_x4(svcount_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4))) +svint16x4_t svld1_vnum_x4(svcount_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4))) +svuint32x4_t svld1_vnum_x4(svcount_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4))) +svfloat32x4_t svld1_vnum_x4(svcount_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4))) +svint32x4_t svld1_vnum_x4(svcount_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2))) +svuint8x2_t svldnt1_x2(svcount_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2))) +svint8x2_t svldnt1_x2(svcount_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2))) +svuint64x2_t svldnt1_x2(svcount_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2))) +svfloat64x2_t svldnt1_x2(svcount_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2))) +svint64x2_t svldnt1_x2(svcount_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2))) +svuint16x2_t svldnt1_x2(svcount_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2))) +svbfloat16x2_t svldnt1_x2(svcount_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2))) +svfloat16x2_t svldnt1_x2(svcount_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2))) +svint16x2_t svldnt1_x2(svcount_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2))) +svuint32x2_t svldnt1_x2(svcount_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2))) +svfloat32x2_t svldnt1_x2(svcount_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2))) +svint32x2_t svldnt1_x2(svcount_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4))) +svuint8x4_t svldnt1_x4(svcount_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4))) +svint8x4_t svldnt1_x4(svcount_t, int8_t const *); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4))) +svuint64x4_t svldnt1_x4(svcount_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4))) +svfloat64x4_t svldnt1_x4(svcount_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4))) +svint64x4_t svldnt1_x4(svcount_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4))) +svuint16x4_t svldnt1_x4(svcount_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4))) +svbfloat16x4_t svldnt1_x4(svcount_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4))) +svfloat16x4_t svldnt1_x4(svcount_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4))) +svint16x4_t svldnt1_x4(svcount_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4))) +svuint32x4_t svldnt1_x4(svcount_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4))) +svfloat32x4_t svldnt1_x4(svcount_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4))) +svint32x4_t svldnt1_x4(svcount_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2))) +svuint8x2_t svldnt1_vnum_x2(svcount_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2))) +svint8x2_t svldnt1_vnum_x2(svcount_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2))) +svuint64x2_t svldnt1_vnum_x2(svcount_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2))) +svfloat64x2_t svldnt1_vnum_x2(svcount_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2))) +svint64x2_t svldnt1_vnum_x2(svcount_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2))) +svuint16x2_t svldnt1_vnum_x2(svcount_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2))) +svbfloat16x2_t svldnt1_vnum_x2(svcount_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2))) +svfloat16x2_t svldnt1_vnum_x2(svcount_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2))) +svint16x2_t svldnt1_vnum_x2(svcount_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2))) +svuint32x2_t svldnt1_vnum_x2(svcount_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2))) +svfloat32x2_t svldnt1_vnum_x2(svcount_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2))) +svint32x2_t svldnt1_vnum_x2(svcount_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4))) +svuint8x4_t svldnt1_vnum_x4(svcount_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4))) +svint8x4_t svldnt1_vnum_x4(svcount_t, int8_t const *, int64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4))) +svuint64x4_t svldnt1_vnum_x4(svcount_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4))) +svfloat64x4_t svldnt1_vnum_x4(svcount_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4))) +svint64x4_t svldnt1_vnum_x4(svcount_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4))) +svuint16x4_t svldnt1_vnum_x4(svcount_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4))) +svbfloat16x4_t svldnt1_vnum_x4(svcount_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4))) +svfloat16x4_t svldnt1_vnum_x4(svcount_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4))) +svint16x4_t svldnt1_vnum_x4(svcount_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4))) +svuint32x4_t svldnt1_vnum_x4(svcount_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4))) +svfloat32x4_t svldnt1_vnum_x4(svcount_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4))) +svint32x4_t svldnt1_vnum_x4(svcount_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s32_x2))) +svint16_t svqrshrn_s16(svint32x2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u32_x2))) +svuint16_t svqrshrn_u16(svuint32x2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s32_x2))) +svuint16_t svqrshrun_u16(svint32x2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_m))) +svuint8_t svrevd_m(svuint8_t, svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_m))) +svuint32_t svrevd_m(svuint32_t, svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_m))) +svuint64_t svrevd_m(svuint64_t, svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_m))) +svuint16_t svrevd_m(svuint16_t, svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_m))) +svbfloat16_t svrevd_m(svbfloat16_t, svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_m))) +svint8_t svrevd_m(svint8_t, svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_m))) +svfloat64_t svrevd_m(svfloat64_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_m))) +svfloat32_t svrevd_m(svfloat32_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_m))) +svfloat16_t svrevd_m(svfloat16_t, svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_m))) +svint32_t svrevd_m(svint32_t, svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_m))) +svint64_t svrevd_m(svint64_t, svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_m))) +svint16_t svrevd_m(svint16_t, svbool_t, svint16_t); 
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_x))) +svuint8_t svrevd_x(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_x))) +svuint32_t svrevd_x(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_x))) +svuint64_t svrevd_x(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_x))) +svuint16_t svrevd_x(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_x))) +svbfloat16_t svrevd_x(svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_x))) +svint8_t svrevd_x(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_x))) +svfloat64_t svrevd_x(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_x))) +svfloat32_t svrevd_x(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_x))) +svfloat16_t svrevd_x(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_x))) +svint32_t svrevd_x(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_x))) +svint64_t svrevd_x(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_x))) +svint16_t svrevd_x(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_z))) +svuint8_t svrevd_z(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_z))) +svuint32_t svrevd_z(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_z))) +svuint64_t svrevd_z(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_z))) +svuint16_t svrevd_z(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_z))) +svbfloat16_t svrevd_z(svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_z))) +svint8_t svrevd_z(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_z))) +svfloat64_t svrevd_z(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_z))) +svfloat32_t svrevd_z(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_z))) +svfloat16_t svrevd_z(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_z))) +svint32_t svrevd_z(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_z))) +svint64_t svrevd_z(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_z))) +svint16_t svrevd_z(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_b))) +svboolx2_t svset2(svboolx2_t, uint64_t, svbool_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_b))) +svboolx4_t svset4(svboolx4_t, uint64_t, svbool_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2))) +void svst1(svcount_t, uint8_t *, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2))) +void svst1(svcount_t, int8_t *, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2))) +void svst1(svcount_t, uint64_t *, svuint64x2_t); 
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2))) +void svst1(svcount_t, float64_t *, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2))) +void svst1(svcount_t, int64_t *, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2))) +void svst1(svcount_t, uint16_t *, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2))) +void svst1(svcount_t, bfloat16_t *, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2))) +void svst1(svcount_t, float16_t *, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2))) +void svst1(svcount_t, int16_t *, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2))) +void svst1(svcount_t, uint32_t *, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2))) +void svst1(svcount_t, float32_t *, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2))) +void svst1(svcount_t, int32_t *, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4))) +void svst1(svcount_t, uint8_t *, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4))) +void svst1(svcount_t, int8_t *, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4))) +void svst1(svcount_t, uint64_t *, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4))) +void svst1(svcount_t, float64_t *, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4))) +void svst1(svcount_t, int64_t *, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4))) +void svst1(svcount_t, uint16_t *, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4))) +void svst1(svcount_t, bfloat16_t *, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4))) +void svst1(svcount_t, float16_t *, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4))) +void svst1(svcount_t, int16_t *, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4))) +void svst1(svcount_t, uint32_t *, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4))) +void svst1(svcount_t, float32_t *, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4))) +void svst1(svcount_t, int32_t *, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2))) +void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2))) +void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2))) +void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2))) +void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2))) +void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2))) +void svst1_vnum(svcount_t, uint16_t *, int64_t, 
svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2))) +void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2))) +void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2))) +void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2))) +void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2))) +void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2))) +void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4))) +void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4))) +void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4))) +void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4))) +void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4))) +void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4))) +void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4))) +void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4))) +void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4))) +void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4))) +void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4))) +void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4))) +void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2))) +void svstnt1(svcount_t, uint8_t *, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2))) +void svstnt1(svcount_t, int8_t *, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2))) +void svstnt1(svcount_t, uint64_t *, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2))) +void svstnt1(svcount_t, float64_t *, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2))) +void svstnt1(svcount_t, int64_t *, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2))) +void svstnt1(svcount_t, uint16_t *, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2))) +void svstnt1(svcount_t, 
bfloat16_t *, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2))) +void svstnt1(svcount_t, float16_t *, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2))) +void svstnt1(svcount_t, int16_t *, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2))) +void svstnt1(svcount_t, uint32_t *, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2))) +void svstnt1(svcount_t, float32_t *, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2))) +void svstnt1(svcount_t, int32_t *, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4))) +void svstnt1(svcount_t, uint8_t *, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4))) +void svstnt1(svcount_t, int8_t *, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4))) +void svstnt1(svcount_t, uint64_t *, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4))) +void svstnt1(svcount_t, float64_t *, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4))) +void svstnt1(svcount_t, int64_t *, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4))) +void svstnt1(svcount_t, uint16_t *, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4))) +void svstnt1(svcount_t, bfloat16_t *, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4))) +void svstnt1(svcount_t, float16_t *, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4))) +void svstnt1(svcount_t, int16_t *, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4))) +void svstnt1(svcount_t, uint32_t *, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4))) +void svstnt1(svcount_t, float32_t *, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4))) +void svstnt1(svcount_t, int32_t *, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2))) +void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2))) +void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2))) +void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2))) +void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2))) +void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2))) +void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2))) +void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2))) +void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2))) +void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2))) +void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2))) +void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2))) +void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4))) +void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4))) +void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4))) +void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4))) +void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4))) +void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4))) +void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4))) +void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4))) +void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4))) +void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4))) +void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4))) +void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4))) +void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64))) +svcount_t svwhilege_c8(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64))) +svcount_t svwhilege_c32(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64))) +svcount_t svwhilege_c64(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64))) +svcount_t svwhilege_c16(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64))) +svcount_t svwhilege_c8(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64))) +svcount_t svwhilege_c32(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64))) +svcount_t svwhilege_c64(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64))) +svcount_t svwhilege_c16(uint64_t, uint64_t, uint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64_x2))) +svboolx2_t svwhilege_b8_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64_x2))) +svboolx2_t svwhilege_b32_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64_x2))) +svboolx2_t svwhilege_b64_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64_x2))) +svboolx2_t svwhilege_b16_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64_x2))) +svboolx2_t svwhilege_b8_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64_x2))) +svboolx2_t svwhilege_b32_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64_x2))) +svboolx2_t svwhilege_b64_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64_x2))) +svboolx2_t svwhilege_b16_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64))) +svcount_t svwhilegt_c8(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64))) +svcount_t svwhilegt_c32(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64))) +svcount_t svwhilegt_c64(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64))) +svcount_t svwhilegt_c16(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64))) +svcount_t svwhilegt_c8(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64))) +svcount_t svwhilegt_c32(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64))) +svcount_t svwhilegt_c64(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64))) +svcount_t svwhilegt_c16(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64_x2))) +svboolx2_t svwhilegt_b8_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64_x2))) +svboolx2_t svwhilegt_b32_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64_x2))) +svboolx2_t svwhilegt_b64_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64_x2))) +svboolx2_t svwhilegt_b16_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64_x2))) +svboolx2_t svwhilegt_b8_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64_x2))) +svboolx2_t svwhilegt_b32_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64_x2))) +svboolx2_t svwhilegt_b64_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64_x2))) +svboolx2_t svwhilegt_b16_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64))) +svcount_t svwhilele_c8(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64))) +svcount_t svwhilele_c32(int64_t, int64_t, uint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64))) +svcount_t svwhilele_c64(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64))) +svcount_t svwhilele_c16(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64))) +svcount_t svwhilele_c8(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64))) +svcount_t svwhilele_c32(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64))) +svcount_t svwhilele_c64(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64))) +svcount_t svwhilele_c16(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64_x2))) +svboolx2_t svwhilele_b8_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64_x2))) +svboolx2_t svwhilele_b32_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64_x2))) +svboolx2_t svwhilele_b64_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64_x2))) +svboolx2_t svwhilele_b16_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64_x2))) +svboolx2_t svwhilele_b8_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64_x2))) +svboolx2_t svwhilele_b32_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64_x2))) +svboolx2_t svwhilele_b64_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64_x2))) +svboolx2_t svwhilele_b16_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64))) +svcount_t svwhilelt_c8(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64))) +svcount_t svwhilelt_c32(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64))) +svcount_t svwhilelt_c64(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64))) +svcount_t svwhilelt_c16(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64))) +svcount_t svwhilelt_c8(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64))) +svcount_t svwhilelt_c32(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64))) +svcount_t svwhilelt_c64(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64))) +svcount_t svwhilelt_c16(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64_x2))) +svboolx2_t svwhilelt_b8_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64_x2))) +svboolx2_t svwhilelt_b32_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64_x2))) +svboolx2_t svwhilelt_b64_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64_x2))) +svboolx2_t svwhilelt_b16_x2(uint64_t, uint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64_x2))) +svboolx2_t svwhilelt_b8_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64_x2))) +svboolx2_t svwhilelt_b32_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64_x2))) +svboolx2_t svwhilelt_b64_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64_x2))) +svboolx2_t svwhilelt_b16_x2(int64_t, int64_t); #define svcvtnt_bf16_x svcvtnt_bf16_m #define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m #define svcvtnt_f16_x svcvtnt_f16_m diff --git a/lib/include/arm_vector_types.h b/lib/include/arm_vector_types.h new file mode 100644 index 0000000000..b0dd66b07f --- /dev/null +++ b/lib/include/arm_vector_types.h @@ -0,0 +1,345 @@ +/*===---- arm_vector_types - ARM vector type ------=== + * + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H) +#error "This file should not be used standalone. Please include arm_neon.h or arm_sve.h instead" + +#endif +#ifndef __ARM_NEON_TYPES_H +#define __ARM_NEON_TYPES_H +typedef float float32_t; +typedef __fp16 float16_t; +#ifdef __aarch64__ +typedef double float64_t; +#endif + +typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; +typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; +typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t; +typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t; +typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t; +typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t; +typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t; +typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t; +typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; +typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t; +typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t; +typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; +typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t; +typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; +typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; +typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; +typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; +typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t; +typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; +typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t; +#ifdef __aarch64__ +typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t; +typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t; +#endif + +typedef struct int8x8x2_t { + int8x8_t val[2]; +} int8x8x2_t; + +typedef struct int8x16x2_t { + int8x16_t val[2]; +} int8x16x2_t; + +typedef struct int16x4x2_t { + int16x4_t val[2]; +} int16x4x2_t; + +typedef struct int16x8x2_t { + int16x8_t val[2]; +} int16x8x2_t; + +typedef struct int32x2x2_t { + int32x2_t val[2]; +} int32x2x2_t; + +typedef struct int32x4x2_t { + int32x4_t val[2]; +} int32x4x2_t; + +typedef struct int64x1x2_t { + int64x1_t val[2]; +} int64x1x2_t; + +typedef struct int64x2x2_t { + int64x2_t val[2]; +} 
int64x2x2_t; + +typedef struct uint8x8x2_t { + uint8x8_t val[2]; +} uint8x8x2_t; + +typedef struct uint8x16x2_t { + uint8x16_t val[2]; +} uint8x16x2_t; + +typedef struct uint16x4x2_t { + uint16x4_t val[2]; +} uint16x4x2_t; + +typedef struct uint16x8x2_t { + uint16x8_t val[2]; +} uint16x8x2_t; + +typedef struct uint32x2x2_t { + uint32x2_t val[2]; +} uint32x2x2_t; + +typedef struct uint32x4x2_t { + uint32x4_t val[2]; +} uint32x4x2_t; + +typedef struct uint64x1x2_t { + uint64x1_t val[2]; +} uint64x1x2_t; + +typedef struct uint64x2x2_t { + uint64x2_t val[2]; +} uint64x2x2_t; + +typedef struct float16x4x2_t { + float16x4_t val[2]; +} float16x4x2_t; + +typedef struct float16x8x2_t { + float16x8_t val[2]; +} float16x8x2_t; + +typedef struct float32x2x2_t { + float32x2_t val[2]; +} float32x2x2_t; + +typedef struct float32x4x2_t { + float32x4_t val[2]; +} float32x4x2_t; + +#ifdef __aarch64__ +typedef struct float64x1x2_t { + float64x1_t val[2]; +} float64x1x2_t; + +typedef struct float64x2x2_t { + float64x2_t val[2]; +} float64x2x2_t; + +#endif +typedef struct int8x8x3_t { + int8x8_t val[3]; +} int8x8x3_t; + +typedef struct int8x16x3_t { + int8x16_t val[3]; +} int8x16x3_t; + +typedef struct int16x4x3_t { + int16x4_t val[3]; +} int16x4x3_t; + +typedef struct int16x8x3_t { + int16x8_t val[3]; +} int16x8x3_t; + +typedef struct int32x2x3_t { + int32x2_t val[3]; +} int32x2x3_t; + +typedef struct int32x4x3_t { + int32x4_t val[3]; +} int32x4x3_t; + +typedef struct int64x1x3_t { + int64x1_t val[3]; +} int64x1x3_t; + +typedef struct int64x2x3_t { + int64x2_t val[3]; +} int64x2x3_t; + +typedef struct uint8x8x3_t { + uint8x8_t val[3]; +} uint8x8x3_t; + +typedef struct uint8x16x3_t { + uint8x16_t val[3]; +} uint8x16x3_t; + +typedef struct uint16x4x3_t { + uint16x4_t val[3]; +} uint16x4x3_t; + +typedef struct uint16x8x3_t { + uint16x8_t val[3]; +} uint16x8x3_t; + +typedef struct uint32x2x3_t { + uint32x2_t val[3]; +} uint32x2x3_t; + +typedef struct uint32x4x3_t { + uint32x4_t val[3]; +} uint32x4x3_t; + +typedef struct uint64x1x3_t { + uint64x1_t val[3]; +} uint64x1x3_t; + +typedef struct uint64x2x3_t { + uint64x2_t val[3]; +} uint64x2x3_t; + +typedef struct float16x4x3_t { + float16x4_t val[3]; +} float16x4x3_t; + +typedef struct float16x8x3_t { + float16x8_t val[3]; +} float16x8x3_t; + +typedef struct float32x2x3_t { + float32x2_t val[3]; +} float32x2x3_t; + +typedef struct float32x4x3_t { + float32x4_t val[3]; +} float32x4x3_t; + +#ifdef __aarch64__ +typedef struct float64x1x3_t { + float64x1_t val[3]; +} float64x1x3_t; + +typedef struct float64x2x3_t { + float64x2_t val[3]; +} float64x2x3_t; + +#endif +typedef struct int8x8x4_t { + int8x8_t val[4]; +} int8x8x4_t; + +typedef struct int8x16x4_t { + int8x16_t val[4]; +} int8x16x4_t; + +typedef struct int16x4x4_t { + int16x4_t val[4]; +} int16x4x4_t; + +typedef struct int16x8x4_t { + int16x8_t val[4]; +} int16x8x4_t; + +typedef struct int32x2x4_t { + int32x2_t val[4]; +} int32x2x4_t; + +typedef struct int32x4x4_t { + int32x4_t val[4]; +} int32x4x4_t; + +typedef struct int64x1x4_t { + int64x1_t val[4]; +} int64x1x4_t; + +typedef struct int64x2x4_t { + int64x2_t val[4]; +} int64x2x4_t; + +typedef struct uint8x8x4_t { + uint8x8_t val[4]; +} uint8x8x4_t; + +typedef struct uint8x16x4_t { + uint8x16_t val[4]; +} uint8x16x4_t; + +typedef struct uint16x4x4_t { + uint16x4_t val[4]; +} uint16x4x4_t; + +typedef struct uint16x8x4_t { + uint16x8_t val[4]; +} uint16x8x4_t; + +typedef struct uint32x2x4_t { + uint32x2_t val[4]; +} uint32x2x4_t; + +typedef struct uint32x4x4_t { 
+ uint32x4_t val[4]; +} uint32x4x4_t; + +typedef struct uint64x1x4_t { + uint64x1_t val[4]; +} uint64x1x4_t; + +typedef struct uint64x2x4_t { + uint64x2_t val[4]; +} uint64x2x4_t; + +typedef struct float16x4x4_t { + float16x4_t val[4]; +} float16x4x4_t; + +typedef struct float16x8x4_t { + float16x8_t val[4]; +} float16x8x4_t; + +typedef struct float32x2x4_t { + float32x2_t val[4]; +} float32x2x4_t; + +typedef struct float32x4x4_t { + float32x4_t val[4]; +} float32x4x4_t; + +#ifdef __aarch64__ +typedef struct float64x1x4_t { + float64x1_t val[4]; +} float64x1x4_t; + +typedef struct float64x2x4_t { + float64x2_t val[4]; +} float64x2x4_t; + +#endif +typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t; +typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t; + +typedef struct bfloat16x4x2_t { + bfloat16x4_t val[2]; +} bfloat16x4x2_t; + +typedef struct bfloat16x8x2_t { + bfloat16x8_t val[2]; +} bfloat16x8x2_t; + +typedef struct bfloat16x4x3_t { + bfloat16x4_t val[3]; +} bfloat16x4x3_t; + +typedef struct bfloat16x8x3_t { + bfloat16x8_t val[3]; +} bfloat16x8x3_t; + +typedef struct bfloat16x4x4_t { + bfloat16x4_t val[4]; +} bfloat16x4x4_t; + +typedef struct bfloat16x8x4_t { + bfloat16x8_t val[4]; +} bfloat16x8x4_t; + +#endif // __ARM_NEON_TYPES_H diff --git a/lib/include/avx2intrin.h b/lib/include/avx2intrin.h index 8f2de05674..096cae01b5 100644 --- a/lib/include/avx2intrin.h +++ b/lib/include/avx2intrin.h @@ -15,8 +15,12 @@ #define __AVX2INTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(256))) -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx2,no-evex512"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx2,no-evex512"), __min_vector_width__(128))) /* SSE4 Multiple Packed Sums of Absolute Difference. */ /// Computes sixteen sum of absolute difference (SAD) operations on sets of @@ -1307,6 +1311,23 @@ _mm256_min_epu32(__m256i __a, __m256i __b) return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b); } +/// Creates a 32-bit integer mask from the most significant bit of each byte +/// in the 256-bit integer vector in \a __a and returns the result. +/// +/// \code{.operation} +/// FOR i := 0 TO 31 +/// j := i*8 +/// result[i] := __a[j+7] +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VPMOVMSKB instruction. +/// +/// \param __a +/// A 256-bit integer vector containing the source bytes. +/// \returns The 32-bit integer mask. static __inline__ int __DEFAULT_FN_ATTRS256 _mm256_movemask_epi8(__m256i __a) { @@ -2962,7 +2983,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b) /// A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_stream_load_si256(__m256i const *__V) +_mm256_stream_load_si256(const void *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); diff --git a/lib/include/avx512bf16intrin.h b/lib/include/avx512bf16intrin.h index a864c1e335..b28d2e243f 100644 --- a/lib/include/avx512bf16intrin.h +++ b/lib/include/avx512bf16intrin.h @@ -20,10 +20,11 @@ typedef __bf16 __m512bh __attribute__((__vector_size__(64), __aligned__(64))); typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead"))); #define __DEFAULT_FN_ATTRS512 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16,evex512"), \ __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"))) + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bf16,no-evex512"))) /// Convert One BF16 Data to One Single Float Data. /// diff --git a/lib/include/avx512bitalgintrin.h b/lib/include/avx512bitalgintrin.h index d4411d156b..bad265ceb7 100644 --- a/lib/include/avx512bitalgintrin.h +++ b/lib/include/avx512bitalgintrin.h @@ -15,7 +15,10 @@ #define __AVX512BITALGINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bitalg,evex512"), \ + __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) diff --git a/lib/include/avx512bwintrin.h b/lib/include/avx512bwintrin.h index aaeb936480..c854720de6 100644 --- a/lib/include/avx512bwintrin.h +++ b/lib/include/avx512bwintrin.h @@ -18,8 +18,12 @@ typedef unsigned int __mmask32; typedef unsigned long long __mmask64; /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bw,evex512"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bw,no-evex512"))) static __inline __mmask32 __DEFAULT_FN_ATTRS _knot_mask32(__mmask32 __M) @@ -27,9 +31,7 @@ _knot_mask32(__mmask32 __M) return __builtin_ia32_knotsi(__M); } -static __inline __mmask64 __DEFAULT_FN_ATTRS -_knot_mask64(__mmask64 __M) -{ +static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) { return __builtin_ia32_knotdi(__M); } @@ -39,9 +41,8 @@ _kand_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kand_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); } @@ -51,9 +52,8 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kandn_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); } @@ -63,9 +63,8 @@ _kor_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kor_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); } @@ -75,9 +74,8 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kxnor_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); } @@ -87,9 +85,8 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kxor_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); } @@ -112,14 +109,12 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) -{ +_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) -{ +_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); } @@ -148,14 +143,12 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { } static __inline__ unsigned char __DEFAULT_FN_ATTRS 
-_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) -{ +_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); } static __inline__ unsigned char __DEFAULT_FN_ATTRS -_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) -{ +_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); } @@ -171,9 +164,8 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B) return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_kadd_mask64(__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A, + __mmask64 __B) { return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); } @@ -214,8 +206,7 @@ _load_mask32(__mmask32 *__A) { return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_load_mask64(__mmask64 *__A) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) { return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); } @@ -224,8 +215,8 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) { *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); } -static __inline__ void __DEFAULT_FN_ATTRS -_store_mask64(__mmask64 *__A, __mmask64 __B) { +static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, + __mmask64 __B) { *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); } @@ -1714,9 +1705,8 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A) (__v64qi) _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_mm512_kunpackd (__mmask64 __A, __mmask64 __B) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A, + __mmask64 __B) { return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B); } diff --git a/lib/include/avx512cdintrin.h b/lib/include/avx512cdintrin.h index bfdba84aa2..33b552f6fe 100644 --- a/lib/include/avx512cdintrin.h +++ b/lib/include/avx512cdintrin.h @@ -15,7 +15,9 @@ #define __AVX512CDINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512cd,evex512"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_conflict_epi64 (__m512i __A) diff --git a/lib/include/avx512dqintrin.h b/lib/include/avx512dqintrin.h index 3ba0a0cfd5..88b48e3a32 100644 --- a/lib/include/avx512dqintrin.h +++ b/lib/include/avx512dqintrin.h @@ -15,8 +15,10 @@ #define __AVX512DQINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512))) -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"))) +#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512dq,no-evex512"))) static __inline __mmask8 __DEFAULT_FN_ATTRS _knot_mask8(__mmask8 __M) diff --git a/lib/include/avx512fintrin.h b/lib/include/avx512fintrin.h index 88a8cebbee..4f172c74b3 100644 --- a/lib/include/avx512fintrin.h +++ b/lib/include/avx512fintrin.h @@ -167,9 +167,13 @@ typedef enum } _MM_MANTISSA_SIGN_ENUM; /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512))) -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) +#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512f,no-evex512"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512f,no-evex512"))) /* Create vectors with repeated elements */ diff --git a/lib/include/avx512fp16intrin.h b/lib/include/avx512fp16intrin.h index d326586578..4123f10c39 100644 --- a/lib/include/avx512fp16intrin.h +++ b/lib/include/avx512fp16intrin.h @@ -22,13 +22,15 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1))); /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS512 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \ - __min_vector_width__(512))) + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512fp16,evex512"), __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512fp16,no-evex512"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512fp16,no-evex512"), \ __min_vector_width__(128))) static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) { diff --git a/lib/include/avx512ifmaintrin.h b/lib/include/avx512ifmaintrin.h index 5f7da52f1f..9468d17556 100644 --- a/lib/include/avx512ifmaintrin.h +++ b/lib/include/avx512ifmaintrin.h @@ -15,7 +15,9 @@ #define __IFMAINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,evex512"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) diff --git a/lib/include/avx512ifmavlintrin.h b/lib/include/avx512ifmavlintrin.h index 3284ee1820..8787cd471d 100644 --- a/lib/include/avx512ifmavlintrin.h +++ b/lib/include/avx512ifmavlintrin.h @@ -15,8 +15,14 @@ #define __IFMAVLINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl,no-evex512"), \ + __min_vector_width__(256))) #define _mm_madd52hi_epu64(X, Y, Z) \ ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \ diff --git a/lib/include/avx512pfintrin.h b/lib/include/avx512pfintrin.h index b8bcf49c6b..f853be021a 100644 --- a/lib/include/avx512pfintrin.h +++ b/lib/include/avx512pfintrin.h @@ -14,9 +14,6 @@ #ifndef __AVX512PFINTRIN_H #define __AVX512PFINTRIN_H -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf"))) - #define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \ __builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \ (void const *)(addr), (int)(scale), \ @@ -92,6 +89,4 @@ __builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \ (void *)(addr), (int)(scale), (int)(hint)) -#undef __DEFAULT_FN_ATTRS - #endif diff --git a/lib/include/avx512vbmi2intrin.h b/lib/include/avx512vbmi2intrin.h index 17fa77722c..11598c8887 100644 --- a/lib/include/avx512vbmi2intrin.h +++ b/lib/include/avx512vbmi2intrin.h @@ -15,7 +15,7 @@ #define __AVX512VBMI2INTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2,evex512"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git a/lib/include/avx512vbmiintrin.h b/lib/include/avx512vbmiintrin.h index c0e0f94d48..e47cd5cadd 100644 --- a/lib/include/avx512vbmiintrin.h +++ b/lib/include/avx512vbmiintrin.h @@ -15,8 +15,9 @@ #define __VBMIINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512))) - +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vbmi,evex512"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) diff --git a/lib/include/avx512vbmivlintrin.h b/lib/include/avx512vbmivlintrin.h index c5b96ae8ad..848ca2d18c 100644 --- a/lib/include/avx512vbmivlintrin.h +++ b/lib/include/avx512vbmivlintrin.h @@ -15,9 +15,14 @@ #define __VBMIVLINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256))) - +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vbmi,avx512vl,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vbmi,avx512vl,no-evex512"), \ + __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) diff --git a/lib/include/avx512vlbf16intrin.h b/lib/include/avx512vlbf16intrin.h index f5b8911fac..89c9f49c7a 100644 --- a/lib/include/avx512vlbf16intrin.h +++ b/lib/include/avx512vlbf16intrin.h @@ -15,12 +15,14 @@ #ifndef __AVX512VLBF16INTRIN_H #define __AVX512VLBF16INTRIN_H -#define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl, avx512bf16"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl, avx512bf16"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bf16,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bf16,no-evex512"), \ + __min_vector_width__(256))) /// Convert Two Packed Single Data to One Packed BF16 Data. /// diff --git a/lib/include/avx512vlbitalgintrin.h b/lib/include/avx512vlbitalgintrin.h index 5154eae14c..377e3a5ea5 100644 --- a/lib/include/avx512vlbitalgintrin.h +++ b/lib/include/avx512vlbitalgintrin.h @@ -15,8 +15,14 @@ #define __AVX512VLBITALGINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bitalg,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bitalg,no-evex512"), \ + __min_vector_width__(256))) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi16(__m256i __A) diff --git a/lib/include/avx512vlbwintrin.h b/lib/include/avx512vlbwintrin.h index 148af5ab9a..9aedba0669 100644 --- a/lib/include/avx512vlbwintrin.h +++ b/lib/include/avx512vlbwintrin.h @@ -15,8 +15,14 @@ #define __AVX512VLBWINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bw,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bw,no-evex512"), \ + __min_vector_width__(256))) /* Integer compare */ diff --git a/lib/include/avx512vlcdintrin.h b/lib/include/avx512vlcdintrin.h index cc8b72528d..923e2c551a 100644 --- a/lib/include/avx512vlcdintrin.h +++ b/lib/include/avx512vlcdintrin.h @@ -14,9 +14,14 @@ #define __AVX512VLCDINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256))) - +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd,no-evex512"), \ + __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmb_epi64 (__mmask8 __A) diff --git a/lib/include/avx512vldqintrin.h b/lib/include/avx512vldqintrin.h index 713e1a18a1..272cdd89e2 100644 --- a/lib/include/avx512vldqintrin.h +++ b/lib/include/avx512vldqintrin.h @@ -15,8 +15,14 @@ #define __AVX512VLDQINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512dq,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512dq,no-evex512"), \ + __min_vector_width__(256))) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi64 (__m256i __A, __m256i __B) { diff --git a/lib/include/avx512vlfp16intrin.h b/lib/include/avx512vlfp16intrin.h index d4a7d1b1c5..a12acb7d9a 100644 --- a/lib/include/avx512vlfp16intrin.h +++ b/lib/include/avx512vlfp16intrin.h @@ -19,11 +19,11 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512fp16, avx512vl"), \ + __target__("avx512fp16,avx512vl,no-evex512"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512fp16, avx512vl"), \ + __target__("avx512fp16,avx512vl,no-evex512"), \ __min_vector_width__(128))) static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) { diff --git a/lib/include/avx512vlintrin.h b/lib/include/avx512vlintrin.h index 3e8355f145..2a5f7b43f6 100644 --- a/lib/include/avx512vlintrin.h +++ b/lib/include/avx512vlintrin.h @@ -14,8 +14,14 @@ #ifndef __AVX512VLINTRIN_H #define __AVX512VLINTRIN_H -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,no-evex512"), \ + __min_vector_width__(256))) typedef short __v2hi __attribute__((__vector_size__(4))); typedef char __v4qi __attribute__((__vector_size__(4))); diff --git a/lib/include/avx512vlvbmi2intrin.h b/lib/include/avx512vlvbmi2intrin.h index fac1f23241..77af2d5cbd 100644 --- a/lib/include/avx512vlvbmi2intrin.h +++ b/lib/include/avx512vlvbmi2intrin.h @@ -15,8 +15,14 @@ #define __AVX512VLVBMI2INTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512vbmi2,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512vbmi2,no-evex512"), \ + __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) diff --git a/lib/include/avx512vlvnniintrin.h b/lib/include/avx512vlvnniintrin.h index 8bc0694e64..d1e5cd9d69 100644 --- a/lib/include/avx512vlvnniintrin.h +++ b/lib/include/avx512vlvnniintrin.h @@ -15,8 +15,14 @@ #define __AVX512VLVNNIINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512vnni,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512vnni,no-evex512"), \ + __min_vector_width__(256))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed diff --git a/lib/include/avx512vlvp2intersectintrin.h b/lib/include/avx512vlvp2intersectintrin.h index 3e0815e5d4..63a31241a5 100644 --- a/lib/include/avx512vlvp2intersectintrin.h +++ b/lib/include/avx512vlvp2intersectintrin.h @@ -28,12 +28,14 @@ #ifndef _AVX512VLVP2INTERSECT_H #define _AVX512VLVP2INTERSECT_H -#define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512vp2intersect,no-evex512"), \ __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \ +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512vp2intersect,no-evex512"), \ __min_vector_width__(256))) /// Store, in an even/odd pair of mask registers, the indicators of the /// locations of value matches between dwords in operands __a and __b. diff --git a/lib/include/avx512vnniintrin.h b/lib/include/avx512vnniintrin.h index 9935a119aa..0fb381a12f 100644 --- a/lib/include/avx512vnniintrin.h +++ b/lib/include/avx512vnniintrin.h @@ -15,8 +15,9 @@ #define __AVX512VNNIINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512))) - +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vnni,evex512"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) diff --git a/lib/include/avx512vp2intersectintrin.h b/lib/include/avx512vp2intersectintrin.h index 5d3cb48cfd..16552cae3b 100644 --- a/lib/include/avx512vp2intersectintrin.h +++ b/lib/include/avx512vp2intersectintrin.h @@ -28,8 +28,9 @@ #ifndef _AVX512VP2INTERSECT_H #define _AVX512VP2INTERSECT_H -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vp2intersect"), \ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vp2intersect,evex512"), \ __min_vector_width__(512))) /// Store, in an even/odd pair of mask registers, the indicators of the diff --git a/lib/include/avx512vpopcntdqintrin.h b/lib/include/avx512vpopcntdqintrin.h index bb435e6233..e73e7e4f71 100644 --- a/lib/include/avx512vpopcntdqintrin.h +++ b/lib/include/avx512vpopcntdqintrin.h @@ -17,7 +17,9 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512))) + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq,evex512"), \ + __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A); diff --git a/lib/include/avx512vpopcntdqvlintrin.h b/lib/include/avx512vpopcntdqvlintrin.h index a3cb9b6bcc..b2df2e84d3 100644 --- a/lib/include/avx512vpopcntdqvlintrin.h +++ b/lib/include/avx512vpopcntdqvlintrin.h @@ -17,9 +17,13 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128))) + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ + __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256))) + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ + __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi64(__m128i __A) { diff --git a/lib/include/avxintrin.h b/lib/include/avxintrin.h index 94fac5e6c9..f116d8bc3a 100644 --- a/lib/include/avxintrin.h +++ b/lib/include/avxintrin.h @@ -50,8 +50,12 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32))); #endif /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256))) -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \ + __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \ + __min_vector_width__(128))) /* Arithmetic */ /// Adds two 256-bit vectors of [4 x double]. @@ -3563,7 +3567,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) /// \param __b /// A 256-bit integer vector containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_si256(__m256i *__a, __m256i __b) +_mm256_stream_si256(void *__a, __m256i __b) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); @@ -3583,7 +3587,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x double] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_pd(double *__a, __m256d __b) +_mm256_stream_pd(void *__a, __m256d __b) { typedef __v4df __v4df_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); @@ -3604,7 +3608,7 @@ _mm256_stream_pd(double *__a, __m256d __b) /// \param __a /// A 256-bit vector of [8 x float] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_ps(float *__p, __m256 __a) +_mm256_stream_ps(void *__p, __m256 __a) { typedef __v8sf __v8sf_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p); diff --git a/lib/include/bmiintrin.h b/lib/include/bmiintrin.h index ffb94bea63..d8e57c0cb4 100644 --- a/lib/include/bmiintrin.h +++ b/lib/include/bmiintrin.h @@ -19,18 +19,17 @@ to use it as a potentially faster version of BSF. */ #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) -#define _tzcnt_u16(a) (__tzcnt_u16((a))) - /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// -/// This intrinsic corresponds to the TZCNT instruction. +/// This intrinsic corresponds to the \c TZCNT instruction. /// /// \param __X /// An unsigned 16-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 16-bit integer containing the number of trailing zero /// bits in the operand. +/// \see _tzcnt_u16 static __inline__ unsigned short __RELAXED_FN_ATTRS __tzcnt_u16(unsigned short __X) { @@ -41,13 +40,30 @@ __tzcnt_u16(unsigned short __X) /// /// \headerfile /// -/// This intrinsic corresponds to the TZCNT instruction. +/// \code +/// unsigned short _tzcnt_u16(unsigned short __X); +/// \endcode +/// +/// This intrinsic corresponds to the \c TZCNT instruction. +/// +/// \param __X +/// An unsigned 16-bit integer whose trailing zeros are to be counted. +/// \returns An unsigned 16-bit integer containing the number of trailing zero +/// bits in the operand. +/// \see __tzcnt_u16 +#define _tzcnt_u16 __tzcnt_u16 + +/// Counts the number of trailing zero bits in the operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c TZCNT instruction. /// /// \param __X /// An unsigned 32-bit integer whose trailing zeros are to be counted. 
/// \returns An unsigned 32-bit integer containing the number of trailing zero
/// bits in the operand.
-/// \see _mm_tzcnt_32
+/// \see { _mm_tzcnt_32 _tzcnt_u32 }
static __inline__ unsigned int __RELAXED_FN_ATTRS
__tzcnt_u32(unsigned int __X)
{
@@ -58,20 +74,35 @@ __tzcnt_u32(unsigned int __X)
///
/// \headerfile
///
-/// This intrinsic corresponds to the TZCNT instruction.
+/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
-/// \returns An 32-bit integer containing the number of trailing zero bits in
+/// \returns A 32-bit integer containing the number of trailing zero bits in
/// the operand.
-/// \see __tzcnt_u32
+/// \see { __tzcnt_u32 _tzcnt_u32 }
static __inline__ int __RELAXED_FN_ATTRS
_mm_tzcnt_32(unsigned int __X)
{
  return (int)__builtin_ia32_tzcnt_u32(__X);
}

-#define _tzcnt_u32(a) (__tzcnt_u32((a)))
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile
+///
+/// \code
+/// unsigned int _tzcnt_u32(unsigned int __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 32-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 32-bit integer containing the number of trailing zero
+/// bits in the operand.
+/// \see { _mm_tzcnt_32 __tzcnt_u32 }
+#define _tzcnt_u32 __tzcnt_u32

#ifdef __x86_64__

@@ -79,13 +110,13 @@ _mm_tzcnt_32(unsigned int __X)
///
/// \headerfile
///
-/// This intrinsic corresponds to the TZCNT instruction.
+/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
/// bits in the operand.
-/// \see _mm_tzcnt_64
+/// \see { _mm_tzcnt_64 _tzcnt_u64 }
static __inline__ unsigned long long __RELAXED_FN_ATTRS
__tzcnt_u64(unsigned long long __X)
{
@@ -96,20 +127,35 @@ __tzcnt_u64(unsigned long long __X)
///
/// \headerfile
///
-/// This intrinsic corresponds to the TZCNT instruction.
+/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An 64-bit integer containing the number of trailing zero bits in
/// the operand.
-/// \see __tzcnt_u64
+/// \see { __tzcnt_u64 _tzcnt_u64 }
static __inline__ long long __RELAXED_FN_ATTRS
_mm_tzcnt_64(unsigned long long __X)
{
  return (long long)__builtin_ia32_tzcnt_u64(__X);
}

-#define _tzcnt_u64(a) (__tzcnt_u64((a)))
+/// Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile
+///
+/// \code
+/// unsigned long long _tzcnt_u64(unsigned long long __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 64-bit integer containing the number of trailing zero
+/// bits in the operand.
+/// \see { _mm_tzcnt_64 __tzcnt_u64 }
+#define _tzcnt_u64 __tzcnt_u64

#endif /* __x86_64__ */

@@ -121,21 +167,12 @@ _mm_tzcnt_64(unsigned long long __X)

/* Define the default attributes for the functions in this file.
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) -#define _andn_u32(a, b) (__andn_u32((a), (b))) - -/* _bextr_u32 != __bextr_u32 */ -#define _blsi_u32(a) (__blsi_u32((a))) - -#define _blsmsk_u32(a) (__blsmsk_u32((a))) - -#define _blsr_u32(a) (__blsr_u32((a))) - /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// /// \headerfile /// -/// This intrinsic corresponds to the ANDN instruction. +/// This intrinsic corresponds to the \c ANDN instruction. /// /// \param __X /// An unsigned integer containing one of the operands. @@ -143,19 +180,40 @@ _mm_tzcnt_64(unsigned long long __X) /// An unsigned integer containing one of the operands. /// \returns An unsigned integer containing the bitwise AND of the second /// operand with the one's complement of the first operand. +/// \see _andn_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __andn_u32(unsigned int __X, unsigned int __Y) { return ~__X & __Y; } +/// Performs a bitwise AND of the second operand with the one's +/// complement of the first operand. +/// +/// \headerfile +/// +/// \code +/// unsigned int _andn_u32(unsigned int __X, unsigned int __Y); +/// \endcode +/// +/// This intrinsic corresponds to the \c ANDN instruction. +/// +/// \param __X +/// An unsigned integer containing one of the operands. +/// \param __Y +/// An unsigned integer containing one of the operands. +/// \returns An unsigned integer containing the bitwise AND of the second +/// operand with the one's complement of the first operand. +/// \see __andn_u32 +#define _andn_u32 __andn_u32 + /* AMD-specified, double-leading-underscore version of BEXTR */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile /// -/// This intrinsic corresponds to the BEXTR instruction. +/// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned integer whose bits are to be extracted. @@ -178,7 +236,7 @@ __bextr_u32(unsigned int __X, unsigned int __Y) /// /// \headerfile /// -/// This intrinsic corresponds to the BEXTR instruction. +/// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned integer whose bits are to be extracted. @@ -203,7 +261,7 @@ _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) /// /// \headerfile /// -/// This intrinsic corresponds to the BEXTR instruction. +/// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned integer whose bits are to be extracted. @@ -224,33 +282,89 @@ _bextr2_u32(unsigned int __X, unsigned int __Y) { /// /// \headerfile /// -/// This intrinsic corresponds to the BLSI instruction. +/// This intrinsic corresponds to the \c BLSI instruction. /// /// \param __X /// An unsigned integer whose bits are to be cleared. /// \returns An unsigned integer containing the result of clearing the bits from /// the source operand. +/// \see _blsi_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsi_u32(unsigned int __X) { return __X & -__X; } +/// Clears all bits in the source except for the least significant bit +/// containing a value of 1 and returns the result. +/// +/// \headerfile +/// +/// \code +/// unsigned int _blsi_u32(unsigned int __X); +/// \endcode +/// +/// This intrinsic corresponds to the \c BLSI instruction. +/// +/// \param __X +/// An unsigned integer whose bits are to be cleared. 
+/// \returns An unsigned integer containing the result of clearing the bits from
+/// the source operand.
+/// \see __blsi_u32
+#define _blsi_u32 __blsi_u32
+
+/// Creates a mask whose bits are set to 1, using bit 0 up to and
+/// including the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the \c BLSMSK instruction.
+///
+/// \param __X
+/// An unsigned integer used to create the mask.
+/// \returns An unsigned integer containing the newly created mask.
+/// \see _blsmsk_u32
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+__blsmsk_u32(unsigned int __X)
+{
+  return __X ^ (__X - 1);
+}
+
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile
///
-/// This intrinsic corresponds to the BLSMSK instruction.
+/// \code
+/// unsigned int _blsmsk_u32(unsigned int __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
/// An unsigned integer used to create the mask.
/// \returns An unsigned integer containing the newly created mask.
+/// \see __blsmsk_u32
+#define _blsmsk_u32 __blsmsk_u32
+
+/// Clears the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the \c BLSR instruction.
+///
+/// \param __X
+/// An unsigned integer containing the operand to be cleared.
+/// \returns An unsigned integer containing the result of clearing the source
+/// operand.
+/// \see _blsr_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__blsmsk_u32(unsigned int __X)
+__blsr_u32(unsigned int __X)
{
-  return __X ^ (__X - 1);
+  return __X & (__X - 1);
}

/// Clears the least significant bit that is set to 1 in the source
@@ -258,35 +372,27 @@ __blsmsk_u32(unsigned int __X)
///
/// \headerfile
///
-/// This intrinsic corresponds to the BLSR instruction.
+/// \code
+/// unsigned int _blsr_u32(unsigned int __X);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
/// An unsigned integer containing the operand to be cleared.
/// \returns An unsigned integer containing the result of clearing the source
/// operand.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__blsr_u32(unsigned int __X)
-{
-  return __X & (__X - 1);
-}
+/// \see __blsr_u32
+#define _blsr_u32 __blsr_u32

#ifdef __x86_64__

-#define _andn_u64(a, b) (__andn_u64((a), (b)))
-
-/* _bextr_u64 != __bextr_u64 */
-#define _blsi_u64(a) (__blsi_u64((a)))
-
-#define _blsmsk_u64(a) (__blsmsk_u64((a)))
-
-#define _blsr_u64(a) (__blsr_u64((a)))
-
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile
///
-/// This intrinsic corresponds to the ANDN instruction.
+/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
/// An unsigned 64-bit integer containing one of the operands.
@@ -294,19 +400,41 @@ __blsr_u32(unsigned int __X)
/// An unsigned 64-bit integer containing one of the operands.
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
/// operand with the one's complement of the first operand.
+/// \see _andn_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS __andn_u64 (unsigned long long __X, unsigned long long __Y) { return ~__X & __Y; } +/// Performs a bitwise AND of the second operand with the one's +/// complement of the first operand. +/// +/// \headerfile +/// +/// \code +/// unsigned long long _andn_u64(unsigned long long __X, +/// unsigned long long __Y); +/// \endcode +/// +/// This intrinsic corresponds to the \c ANDN instruction. +/// +/// \param __X +/// An unsigned 64-bit integer containing one of the operands. +/// \param __Y +/// An unsigned 64-bit integer containing one of the operands. +/// \returns An unsigned 64-bit integer containing the bitwise AND of the second +/// operand with the one's complement of the first operand. +/// \see __andn_u64 +#define _andn_u64 __andn_u64 + /* AMD-specified, double-leading-underscore version of BEXTR */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile /// -/// This intrinsic corresponds to the BEXTR instruction. +/// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be extracted. @@ -329,7 +457,7 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y) /// /// \headerfile /// -/// This intrinsic corresponds to the BEXTR instruction. +/// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be extracted. @@ -354,7 +482,7 @@ _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) /// /// \headerfile /// -/// This intrinsic corresponds to the BEXTR instruction. +/// This intrinsic corresponds to the \c BEXTR instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be extracted. @@ -375,33 +503,89 @@ _bextr2_u64(unsigned long long __X, unsigned long long __Y) { /// /// \headerfile /// -/// This intrinsic corresponds to the BLSI instruction. +/// This intrinsic corresponds to the \c BLSI instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be cleared. /// \returns An unsigned 64-bit integer containing the result of clearing the /// bits from the source operand. +/// \see _blsi_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsi_u64(unsigned long long __X) { return __X & -__X; } +/// Clears all bits in the source except for the least significant bit +/// containing a value of 1 and returns the result. +/// +/// \headerfile +/// +/// \code +/// unsigned long long _blsi_u64(unsigned long long __X); +/// \endcode +/// +/// This intrinsic corresponds to the \c BLSI instruction. +/// +/// \param __X +/// An unsigned 64-bit integer whose bits are to be cleared. +/// \returns An unsigned 64-bit integer containing the result of clearing the +/// bits from the source operand. +/// \see __blsi_u64 +#define _blsi_u64 __blsi_u64 + +/// Creates a mask whose bits are set to 1, using bit 0 up to and +/// including the least significant bit that is set to 1 in the source +/// operand and returns the result. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c BLSMSK instruction. +/// +/// \param __X +/// An unsigned 64-bit integer used to create the mask. +/// \returns An unsigned 64-bit integer containing the newly created mask. 
+/// \see _blsmsk_u64 +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__blsmsk_u64(unsigned long long __X) +{ + return __X ^ (__X - 1); +} + /// Creates a mask whose bits are set to 1, using bit 0 up to and /// including the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// -/// This intrinsic corresponds to the BLSMSK instruction. +/// \code +/// unsigned long long _blsmsk_u64(unsigned long long __X); +/// \endcode +/// +/// This intrinsic corresponds to the \c BLSMSK instruction. /// /// \param __X /// An unsigned 64-bit integer used to create the mask. /// \returns An unsigned 64-bit integer containing the newly created mask. +/// \see __blsmsk_u64 +#define _blsmsk_u64 __blsmsk_u64 + +/// Clears the least significant bit that is set to 1 in the source +/// operand and returns the result. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c BLSR instruction. +/// +/// \param __X +/// An unsigned 64-bit integer containing the operand to be cleared. +/// \returns An unsigned 64-bit integer containing the result of clearing the +/// source operand. +/// \see _blsr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blsmsk_u64(unsigned long long __X) +__blsr_u64(unsigned long long __X) { - return __X ^ (__X - 1); + return __X & (__X - 1); } /// Clears the least significant bit that is set to 1 in the source @@ -409,17 +593,18 @@ __blsmsk_u64(unsigned long long __X) /// /// \headerfile /// -/// This intrinsic corresponds to the BLSR instruction. +/// \code +/// unsigned long long _blsr_u64(unsigned long long __X); +/// \endcode +/// +/// This intrinsic corresponds to the \c BLSR instruction. /// /// \param __X /// An unsigned 64-bit integer containing the operand to be cleared. /// \returns An unsigned 64-bit integer containing the result of clearing the /// source operand. -static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blsr_u64(unsigned long long __X) -{ - return __X & (__X - 1); -} +/// \see __blsr_u64 +#define _blsr_u64 __blsr_u64 #endif /* __x86_64__ */ diff --git a/lib/include/cuda_wrappers/bits/basic_string.h b/lib/include/cuda_wrappers/bits/basic_string.h new file mode 100644 index 0000000000..64f50d9f6a --- /dev/null +++ b/lib/include/cuda_wrappers/bits/basic_string.h @@ -0,0 +1,9 @@ +// CUDA headers define __noinline__ which interferes with libstdc++'s use of +// `__attribute((__noinline__))`. In order to avoid compilation error, +// temporarily unset __noinline__ when we include affected libstdc++ header. + +#pragma push_macro("__noinline__") +#undef __noinline__ +#include_next "bits/basic_string.h" + +#pragma pop_macro("__noinline__") diff --git a/lib/include/cuda_wrappers/bits/basic_string.tcc b/lib/include/cuda_wrappers/bits/basic_string.tcc new file mode 100644 index 0000000000..90c7fe34d9 --- /dev/null +++ b/lib/include/cuda_wrappers/bits/basic_string.tcc @@ -0,0 +1,9 @@ +// CUDA headers define __noinline__ which interferes with libstdc++'s use of +// `__attribute((__noinline__))`. In order to avoid compilation error, +// temporarily unset __noinline__ when we include affected libstdc++ header. 
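As an aside, the guard used by these new cuda_wrappers headers can be reproduced in a plain C++ translation unit; the sketch below is illustrative only (the #define stands in for CUDA's own definition of __noinline__ and is not copied from any CUDA header):

// Standalone illustration of the push_macro/undef/pop_macro guard used by the
// new cuda_wrappers/bits/basic_string.{h,tcc} headers. The #define below is a
// stand-in for what CUDA's headers do, not an exact copy.
#include <cstdio>

#define __noinline__ __attribute__((noinline)) // stand-in for the CUDA macro

#pragma push_macro("__noinline__")
#undef __noinline__
// With the macro hidden, __noinline__ can again be spelled as an attribute
// name, which is how libstdc++ uses it inside the affected headers.
__attribute__((__noinline__)) static int identity(int x) { return x; }
#pragma pop_macro("__noinline__")
// After pop_macro the stand-in macro definition is visible again.

int main() { std::printf("%d\n", identity(42)); }

The same push/undef/include_next/pop sequence is what the wrapper applies around the real libstdc++ header, so the macro is only hidden for the duration of that include.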
+ +#pragma push_macro("__noinline__") +#undef __noinline__ +#include_next "bits/basic_string.tcc" + +#pragma pop_macro("__noinline__") diff --git a/lib/include/emmintrin.h b/lib/include/emmintrin.h index 064d974936..96e3ebdecb 100644 --- a/lib/include/emmintrin.h +++ b/lib/include/emmintrin.h @@ -50,11 +50,11 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ - __min_vector_width__(128))) + __attribute__((__always_inline__, __nodebug__, \ + __target__("sse2,no-evex512"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS_MMX \ - __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), \ - __min_vector_width__(64))) + __attribute__((__always_inline__, __nodebug__, \ + __target__("mmx,sse2,no-evex512"), __min_vector_width__(64))) /// Adds lower double-precision values in both operands and returns the /// sum in the lower 64 bits of the result. The upper 64 bits of the result @@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, /// A pointer to the 128-bit aligned memory location used to store the value. /// \param __a /// A vector of [2 x double] containing the 64-bit values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, __m128d __a) { __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); } @@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, /// A pointer to the 128-bit aligned memory location used to store the value. /// \param __a /// A 128-bit integer vector containing the values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, __m128i __a) { __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); } @@ -3983,8 +3983,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, /// A 32-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) - _mm_stream_si32(int *__p, int __a) { - __builtin_ia32_movnti(__p, __a); + _mm_stream_si32(void *__p, int __a) { + __builtin_ia32_movnti((int *)__p, __a); } #ifdef __x86_64__ @@ -4003,8 +4003,8 @@ static __inline__ void /// A 64-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) - _mm_stream_si64(long long *__p, long long __a) { - __builtin_ia32_movnti64(__p, __a); + _mm_stream_si64(void *__p, long long __a) { + __builtin_ia32_movnti64((long long *)__p, __a); } #endif diff --git a/lib/include/gfniintrin.h b/lib/include/gfniintrin.h index 5ec53c54fc..73b04a824a 100644 --- a/lib/include/gfniintrin.h +++ b/lib/include/gfniintrin.h @@ -15,19 +15,36 @@ #define __GFNIINTRIN_H /* Default attributes for simple form (no masking). */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("gfni,no-evex512"), __min_vector_width__(128))) /* Default attributes for YMM unmasked form. 
*/ -#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS_Y \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx,gfni,no-evex512"), \ + __min_vector_width__(256))) /* Default attributes for ZMM unmasked forms. */ -#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS_Z \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512f,evex512,gfni"), \ + __min_vector_width__(512))) /* Default attributes for ZMM masked forms. */ -#define __DEFAULT_FN_ATTRS_Z_MASK __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS_Z_MASK \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bw,evex512,gfni"), \ + __min_vector_width__(512))) /* Default attributes for VLX masked forms. */ -#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS_VL128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bw,avx512vl,gfni,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_VL256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bw,avx512vl,gfni,no-evex512"), \ + __min_vector_width__(256))) #define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \ ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ diff --git a/lib/include/ia32intrin.h b/lib/include/ia32intrin.h index f1904efd71..1b979770e1 100644 --- a/lib/include/ia32intrin.h +++ b/lib/include/ia32intrin.h @@ -26,167 +26,271 @@ #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif -/** Find the first set bit starting from the lsb. Result is undefined if - * input is 0. - * - * \headerfile - * - * This intrinsic corresponds to the BSF instruction or the - * TZCNT instruction. - * - * \param __A - * A 32-bit integer operand. - * \returns A 32-bit integer containing the bit number. - */ +/// Find the first set bit starting from the lsb. Result is undefined if +/// input is 0. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c BSF instruction or the +/// \c TZCNT instruction. +/// +/// \param __A +/// A 32-bit integer operand. +/// \returns A 32-bit integer containing the bit number. +/// \see _bit_scan_forward static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsfd(int __A) { return __builtin_ctz((unsigned int)__A); } -/** Find the first set bit starting from the msb. Result is undefined if - * input is 0. - * - * \headerfile - * - * This intrinsic corresponds to the BSR instruction or the - * LZCNT instruction and an XOR . - * - * \param __A - * A 32-bit integer operand. - * \returns A 32-bit integer containing the bit number. - */ +/// Find the first set bit starting from the msb. Result is undefined if +/// input is 0. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c BSR instruction or the +/// \c LZCNT instruction and an \c XOR. +/// +/// \param __A +/// A 32-bit integer operand. +/// \returns A 32-bit integer containing the bit number. 
+/// \see _bit_scan_reverse static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsrd(int __A) { return 31 - __builtin_clz((unsigned int)__A); } -/** Swaps the bytes in the input. Converting little endian to big endian or - * vice versa. - * - * \headerfile - * - * This intrinsic corresponds to the BSWAP instruction. - * - * \param __A - * A 32-bit integer operand. - * \returns A 32-bit integer containing the swapped bytes. - */ +/// Swaps the bytes in the input, converting little endian to big endian or +/// vice versa. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c BSWAP instruction. +/// +/// \param __A +/// A 32-bit integer operand. +/// \returns A 32-bit integer containing the swapped bytes. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bswapd(int __A) { return (int)__builtin_bswap32((unsigned int)__A); } +/// Swaps the bytes in the input, converting little endian to big endian or +/// vice versa. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c BSWAP instruction. +/// +/// \param __A +/// A 32-bit integer operand. +/// \returns A 32-bit integer containing the swapped bytes. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _bswap(int __A) { return (int)__builtin_bswap32((unsigned int)__A); } +/// Find the first set bit starting from the lsb. Result is undefined if +/// input is 0. +/// +/// \headerfile +/// +/// \code +/// int _bit_scan_forward(int A); +/// \endcode +/// +/// This intrinsic corresponds to the \c BSF instruction or the +/// \c TZCNT instruction. +/// +/// \param A +/// A 32-bit integer operand. +/// \returns A 32-bit integer containing the bit number. +/// \see __bsfd #define _bit_scan_forward(A) __bsfd((A)) + +/// Find the first set bit starting from the msb. Result is undefined if +/// input is 0. +/// +/// \headerfile +/// +/// \code +/// int _bit_scan_reverse(int A); +/// \endcode +/// +/// This intrinsic corresponds to the \c BSR instruction or the +/// \c LZCNT instruction and an \c XOR. +/// +/// \param A +/// A 32-bit integer operand. +/// \returns A 32-bit integer containing the bit number. +/// \see __bsrd #define _bit_scan_reverse(A) __bsrd((A)) #ifdef __x86_64__ -/** Find the first set bit starting from the lsb. Result is undefined if - * input is 0. - * - * \headerfile - * - * This intrinsic corresponds to the BSF instruction or the - * TZCNT instruction. - * - * \param __A - * A 64-bit integer operand. - * \returns A 32-bit integer containing the bit number. - */ +/// Find the first set bit starting from the lsb. Result is undefined if +/// input is 0. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c BSF instruction or the +/// \c TZCNT instruction. +/// +/// \param __A +/// A 64-bit integer operand. +/// \returns A 32-bit integer containing the bit number. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsfq(long long __A) { return (long long)__builtin_ctzll((unsigned long long)__A); } -/** Find the first set bit starting from the msb. Result is undefined if - * input is 0. - * - * \headerfile - * - * This intrinsic corresponds to the BSR instruction or the - * LZCNT instruction and an XOR . - * - * \param __A - * A 64-bit integer operand. - * \returns A 32-bit integer containing the bit number. - */ +/// Find the first set bit starting from the msb. Result is undefined if +/// input is 0. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c BSR instruction or the +/// \c LZCNT instruction and an \c XOR. +/// +/// \param __A +/// A 64-bit integer operand. 
+/// \returns A 32-bit integer containing the bit number.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsrq(long long __A) { return 63 - __builtin_clzll((unsigned long long)__A); }
-/** Swaps the bytes in the input. Converting little endian to big endian or
- * vice versa.
- *
- * \headerfile
- *
- * This intrinsic corresponds to the BSWAP instruction.
- *
- * \param __A
- * A 64-bit integer operand.
- * \returns A 64-bit integer containing the swapped bytes.
- */
+/// Swaps the bytes in the input, converting little endian to big endian or
+/// vice versa.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the \c BSWAP instruction.
+///
+/// \param __A
+/// A 64-bit integer operand.
+/// \returns A 64-bit integer containing the swapped bytes.
+/// \see _bswap64
static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR __bswapq(long long __A) { return (long long)__builtin_bswap64((unsigned long long)__A); }
+/// Swaps the bytes in the input, converting little endian to big endian or
+/// vice versa.
+///
+/// \headerfile
+///
+/// \code
+/// long long _bswap64(long long A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BSWAP instruction.
+///
+/// \param A
+/// A 64-bit integer operand.
+/// \returns A 64-bit integer containing the swapped bytes.
+/// \see __bswapq
#define _bswap64(A) __bswapq((A))
-#endif
+#endif /* __x86_64__ */
-/** Counts the number of bits in the source operand having a value of 1.
- *
- * \headerfile
- *
- * This intrinsic corresponds to the POPCNT instruction or a
- * a sequence of arithmetic and logic ops to calculate it.
- *
- * \param __A
- * An unsigned 32-bit integer operand.
- * \returns A 32-bit integer containing the number of bits with value 1 in the
- * source operand.
- */
+/// Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the \c POPCNT instruction or a
+/// sequence of arithmetic and logic ops to calculate it.
+///
+/// \param __A
+/// An unsigned 32-bit integer operand.
+/// \returns A 32-bit integer containing the number of bits with value 1 in the
+/// source operand.
+/// \see _popcnt32
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __popcntd(unsigned int __A) { return __builtin_popcount(__A); }
+/// Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile
+///
+/// \code
+/// int _popcnt32(int A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c POPCNT instruction or a
+/// sequence of arithmetic and logic ops to calculate it.
+///
+/// \param A
+/// An unsigned 32-bit integer operand.
+/// \returns A 32-bit integer containing the number of bits with value 1 in the
+/// source operand.
+/// \see __popcntd
#define _popcnt32(A) __popcntd((A))
#ifdef __x86_64__
-/** Counts the number of bits in the source operand having a value of 1.
- *
- * \headerfile
- *
- * This intrinsic corresponds to the POPCNT instruction or a
- * a sequence of arithmetic and logic ops to calculate it.
- *
- * \param __A
- * An unsigned 64-bit integer operand.
- * \returns A 64-bit integer containing the number of bits with value 1 in the
- * source operand.
- */
+/// Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the \c POPCNT instruction or a
+/// sequence of arithmetic and logic ops to calculate it.
+///
+/// \param __A
+/// An unsigned 64-bit integer operand.
+/// \returns A 64-bit integer containing the number of bits with value 1 in the
+/// source operand.
+/// \see _popcnt64
static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR __popcntq(unsigned long long __A) { return __builtin_popcountll(__A); }
+/// Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile
+///
+/// \code
+/// long long _popcnt64(unsigned long long A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c POPCNT instruction or a
+/// sequence of arithmetic and logic ops to calculate it.
+///
+/// \param A
+/// An unsigned 64-bit integer operand.
+/// \returns A 64-bit integer containing the number of bits with value 1 in the
+/// source operand.
+/// \see __popcntq
#define _popcnt64(A) __popcntq((A))
#endif /* __x86_64__ */
#ifdef __x86_64__
+/// Returns the program status and control \c RFLAGS register with the \c VM
+/// and \c RF flags cleared.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the \c PUSHFQ + \c POP instruction sequence.
+///
+/// \returns The 64-bit value of the RFLAGS register.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS __readeflags(void) { return __builtin_ia32_readeflags_u64(); }
+/// Writes the specified value to the program status and control \c RFLAGS
+/// register. Reserved bits are not affected.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the \c PUSH + \c POPFQ instruction sequence.
+///
+/// \param __f
+/// The 64-bit value to write to \c RFLAGS.
static __inline__ void __DEFAULT_FN_ATTRS __writeeflags(unsigned long long __f) { @@ -194,12 +298,29 @@ __writeeflags(unsigned long long __f) }
#else /* !__x86_64__ */
+/// Returns the program status and control \c EFLAGS register with the \c VM
+/// and \c RF flags cleared.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the \c PUSHFD + \c POP instruction sequence.
+///
+/// \returns The 32-bit value of the EFLAGS register.
static __inline__ unsigned int __DEFAULT_FN_ATTRS __readeflags(void) { return __builtin_ia32_readeflags_u32(); }
+/// Writes the specified value to the program status and control \c EFLAGS
+/// register. Reserved bits are not affected.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the \c PUSH + \c POPFD instruction sequence.
+///
+/// \param __f
+/// The 32-bit value to write to \c EFLAGS.
static __inline__ void __DEFAULT_FN_ATTRS __writeeflags(unsigned int __f) { @@ -207,123 +328,120 @@ __writeeflags(unsigned int __f) }
#endif /* !__x86_64__ */
-/** Cast a 32-bit float value to a 32-bit unsigned integer value
- *
- * \headerfile
- * This intrinsic corresponds to the VMOVD / MOVD instruction in x86_64,
- * and corresponds to the VMOVL / MOVL instruction in ia32.
- *
- * \param __A
- * A 32-bit float value.
- * \returns a 32-bit unsigned integer containing the converted value.
- */
+/// Cast a 32-bit float value to a 32-bit unsigned integer value.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the \c VMOVD / \c MOVD instruction in x86_64,
+/// and corresponds to the \c VMOVL / \c MOVL instruction in ia32.
+///
+/// \param __A
+/// A 32-bit float value.
+/// \returns a 32-bit unsigned integer containing the converted value.
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CAST _castf32_u32(float __A) { return __builtin_bit_cast(unsigned int, __A); } -/** Cast a 64-bit float value to a 64-bit unsigned integer value - * - * \headerfile - * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64, - * and corresponds to the VMOVL / MOVL instruction in ia32. - * - * \param __A - * A 64-bit float value. - * \returns a 64-bit unsigned integer containing the converted value. - */ +/// Cast a 64-bit float value to a 64-bit unsigned integer value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64, +/// and corresponds to the \c VMOVL / \c MOVL instruction in ia32. +/// +/// \param __A +/// A 64-bit float value. +/// \returns a 64-bit unsigned integer containing the converted value. static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CAST _castf64_u64(double __A) { return __builtin_bit_cast(unsigned long long, __A); } -/** Cast a 32-bit unsigned integer value to a 32-bit float value - * - * \headerfile - * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64, - * and corresponds to the FLDS instruction in ia32. - * - * \param __A - * A 32-bit unsigned integer value. - * \returns a 32-bit float value containing the converted value. - */ +/// Cast a 32-bit unsigned integer value to a 32-bit float value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64, +/// and corresponds to the \c FLDS instruction in ia32. +/// +/// \param __A +/// A 32-bit unsigned integer value. +/// \returns a 32-bit float value containing the converted value. static __inline__ float __DEFAULT_FN_ATTRS_CAST _castu32_f32(unsigned int __A) { return __builtin_bit_cast(float, __A); } -/** Cast a 64-bit unsigned integer value to a 64-bit float value - * - * \headerfile - * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64, - * and corresponds to the FLDL instruction in ia32. - * - * \param __A - * A 64-bit unsigned integer value. - * \returns a 64-bit float value containing the converted value. - */ +/// Cast a 64-bit unsigned integer value to a 64-bit float value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VMOVQ / \c MOVQ instruction in x86_64, +/// and corresponds to the \c FLDL instruction in ia32. +/// +/// \param __A +/// A 64-bit unsigned integer value. +/// \returns a 64-bit float value containing the converted value. static __inline__ double __DEFAULT_FN_ATTRS_CAST _castu64_f64(unsigned long long __A) { return __builtin_bit_cast(double, __A); } -/** Adds the unsigned integer operand to the CRC-32C checksum of the - * unsigned char operand. - * - * \headerfile - * - * This intrinsic corresponds to the CRC32B instruction. - * - * \param __C - * An unsigned integer operand to add to the CRC-32C checksum of operand - * \a __D. - * \param __D - * An unsigned 8-bit integer operand used to compute the CRC-32C checksum. - * \returns The result of adding operand \a __C to the CRC-32C checksum of - * operand \a __D. - */ +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned char operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c CRC32B instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum. 
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32b(unsigned int __C, unsigned char __D) { return __builtin_ia32_crc32qi(__C, __D); } -/** Adds the unsigned integer operand to the CRC-32C checksum of the - * unsigned short operand. - * - * \headerfile - * - * This intrinsic corresponds to the CRC32W instruction. - * - * \param __C - * An unsigned integer operand to add to the CRC-32C checksum of operand - * \a __D. - * \param __D - * An unsigned 16-bit integer operand used to compute the CRC-32C checksum. - * \returns The result of adding operand \a __C to the CRC-32C checksum of - * operand \a __D. - */ +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned short operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c CRC32W instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32w(unsigned int __C, unsigned short __D) { return __builtin_ia32_crc32hi(__C, __D); } -/** Adds the unsigned integer operand to the CRC-32C checksum of the - * second unsigned integer operand. - * - * \headerfile - * - * This intrinsic corresponds to the CRC32D instruction. - * - * \param __C - * An unsigned integer operand to add to the CRC-32C checksum of operand - * \a __D. - * \param __D - * An unsigned 32-bit integer operand used to compute the CRC-32C checksum. - * \returns The result of adding operand \a __C to the CRC-32C checksum of - * operand \a __D. - */ +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// second unsigned integer operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c CRC32D instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32d(unsigned int __C, unsigned int __D) { @@ -331,21 +449,20 @@ __crc32d(unsigned int __C, unsigned int __D) } #ifdef __x86_64__ -/** Adds the unsigned integer operand to the CRC-32C checksum of the - * unsigned 64-bit integer operand. - * - * \headerfile - * - * This intrinsic corresponds to the CRC32Q instruction. - * - * \param __C - * An unsigned integer operand to add to the CRC-32C checksum of operand - * \a __D. - * \param __D - * An unsigned 64-bit integer operand used to compute the CRC-32C checksum. - * \returns The result of adding operand \a __C to the CRC-32C checksum of - * operand \a __D. - */ +/// Adds the unsigned integer operand to the CRC-32C checksum of the +/// unsigned 64-bit integer operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c CRC32Q instruction. +/// +/// \param __C +/// An unsigned integer operand to add to the CRC-32C checksum of operand +/// \a __D. +/// \param __D +/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum. +/// \returns The result of adding operand \a __C to the CRC-32C checksum of +/// operand \a __D. 
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CRC32 __crc32q(unsigned long long __C, unsigned long long __D) { @@ -353,19 +470,67 @@ __crc32q(unsigned long long __C, unsigned long long __D) } #endif /* __x86_64__ */ +/// Reads the specified performance monitoring counter. Refer to your +/// processor's documentation to determine which performance counters are +/// supported. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c RDPMC instruction. +/// +/// \param __A +/// The performance counter to read. +/// \returns The 64-bit value read from the performance counter. +/// \see _rdpmc static __inline__ unsigned long long __DEFAULT_FN_ATTRS __rdpmc(int __A) { return __builtin_ia32_rdpmc(__A); } -/* __rdtscp */ +/// Reads the processor's time stamp counter and the \c IA32_TSC_AUX MSR +/// \c (0xc0000103). +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c RDTSCP instruction. +/// +/// \param __A +/// Address of where to store the 32-bit \c IA32_TSC_AUX value. +/// \returns The 64-bit value of the time stamp counter. static __inline__ unsigned long long __DEFAULT_FN_ATTRS __rdtscp(unsigned int *__A) { return __builtin_ia32_rdtscp(__A); } +/// Reads the processor's time stamp counter. +/// +/// \headerfile +/// +/// \code +/// unsigned long long _rdtsc(); +/// \endcode +/// +/// This intrinsic corresponds to the \c RDTSC instruction. +/// +/// \returns The 64-bit value of the time stamp counter. #define _rdtsc() __rdtsc() +/// Reads the specified performance monitoring counter. Refer to your +/// processor's documentation to determine which performance counters are +/// supported. +/// +/// \headerfile +/// +/// \code +/// unsigned long long _rdpmc(int A); +/// \endcode +/// +/// This intrinsic corresponds to the \c RDPMC instruction. +/// +/// \param A +/// The performance counter to read. +/// \returns The 64-bit value read from the performance counter. +/// \see __rdpmc #define _rdpmc(A) __rdpmc(A) static __inline__ void __DEFAULT_FN_ATTRS @@ -373,42 +538,150 @@ _wbinvd(void) { __builtin_ia32_wbinvd(); } +/// Rotates an 8-bit value to the left by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c ROL instruction. +/// +/// \param __X +/// The unsigned 8-bit value to be rotated. +/// \param __C +/// The number of bits to rotate the value. +/// \returns The rotated value. static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR __rolb(unsigned char __X, int __C) { return __builtin_rotateleft8(__X, __C); } +/// Rotates an 8-bit value to the right by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c ROR instruction. +/// +/// \param __X +/// The unsigned 8-bit value to be rotated. +/// \param __C +/// The number of bits to rotate the value. +/// \returns The rotated value. static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR __rorb(unsigned char __X, int __C) { return __builtin_rotateright8(__X, __C); } +/// Rotates a 16-bit value to the left by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c ROL instruction. +/// +/// \param __X +/// The unsigned 16-bit value to be rotated. 
+/// \param __C +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see _rotwl static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR __rolw(unsigned short __X, int __C) { return __builtin_rotateleft16(__X, __C); } +/// Rotates a 16-bit value to the right by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c ROR instruction. +/// +/// \param __X +/// The unsigned 16-bit value to be rotated. +/// \param __C +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see _rotwr static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR __rorw(unsigned short __X, int __C) { return __builtin_rotateright16(__X, __C); } +/// Rotates a 32-bit value to the left by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c ROL instruction. +/// +/// \param __X +/// The unsigned 32-bit value to be rotated. +/// \param __C +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see _rotl static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR __rold(unsigned int __X, int __C) { return __builtin_rotateleft32(__X, (unsigned int)__C); } +/// Rotates a 32-bit value to the right by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c ROR instruction. +/// +/// \param __X +/// The unsigned 32-bit value to be rotated. +/// \param __C +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see _rotr static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR __rord(unsigned int __X, int __C) { return __builtin_rotateright32(__X, (unsigned int)__C); } #ifdef __x86_64__ +/// Rotates a 64-bit value to the left by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c ROL instruction. +/// +/// \param __X +/// The unsigned 64-bit value to be rotated. +/// \param __C +/// The number of bits to rotate the value. +/// \returns The rotated value. static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR __rolq(unsigned long long __X, int __C) { return __builtin_rotateleft64(__X, (unsigned long long)__C); } +/// Rotates a 64-bit value to the right by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c ROR instruction. +/// +/// \param __X +/// The unsigned 64-bit value to be rotated. +/// \param __C +/// The number of bits to rotate the value. +/// \returns The rotated value. static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR __rorq(unsigned long long __X, int __C) { return __builtin_rotateright64(__X, (unsigned long long)__C); @@ -419,18 +692,167 @@ __rorq(unsigned long long __X, int __C) { /* These are already provided as builtins for MSVC. */ /* Select the correct function based on the size of long. */ #ifdef __LP64__ +/// Rotates a 64-bit value to the left by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. 
+/// +/// \headerfile +/// +/// \code +/// unsigned long long _lrotl(unsigned long long a, int b); +/// \endcode +/// +/// This intrinsic corresponds to the \c ROL instruction. +/// +/// \param a +/// The unsigned 64-bit value to be rotated. +/// \param b +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see __rolq #define _lrotl(a,b) __rolq((a), (b)) + +/// Rotates a 64-bit value to the right by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// \code +/// unsigned long long _lrotr(unsigned long long a, int b); +/// \endcode +/// +/// This intrinsic corresponds to the \c ROR instruction. +/// +/// \param a +/// The unsigned 64-bit value to be rotated. +/// \param b +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see __rorq #define _lrotr(a,b) __rorq((a), (b)) -#else +#else // __LP64__ +/// Rotates a 32-bit value to the left by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// \code +/// unsigned int _lrotl(unsigned int a, int b); +/// \endcode +/// +/// This intrinsic corresponds to the \c ROL instruction. +/// +/// \param a +/// The unsigned 32-bit value to be rotated. +/// \param b +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see __rold #define _lrotl(a,b) __rold((a), (b)) + +/// Rotates a 32-bit value to the right by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// \code +/// unsigned int _lrotr(unsigned int a, int b); +/// \endcode +/// +/// This intrinsic corresponds to the \c ROR instruction. +/// +/// \param a +/// The unsigned 32-bit value to be rotated. +/// \param b +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see __rord #define _lrotr(a,b) __rord((a), (b)) -#endif +#endif // __LP64__ + +/// Rotates a 32-bit value to the left by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// \code +/// unsigned int _rotl(unsigned int a, int b); +/// \endcode +/// +/// This intrinsic corresponds to the \c ROL instruction. +/// +/// \param a +/// The unsigned 32-bit value to be rotated. +/// \param b +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see __rold #define _rotl(a,b) __rold((a), (b)) + +/// Rotates a 32-bit value to the right by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// \code +/// unsigned int _rotr(unsigned int a, int b); +/// \endcode +/// +/// This intrinsic corresponds to the \c ROR instruction. +/// +/// \param a +/// The unsigned 32-bit value to be rotated. +/// \param b +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see __rord #define _rotr(a,b) __rord((a), (b)) #endif // _MSC_VER /* These are not builtins so need to be provided in all modes. */ +/// Rotates a 16-bit value to the left by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. 
+/// +/// \headerfile +/// +/// \code +/// unsigned short _rotwl(unsigned short a, int b); +/// \endcode +/// +/// This intrinsic corresponds to the \c ROL instruction. +/// +/// \param a +/// The unsigned 16-bit value to be rotated. +/// \param b +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see __rolw #define _rotwl(a,b) __rolw((a), (b)) + +/// Rotates a 16-bit value to the right by the specified number of bits. +/// This operation is undefined if the number of bits exceeds the size of +/// the value. +/// +/// \headerfile +/// +/// \code +/// unsigned short _rotwr(unsigned short a, int b); +/// \endcode +/// +/// This intrinsic corresponds to the \c ROR instruction. +/// +/// \param a +/// The unsigned 16-bit value to be rotated. +/// \param b +/// The number of bits to rotate the value. +/// \returns The rotated value. +/// \see __rorw #define _rotwr(a,b) __rorw((a), (b)) #undef __DEFAULT_FN_ATTRS diff --git a/lib/include/immintrin.h b/lib/include/immintrin.h index 642602be14..27800f7a82 100644 --- a/lib/include/immintrin.h +++ b/lib/include/immintrin.h @@ -291,11 +291,13 @@ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDPID__) -/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103). +/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103). /// /// \headerfile /// /// This intrinsic corresponds to the RDPID instruction. +/// +/// \returns The 32-bit contents of the MSR. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) _rdpid_u32(void) { return __builtin_ia32_rdpid(); @@ -488,6 +490,15 @@ _writegsbase_u64(unsigned long long __V) * field inside of it. */ +/// Load a 16-bit value from memory and swap its bytes. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the 16-bit value to load. +/// \returns The byte-swapped value. static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i16(void const * __P) { struct __loadu_i16 { @@ -496,6 +507,16 @@ _loadbe_i16(void const * __P) { return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v); } +/// Swap the bytes of a 16-bit value and store it to memory. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the memory for storing the swapped value. +/// \param __D +/// The 16-bit value to be byte-swapped. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i16(void * __P, short __D) { struct __storeu_i16 { @@ -504,6 +525,15 @@ _storebe_i16(void * __P, short __D) { ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D); } +/// Load a 32-bit value from memory and swap its bytes. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the 32-bit value to load. +/// \returns The byte-swapped value. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i32(void const * __P) { struct __loadu_i32 { @@ -512,6 +542,16 @@ _loadbe_i32(void const * __P) { return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v); } +/// Swap the bytes of a 32-bit value and store it to memory. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the memory for storing the swapped value. 
+/// \param __D +/// The 32-bit value to be byte-swapped. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i32(void * __P, int __D) { struct __storeu_i32 { @@ -521,6 +561,15 @@ _storebe_i32(void * __P, int __D) { } #ifdef __x86_64__ +/// Load a 64-bit value from memory and swap its bytes. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the 64-bit value to load. +/// \returns The byte-swapped value. static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i64(void const * __P) { struct __loadu_i64 { @@ -529,6 +578,16 @@ _loadbe_i64(void const * __P) { return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v); } +/// Swap the bytes of a 64-bit value and store it to memory. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the MOVBE instruction. +/// +/// \param __P +/// A pointer to the memory for storing the swapped value. +/// \param __D +/// The 64-bit value to be byte-swapped. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i64(void * __P, long long __D) { struct __storeu_i64 { @@ -578,9 +637,13 @@ _storebe_i64(void * __P, long long __D) { #include #endif -/* Some intrinsics inside adxintrin.h are available only on processors with ADX, - * whereas others are also available at all times. */ +/* Intrinsics inside adcintrin.h are available at all times. */ +#include + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__ADX__) #include +#endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDSEED__) diff --git a/lib/include/intrin.h b/lib/include/intrin.h index de68b07491..9ebaea9fee 100644 --- a/lib/include/intrin.h +++ b/lib/include/intrin.h @@ -572,6 +572,22 @@ unsigned char __readx18byte(unsigned long offset); unsigned short __readx18word(unsigned long offset); unsigned long __readx18dword(unsigned long offset); unsigned __int64 __readx18qword(unsigned long offset); + +double _CopyDoubleFromInt64(__int64); +float _CopyFloatFromInt32(__int32); +__int32 _CopyInt32FromFloat(float); +__int64 _CopyInt64FromDouble(double); + +unsigned int _CountLeadingOnes(unsigned long); +unsigned int _CountLeadingOnes64(unsigned __int64); +unsigned int _CountLeadingSigns(long); +unsigned int _CountLeadingSigns64(__int64); +unsigned int _CountLeadingZeros(unsigned long); +unsigned int _CountLeadingZeros64(unsigned _int64); +unsigned int _CountOneBits(unsigned long); +unsigned int _CountOneBits64(unsigned __int64); + +void __cdecl __prefetch(void *); #endif /*----------------------------------------------------------------------------*\ diff --git a/lib/include/larchintrin.h b/lib/include/larchintrin.h index c5c533ee0b..f421829591 100644 --- a/lib/include/larchintrin.h +++ b/lib/include/larchintrin.h @@ -156,7 +156,7 @@ extern __inline unsigned char return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); } -extern __inline unsigned char +extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrrd_h(unsigned int _1) { return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); @@ -228,6 +228,18 @@ extern __inline void ((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2))) #endif +#define __frecipe_s(/*float*/ _1) \ + (float)__builtin_loongarch_frecipe_s((float)_1) + +#define __frecipe_d(/*double*/ _1) \ + 
(double)__builtin_loongarch_frecipe_d((double)_1) + +#define __frsqrte_s(/*float*/ _1) \ + (float)__builtin_loongarch_frsqrte_s((float)_1) + +#define __frsqrte_d(/*double*/ _1) \ + (double)__builtin_loongarch_frsqrte_d((double)_1) + #ifdef __cplusplus } #endif diff --git a/lib/include/lasxintrin.h b/lib/include/lasxintrin.h new file mode 100644 index 0000000000..dafc2a2f3e --- /dev/null +++ b/lib/include/lasxintrin.h @@ -0,0 +1,3884 @@ +/*===------------ lasxintrin.h - LoongArch LASX intrinsics -----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _LOONGSON_ASXINTRIN_H +#define _LOONGSON_ASXINTRIN_H 1 + +#if defined(__loongarch_asx) + +typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); +typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); +typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); +typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); +typedef short v16i16 __attribute__((vector_size(32), aligned(32))); +typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); +typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); +typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); +typedef int v8i32 __attribute__((vector_size(32), aligned(32))); +typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); +typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); +typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); +typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); +typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); +typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); +typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); +typedef float v8f32 __attribute__((vector_size(32), aligned(32))); +typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); +typedef double v4f64 __attribute__((vector_size(32), aligned(32))); +typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); + +typedef double v4f64 __attribute__((vector_size(32), aligned(32))); +typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); + +typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); +typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); +typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_d(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) + +#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) + +#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) + +#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i + __lasx_xvsrl_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
__lasx_xvbitset_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) + +#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ 
+ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) + +#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) + +#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) + +#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) + +#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) + +#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) + +#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) + +#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) + +#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i + __lasx_xvmax_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) + +#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) + +#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) + +#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) + +#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) + +#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) + +#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) + +#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) + +#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) + +#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_h(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) + +#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) + +#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) + +#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) + +#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) + +#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) + +#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) + +#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) + +#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) + +#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i + __lasx_xvsle_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) + +#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) + +#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) + +#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) + +#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) + +#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) + +#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) + +#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) + +#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) + +#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) + +#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) + +#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) + +#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) + +#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) + +#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_b(__m256i _1, __m256i 
_2) { + return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_hu(__m256i _1, __m256i _2) { + 
return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_wu(__m256i _1, __m256i _2) 
{ + return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i + __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ + 
((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) + +#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) + +#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) + +#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_w(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvand_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvnor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvxor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); +} + +#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) + +#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) + +#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) + +#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_b(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_h(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_w(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_d(long int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_d(__m256i _1) { + return 
(__m256i)__builtin_lasx_xvclo_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfadd_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfadd_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsub_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsub_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmul_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmul_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfdiv_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfdiv_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { + return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmin_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmin_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmina_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m256d + __lasx_xvfmina_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmax_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmax_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrecip_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrecip_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrecipe_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrecipe_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrecipe_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrecipe_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrint_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrint_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrsqrte_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrsqrte_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrsqrte_d(__m256d _1) { + return 
(__m256d)__builtin_lasx_xvfrsqrte_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvflogb_s(__m256 _1) { + return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvflogb_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvth_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvth_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvtl_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvtl_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_w(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_l(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_wu(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_lu(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i + __lasx_xvreplve_b(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_h(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_w(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_d(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); +} + +#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvandn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); +} + +#define 
__lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) + +#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) + +#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) + +#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) + +#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) + +#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
__lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) + +#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) + +#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m256 + __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_l(__m256i _1, __m256i _2) { + return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { + return 
(__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftinth_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffinth_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffintl_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrph_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrpl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrmh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrml_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrneh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrnel_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrne_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrne_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + 
__lasx_xvfrintrz_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrz_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrp_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrp_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrm_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrm_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); +} + +#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) + +#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) + +#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) + +#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); +} + +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvorn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); +} + +#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvldx(void const *_1, long int _2) { + return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lasx_xvstx(__m256i _1, void *_2, long int _3) { + return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvextl_qu_du(__m256i _1) { + return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); +} + +#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) + +#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_q(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_h_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_w(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); +} + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_hu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_wu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); +} + +#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvperm_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); +} + +#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) + +#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) + +#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) + +#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) + +#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ + ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_du(__m256i 
_1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, + (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m256i + __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, + (v4u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, + (v8u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, + (v16u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, + (v32u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, + (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, + (v4u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, + (v8u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, + (v16u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, + (v32u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, + (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, + (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { + return 
(__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, + (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, + (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, + (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, + (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_q(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_q(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m256i + __lasx_xvmskgez_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsknz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_h_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_w_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_d_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_q_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_hu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_wu_hu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_du_wu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_qu_du(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); +} + +#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) + +#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) + +#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) + +#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvextl_q_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); +} + +#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define 
__lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + 
((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) + +#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) + +#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) + +#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) + +#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) + +#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) + +#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) + +#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) + +#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) + +#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i + __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { + return 
(__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); +} + +#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ + ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) + +#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ + ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) + +#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) + +#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) + +#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) + +#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) + +#endif /* defined(__loongarch_asx). */ +#endif /* _LOONGSON_ASXINTRIN_H. */ diff --git a/lib/include/limits.h b/lib/include/limits.h index 354e031a9d..15e6bbe0ab 100644 --- a/lib/include/limits.h +++ b/lib/include/limits.h @@ -66,10 +66,8 @@ #define CHAR_BIT __CHAR_BIT__ -/* C2x 5.2.4.2.1 */ -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. 
 */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
+/* C23 5.2.4.2.1 */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
 #define BOOL_WIDTH __BOOL_WIDTH__
 #define CHAR_WIDTH CHAR_BIT
 #define SCHAR_WIDTH CHAR_BIT
diff --git a/lib/include/llvm_libc_wrappers/assert.h b/lib/include/llvm_libc_wrappers/assert.h
new file mode 100644
index 0000000000..de650ca844
--- /dev/null
+++ b/lib/include/llvm_libc_wrappers/assert.h
@@ -0,0 +1,34 @@
+//===-- Wrapper for C standard assert.h declarations on the GPU ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
+#define __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
+
+#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
+#error "This file is for GPU offloading compilation only"
+#endif
+
+#include_next <assert.h>
+
+#if __has_include(<llvm-libc-decls/assert.h>)
+
+#if defined(__HIP__) || defined(__CUDA__)
+#define __LIBC_ATTRS __attribute__((device))
+#endif
+
+#pragma omp begin declare target
+
+#include <llvm-libc-decls/assert.h>
+
+#pragma omp end declare target
+
+#undef __LIBC_ATTRS
+
+#endif
+
+#endif // __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
diff --git a/lib/include/llvm_libc_wrappers/ctype.h b/lib/include/llvm_libc_wrappers/ctype.h
index e20b7bb58f..49c2af9347 100644
--- a/lib/include/llvm_libc_wrappers/ctype.h
+++ b/lib/include/llvm_libc_wrappers/ctype.h
@@ -13,8 +13,19 @@
 #error "This file is for GPU offloading compilation only"
 #endif
 
+// The GNU headers like to define 'toupper' and 'tolower' redundantly. This is
+// necessary to prevent it from doing that and remapping our implementation.
+#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__) +#pragma push_macro("__USE_EXTERN_INLINES") +#undef __USE_EXTERN_INLINES +#endif + #include_next +#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__) +#pragma pop_macro("__USE_EXTERN_INLINES") +#endif + #if __has_include() #if defined(__HIP__) || defined(__CUDA__) @@ -26,6 +37,7 @@ #pragma push_macro("isalnum") #pragma push_macro("isalpha") +#pragma push_macro("isascii") #pragma push_macro("isblank") #pragma push_macro("iscntrl") #pragma push_macro("isdigit") @@ -36,11 +48,13 @@ #pragma push_macro("isspace") #pragma push_macro("isupper") #pragma push_macro("isxdigit") +#pragma push_macro("toascii") #pragma push_macro("tolower") #pragma push_macro("toupper") #undef isalnum #undef isalpha +#undef isascii #undef iscntrl #undef isdigit #undef islower @@ -51,6 +65,7 @@ #undef isupper #undef isblank #undef isxdigit +#undef toascii #undef tolower #undef toupper @@ -64,6 +79,7 @@ #if !defined(__NVPTX__) && !defined(__AMDGPU__) #pragma pop_macro("isalnum") #pragma pop_macro("isalpha") +#pragma pop_macro("isascii") #pragma pop_macro("isblank") #pragma pop_macro("iscntrl") #pragma pop_macro("isdigit") @@ -74,6 +90,7 @@ #pragma pop_macro("isspace") #pragma pop_macro("isupper") #pragma pop_macro("isxdigit") +#pragma pop_macro("toascii") #pragma pop_macro("tolower") #pragma pop_macro("toupper") #endif diff --git a/lib/include/llvm_libc_wrappers/stdio.h b/lib/include/llvm_libc_wrappers/stdio.h index 51b0f0e330..950f91b376 100644 --- a/lib/include/llvm_libc_wrappers/stdio.h +++ b/lib/include/llvm_libc_wrappers/stdio.h @@ -6,21 +6,58 @@ // //===----------------------------------------------------------------------===// -#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ -#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ - #if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__) #error "This file is for GPU offloading compilation only" #endif #include_next +// In some old versions of glibc, other standard headers sometimes define +// special macros (e.g., __need_FILE) before including stdio.h to cause stdio.h +// to produce special definitions. Future includes of stdio.h when those +// special macros are undefined are expected to produce the normal definitions +// from stdio.h. +// +// We do not apply our include guard (__CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__) +// unconditionally to the above include_next. Otherwise, after an occurrence of +// the first glibc stdio.h use case described above, the include_next would be +// skipped for remaining includes of stdio.h, leaving required symbols +// undefined. +// +// We make the following assumptions to handle all use cases: +// +// 1. If the above include_next produces special glibc definitions, then (a) it +// does not produce the normal definitions that we must intercept below, (b) +// the current file was included from a glibc header that already defined +// __GLIBC__ (usually by including glibc's ), and (c) the above +// include_next does not define _STDIO_H. In that case, we skip the rest of +// the current file and don't guard against future includes. +// 2. If the above include_next produces the normal stdio.h definitions, then +// either (a) __GLIBC__ is not defined because C headers are from some other +// libc implementation or (b) the above include_next defines _STDIO_H to +// prevent the above include_next from having any effect in the future. 
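Aside (illustrative, not part of the patch): the old-glibc behaviour described above can be modelled roughly as below. When a sibling glibc header defines __need_FILE before including stdio.h, only the FILE type is produced and _STDIO_H is left undefined, so a later plain include must still expand to the full header. This is a simplified sketch, not real glibc code:

    /* Simplified model of old glibc <stdio.h>. */
    #ifdef __need_FILE
    typedef struct _IO_FILE FILE;   /* special case: only the FILE type       */
    #undef __need_FILE              /* note that _STDIO_H is NOT defined here */
    #else
    #ifndef _STDIO_H
    #define _STDIO_H 1
    /* ... the normal, full set of stdio declarations ... */
    #endif
    #endif

Hence the wrapper only installs its own guard in the _STDIO_H / non-glibc case, which is exactly what the conditional below does.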
+#if !defined(__GLIBC__) || defined(_STDIO_H) + +#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ +#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ + #if __has_include() #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) #endif +// Some headers provide these as macros. Temporarily undefine them so they do +// not conflict with any definitions for the GPU. + +#pragma push_macro("stdout") +#pragma push_macro("stdin") +#pragma push_macro("stderr") + +#undef stdout +#undef stderr +#undef stdin + #pragma omp begin declare target #include @@ -29,6 +66,15 @@ #undef __LIBC_ATTRS +// Restore the original macros when compiling on the host. +#if !defined(__NVPTX__) && !defined(__AMDGPU__) +#pragma pop_macro("stdout") +#pragma pop_macro("stderr") +#pragma pop_macro("stdin") +#endif + #endif #endif // __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ + +#endif diff --git a/lib/include/llvm_libc_wrappers/stdlib.h b/lib/include/llvm_libc_wrappers/stdlib.h index 9cb2b4e64a..7fce5a1a31 100644 --- a/lib/include/llvm_libc_wrappers/stdlib.h +++ b/lib/include/llvm_libc_wrappers/stdlib.h @@ -23,8 +23,11 @@ #pragma omp begin declare target -// The LLVM C library uses this type so we forward declare it. +// The LLVM C library uses these named types so we forward declare them. typedef void (*__atexithandler_t)(void); +typedef int (*__bsearchcompare_t)(const void *, const void *); +typedef int (*__qsortcompare_t)(const void *, const void *); +typedef int (*__qsortrcompare_t)(const void *, const void *, void *); // Enforce ABI compatibility with the structs used by the LLVM C library. _Static_assert(__builtin_offsetof(div_t, quot) == 0, "ABI mismatch!"); diff --git a/lib/include/llvm_libc_wrappers/string.h b/lib/include/llvm_libc_wrappers/string.h index 027c415c1d..0ea49cb137 100644 --- a/lib/include/llvm_libc_wrappers/string.h +++ b/lib/include/llvm_libc_wrappers/string.h @@ -13,9 +13,6 @@ #error "This file is for GPU offloading compilation only" #endif -// FIXME: The GNU headers provide C++ standard compliant headers when in C++ -// mode and the LLVM libc does not. We cannot enable memchr, strchr, strchrnul, -// strpbrk, strrchr, strstr, or strcasestr until this is addressed. #include_next #if __has_include() @@ -26,8 +23,70 @@ #pragma omp begin declare target +// The GNU headers provide C++ standard compliant headers when in C++ mode and +// the LLVM libc does not. We need to manually provide the definitions using the +// same prototypes. 
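Aside (illustrative, not part of the patch): the standard-compliance gap being closed below is the const-correct overload pairs that ISO C++ requires for the string search functions, for example:

    // C++ requires an overload pair where C has a single prototype:
    //   C:    char *strchr(const char *s, int c);    // result silently loses const
    //   C++:  const char *strchr(const char *s, int c);
    //         char       *strchr(char *s, int c);
    #include <cstring>

    static const char *find_colon(const char *s) {
      return std::strchr(s, ':');   // binds to the const overload; no const_cast
    }

The extern "C++" block below declares the same overload pairs with __LIBC_ATTRS so the GPU build matches what glibc's __CORRECT_ISO_CPP_STRING_H_PROTO mode would have provided.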
+#if defined(__cplusplus) && defined(__GLIBC__) && \ + defined(__CORRECT_ISO_CPP_STRING_H_PROTO) + +#ifndef __LIBC_ATTRS +#define __LIBC_ATTRS +#endif + +extern "C" { +void *memccpy(void *__restrict, const void *__restrict, int, + size_t) __LIBC_ATTRS; +int memcmp(const void *, const void *, size_t) __LIBC_ATTRS; +void *memcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS; +void *memmem(const void *, size_t, const void *, size_t) __LIBC_ATTRS; +void *memmove(void *, const void *, size_t) __LIBC_ATTRS; +void *mempcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS; +void *memset(void *, int, size_t) __LIBC_ATTRS; +char *stpcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS; +char *stpncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; +char *strcat(char *__restrict, const char *__restrict) __LIBC_ATTRS; +int strcmp(const char *, const char *) __LIBC_ATTRS; +int strcoll(const char *, const char *) __LIBC_ATTRS; +char *strcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS; +size_t strcspn(const char *, const char *) __LIBC_ATTRS; +char *strdup(const char *) __LIBC_ATTRS; +size_t strlen(const char *) __LIBC_ATTRS; +char *strncat(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; +int strncmp(const char *, const char *, size_t) __LIBC_ATTRS; +char *strncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; +char *strndup(const char *, size_t) __LIBC_ATTRS; +size_t strnlen(const char *, size_t) __LIBC_ATTRS; +size_t strspn(const char *, const char *) __LIBC_ATTRS; +char *strtok(char *__restrict, const char *__restrict) __LIBC_ATTRS; +char *strtok_r(char *__restrict, const char *__restrict, + char **__restrict) __LIBC_ATTRS; +size_t strxfrm(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; +} + +extern "C++" { +char *strstr(char *, const char *) noexcept __LIBC_ATTRS; +const char *strstr(const char *, const char *) noexcept __LIBC_ATTRS; +char *strpbrk(char *, const char *) noexcept __LIBC_ATTRS; +const char *strpbrk(const char *, const char *) noexcept __LIBC_ATTRS; +char *strrchr(char *, int) noexcept __LIBC_ATTRS; +const char *strrchr(const char *, int) noexcept __LIBC_ATTRS; +char *strchr(char *, int) noexcept __LIBC_ATTRS; +const char *strchr(const char *, int) noexcept __LIBC_ATTRS; +char *strchrnul(char *, int) noexcept __LIBC_ATTRS; +const char *strchrnul(const char *, int) noexcept __LIBC_ATTRS; +char *strcasestr(char *, const char *) noexcept __LIBC_ATTRS; +const char *strcasestr(const char *, const char *) noexcept __LIBC_ATTRS; +void *memrchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; +const void *memrchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; +void *memchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; +const void *memchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; +} + +#else #include +#endif + #pragma omp end declare target #undef __LIBC_ATTRS diff --git a/lib/include/llvm_libc_wrappers/time.h b/lib/include/llvm_libc_wrappers/time.h new file mode 100644 index 0000000000..9d1340c4eb --- /dev/null +++ b/lib/include/llvm_libc_wrappers/time.h @@ -0,0 +1,34 @@ +//===-- Wrapper for C standard time.h declarations on the GPU -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__ +#define __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__ + +#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__) +#error "This file is for GPU offloading compilation only" +#endif + +#include_next + +#if __has_include() + +#if defined(__HIP__) || defined(__CUDA__) +#define __LIBC_ATTRS __attribute__((device)) +#endif + +#pragma omp begin declare target + +_Static_assert(sizeof(clock_t) == sizeof(long), "ABI mismatch!"); + +#include + +#pragma omp end declare target + +#endif + +#endif // __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__ diff --git a/lib/include/lsxintrin.h b/lib/include/lsxintrin.h new file mode 100644 index 0000000000..f347955ce6 --- /dev/null +++ b/lib/include/lsxintrin.h @@ -0,0 +1,3750 @@ +/*===------------- lsxintrin.h - LoongArch LSX intrinsics ------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _LOONGSON_SXINTRIN_H +#define _LOONGSON_SXINTRIN_H 1 + +#if defined(__loongarch_sx) +typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); +typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); +typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); +typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); +typedef short v8i16 __attribute__((vector_size(16), aligned(16))); +typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); +typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); +typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); +typedef int v4i32 __attribute__((vector_size(16), aligned(16))); +typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); +typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); +typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); +typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); +typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); +typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); +typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); +typedef float v4f32 __attribute__((vector_size(16), aligned(16))); +typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); +typedef double v2f64 __attribute__((vector_size(16), aligned(16))); +typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); + +typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); +typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); +typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m128i + __lsx_vsll_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) + +#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) + +#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) + +#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) + +#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) + +#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) + +#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) + +#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) + +#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) + +#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m128i + __lsx_vsrl_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) + +#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) + +#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) + +#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) + +#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) + +#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) + +#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) + +#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) + +#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) + +#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) + +#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) + +#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) + +#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) + +#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) + +#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) + +#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) + +#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + 
((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) + +#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) + +#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) + +#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) + +#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) + +#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) + +#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) + +#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) + +#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_du(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) + +#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) + +#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) + +#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) + +#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) + +#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) + +#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) + +#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) + +#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) + +#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_w(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) + +#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) + +#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) + +#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) + +#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) + +#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) + +#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) + +#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) + +#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) + +#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_h(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) + +#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) + +#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) + +#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) + +#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) + +#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) + +#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) + +#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) + +#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) + +#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) + +#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) + +#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) + +#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) + +#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) + +#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_w(__m128i _1, 
__m128i _2) { + return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); +} + 
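Aside (illustrative, not part of the header): a minimal use of the intrinsics declared above, assuming a LoongArch64 target with LSX enabled (a flag spelling such as -mlsx is assumed here; the header itself only requires __loongarch_sx to be defined):

    /* Illustrative only: lane-wise operations with the LSX API above. */
    #include <lsxintrin.h>

    __m128i add_w(__m128i a, __m128i b) {
      return __lsx_vadd_w(a, b);    /* four 32-bit lane-wise adds             */
    }

    __m128i scale_by_4(__m128i a) {
      return __lsx_vslli_w(a, 2);   /* logical left shift by a ui5 immediate  */
    }

    __m128i avg_bytes(__m128i a, __m128i b) {
      return __lsx_vavg_bu(a, b);   /* unsigned byte average; the vavgr_*
                                       family below is the rounding variant   */
    }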
+extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m128i + __lsx_vabsd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline 
+ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); +} + +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_b(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_h(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_w(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_d(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); +} + +#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ + 
((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) + +#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) + +#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) + +#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i + __lsx_vilvl_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vand_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); +} + +#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vor_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); +} + +#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vnor_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); +} + +#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vxor_v(__m128i _1, 
__m128i _2) { + return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); +} + +#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); +} + +#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) + +#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) + +#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) + +#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_b(int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_h(int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_w(int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_d(long int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_b(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_h(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_w(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_d(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_b(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_h(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_w(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_d(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclz_h(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + 
__lsx_vclz_w(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclz_d(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); +} + +#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) + +#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) + +#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) + +#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ + ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) + +#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) + +#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) + +#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) + +#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ + ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) + +#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) + +#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) + +#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) + +#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfadd_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfadd_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfsub_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfsub_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmul_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmul_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfdiv_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfdiv_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcvt_h_s(__m128 _1, __m128 
_2) { + return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { + return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmin_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmin_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmina_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmina_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmax_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmax_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmaxa_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmaxa_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfclass_s(__m128 _1) { + return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfclass_d(__m128d _1) { + return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfsqrt_s(__m128 _1) { + return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfsqrt_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrecip_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrecip_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrecipe_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrecipe_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrecipe_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrecipe_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrint_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); +} 
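Illustrative aside (not part of the patch): a minimal sketch of how the single-precision wrappers in the hunk above might be used, assuming a LoongArch target compiled with clang -mlsx and that this header is included as <lsxintrin.h>; the helper name hypot4f is hypothetical.

#include <lsxintrin.h>

/* Hypothetical helper: lane-wise sqrt(a*a + b*b) built from the wrappers
 * declared above (__lsx_vfmul_s, __lsx_vfadd_s, __lsx_vfsqrt_s). */
static inline __m128 hypot4f(__m128 a, __m128 b) {
  __m128 sum = __lsx_vfadd_s(__lsx_vfmul_s(a, a), __lsx_vfmul_s(b, b));
  return __lsx_vfsqrt_s(sum);
}

Each wrapper expands directly to the corresponding __builtin_lsx_* builtin, and with __always_inline__ the helpers add no call overhead.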
+ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrint_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrsqrt_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrsqrt_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrsqrte_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrsqrte_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrsqrte_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrsqrte_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vflogb_s(__m128 _1) { + return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vflogb_d(__m128d _1) { + return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfcvth_s_h(__m128i _1) { + return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfcvth_d_s(__m128 _1) { + return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfcvtl_s_h(__m128i _1) { + return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfcvtl_d_s(__m128 _1) { + return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_wu_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_lu_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_wu_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_lu_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); +} + +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_w(__m128i _1) { + return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffint_d_l(__m128i _1) { + return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_wu(__m128i _1) { + return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffint_d_lu(__m128i _1) { + return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vandn_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_b(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_h(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_w(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_d(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) + +#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) + +#define 
__lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) + +#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) + +#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) + +#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { 
+ return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) + +#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) + +#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_h(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_w(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_d(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { + 
return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_l(__m128i _1, __m128i _2) { + return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_w_d(__m128d _1, 
__m128d _2) { + return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftinth_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffinth_d_w(__m128i _1) { + return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffintl_d_w(__m128i _1) { + return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrzl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrzh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrpl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrph_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrml_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrmh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrnel_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrneh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrne_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrne_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrz_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrz_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrp_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrp_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128 + __lsx_vfrintrm_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrm_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); +} + +#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) + +#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) + +#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) + +#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m128i + __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, + (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, + (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, + (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, + (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, + (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, + (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, + (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, 
(v2u64)_2, + (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_q(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_q(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) + +#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) + +#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) + +#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskgez_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsknz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_h_b(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_w_h(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_d_w(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_hu_bu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_wu_hu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_du_wu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_qu_du(__m128i _1) { + return 
(__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); +} + +#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) + +#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) + +#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) + +#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); +} + +#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + 
((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + 
((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) + +#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vorn_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); +} + +#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vldx(void const *_1, long int _2) { + return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lsx_vstx(__m128i _1, void *_2, long int _3) { + return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_qu_du(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); +} + +#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) + +#define __lsx_bnz_d(/*__m128i*/ _1) 
((int)__builtin_lsx_bnz_d((v2u64)(_1))) + +#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) + +#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) + +#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) + +#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) + +#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) + +#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) + +#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) + +#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { + 
return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + 
__lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); +} + +#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) + +#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) + +#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) + +#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) + +#endif /* defined(__loongarch_sx) */ +#endif /* _LOONGSON_SXINTRIN_H */ diff --git a/lib/include/mmintrin.h b/lib/include/mmintrin.h index 03bac92198..08849f0107 100644 --- a/lib/include/mmintrin.h +++ b/lib/include/mmintrin.h @@ -22,7 +22,9 @@ typedef short __v4hi __attribute__((__vector_size__(8))); typedef char __v8qi __attribute__((__vector_size__(8))); /* 
Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \ + __min_vector_width__(64))) /// Clears the MMX state by setting the state of the x87 stack registers /// to empty. @@ -31,10 +33,10 @@ typedef char __v8qi __attribute__((__vector_size__(8))); /// /// This intrinsic corresponds to the EMMS instruction. /// -static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) -_mm_empty(void) -{ - __builtin_ia32_emms(); +static __inline__ void __attribute__((__always_inline__, __nodebug__, + __target__("mmx,no-evex512"))) +_mm_empty(void) { + __builtin_ia32_emms(); } /// Constructs a 64-bit integer vector, setting the lower 32 bits to the diff --git a/lib/include/module.modulemap b/lib/include/module.modulemap index 6894672ef0..56a13f69bc 100644 --- a/lib/include/module.modulemap +++ b/lib/include/module.modulemap @@ -153,10 +153,163 @@ module _Builtin_intrinsics [system] [extern_c] { } } -module _Builtin_stddef_max_align_t [system] [extern_c] { - header "__stddef_max_align_t.h" +// Start -fbuiltin-headers-in-system-modules affected modules + +// The following modules all ignore their headers when +// -fbuiltin-headers-in-system-modules is passed, and many of +// those headers join system modules when present. + +// e.g. if -fbuiltin-headers-in-system-modules is passed, then +// float.h will not be in the _Builtin_float module (that module +// will be empty). If there is a system module that declares +// `header "float.h"`, then the builtin float.h will join +// that module. The system float.h (if present) will be treated +// as a textual header in the sytem module. +module _Builtin_float [system] { + header "float.h" + export * } +module _Builtin_inttypes [system] { + header "inttypes.h" + export * +} + +module _Builtin_iso646 [system] { + header "iso646.h" + export * +} + +module _Builtin_limits [system] { + header "limits.h" + export * +} + +module _Builtin_stdalign [system] { + header "stdalign.h" + export * +} + +module _Builtin_stdarg [system] { + textual header "stdarg.h" + + explicit module __gnuc_va_list { + header "__stdarg___gnuc_va_list.h" + export * + } + + explicit module __va_copy { + header "__stdarg___va_copy.h" + export * + } + + explicit module va_arg { + header "__stdarg_va_arg.h" + export * + } + + explicit module va_copy { + header "__stdarg_va_copy.h" + export * + } + + explicit module va_list { + header "__stdarg_va_list.h" + export * + } +} + +module _Builtin_stdatomic [system] { + header "stdatomic.h" + export * +} + +module _Builtin_stdbool [system] { + header "stdbool.h" + export * +} + +module _Builtin_stddef [system] { + textual header "stddef.h" + + // __stddef_max_align_t.h is always in this module, even if + // -fbuiltin-headers-in-system-modules is passed. 
+ explicit module max_align_t { + header "__stddef_max_align_t.h" + export * + } + + explicit module null { + header "__stddef_null.h" + export * + } + + explicit module nullptr_t { + header "__stddef_nullptr_t.h" + export * + } + + explicit module offsetof { + header "__stddef_offsetof.h" + export * + } + + explicit module ptrdiff_t { + header "__stddef_ptrdiff_t.h" + export * + } + + explicit module rsize_t { + header "__stddef_rsize_t.h" + export * + } + + explicit module size_t { + header "__stddef_size_t.h" + export * + } + + explicit module unreachable { + header "__stddef_unreachable.h" + export * + } + + explicit module wchar_t { + header "__stddef_wchar_t.h" + export * + } +} + +// wint_t is provided by and not . It's here +// for compatibility, but must be explicitly requested. Therefore +// __stddef_wint_t.h is not part of _Builtin_stddef. It is always in +// this module even if -fbuiltin-headers-in-system-modules is passed. +module _Builtin_stddef_wint_t [system] { + header "__stddef_wint_t.h" + export * +} + +module _Builtin_stdint [system] { + header "stdint.h" + export * +} + +module _Builtin_stdnoreturn [system] { + header "stdnoreturn.h" + export * +} + +module _Builtin_tgmath [system] { + header "tgmath.h" + export * +} + +module _Builtin_unwind [system] { + header "unwind.h" + export * +} +// End -fbuiltin-headers-in-system-modules affected modules + module opencl_c { requires opencl header "opencl-c.h" diff --git a/lib/include/opencl-c-base.h b/lib/include/opencl-c-base.h index af3deae892..2494f6213f 100644 --- a/lib/include/opencl-c-base.h +++ b/lib/include/opencl-c-base.h @@ -45,6 +45,7 @@ #define __opencl_c_ext_fp32_local_atomic_add 1 #define __opencl_c_ext_fp32_global_atomic_min_max 1 #define __opencl_c_ext_fp32_local_atomic_min_max 1 +#define __opencl_c_ext_image_raw10_raw12 1 #endif // defined(__SPIR__) || defined(__SPIRV__) #endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) @@ -477,6 +478,10 @@ typedef enum memory_order #if __OPENCL_C_VERSION__ >= CL_VERSION_3_0 #define CLK_UNORM_INT_101010_2 0x10E0 #endif // __OPENCL_C_VERSION__ >= CL_VERSION_3_0 +#ifdef __opencl_c_ext_image_raw10_raw12 +#define CLK_UNSIGNED_INT_RAW10_EXT 0x10E3 +#define CLK_UNSIGNED_INT_RAW12_EXT 0x10E4 +#endif // __opencl_c_ext_image_raw10_raw12 // Channel order, numbering must be aligned with cl_channel_order in cl.h // diff --git a/lib/include/openmp_wrappers/cmath b/lib/include/openmp_wrappers/cmath index 22a720aca9..e1b71516e7 100644 --- a/lib/include/openmp_wrappers/cmath +++ b/lib/include/openmp_wrappers/cmath @@ -1,4 +1,4 @@ -/*===-- __clang_openmp_device_functions.h - OpenMP math declares ------ c++ -=== +/*===-- __clang_openmp_device_functions.h - OpenMP math declares -*- c++ -*-=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. diff --git a/lib/include/pmmintrin.h b/lib/include/pmmintrin.h index 203c0aa0f8..91cee1edda 100644 --- a/lib/include/pmmintrin.h +++ b/lib/include/pmmintrin.h @@ -17,8 +17,9 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("sse3,no-evex512"), __min_vector_width__(128))) /// Loads data from an unaligned memory location to elements in a 128-bit /// vector. 
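The pmmintrin.h hunk above (like the mmintrin.h and smmintrin.h hunks elsewhere in this patch) only appends "no-evex512" to the __target__ string in __DEFAULT_FN_ATTRS; the intrinsic declarations themselves are untouched, so existing callers compile and behave exactly as before. A minimal smoke test, not part of the patch, using a hypothetical file name demo.c (build with: clang -msse3 demo.c):

#include <pmmintrin.h>
#include <stdio.h>

int main(void) {
  float in[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  __m128 v = _mm_loadu_ps(in);            /* unaligned SSE load */
  __m128 h = _mm_hadd_ps(v, v);           /* SSE3 horizontal add: {3, 7, 3, 7} */
  float out[4];
  _mm_storeu_ps(out, h);
  printf("%.1f %.1f\n", out[0], out[1]);  /* expected output: 3.0 7.0 */
  return 0;
}

The added feature string only affects instruction selection inside these always_inline wrappers (avoiding 512-bit EVEX encodings); it does not change their signatures or the values they compute for callers.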
diff --git a/lib/include/ppc_wrappers/nmmintrin.h b/lib/include/ppc_wrappers/nmmintrin.h new file mode 100644 index 0000000000..789bba6bc0 --- /dev/null +++ b/lib/include/ppc_wrappers/nmmintrin.h @@ -0,0 +1,26 @@ +/*===---- nmmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. */ +#endif + +#ifndef NMMINTRIN_H_ +#define NMMINTRIN_H_ + +/* We just include SSE4.1 header file. */ +#include + +#endif /* NMMINTRIN_H_ */ diff --git a/lib/include/ppc_wrappers/smmintrin.h b/lib/include/ppc_wrappers/smmintrin.h index 349b395c4f..19cdecb18d 100644 --- a/lib/include/ppc_wrappers/smmintrin.h +++ b/lib/include/ppc_wrappers/smmintrin.h @@ -14,7 +14,7 @@ #ifndef NO_WARN_X86_INTRINSICS /* This header is distributed to simplify porting x86_64 code that - makes explicit use of Intel intrinsics to powerp64/powerpc64le. + makes explicit use of Intel intrinsics to powerpc64/powerpc64le. It is the user's responsibility to determine if the results are acceptable and make additional changes as necessary. @@ -68,10 +68,10 @@ extern __inline __m128d __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; #else - __fpscr_save.__fr = __builtin_mffs(); + __fpscr_save.__fr = __builtin_ppc_mffs(); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; __fpscr_save.__fpscr &= ~0xf8; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); #endif /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule @@ -83,10 +83,15 @@ extern __inline __m128d switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: - __builtin_set_fpscr_rn(0b00); + __builtin_ppc_set_fpscr_rn(0b00); /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. @@ -102,7 +107,7 @@ extern __inline __m128d This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : : "wa"(__r)); - __builtin_set_fpscr_rn(__fpscr_save.__fpscr); + __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); break; case _MM_FROUND_TO_NEG_INF: case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: @@ -128,9 +133,14 @@ extern __inline __m128d */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. 
*/ - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __fpscr_save.__fpscr |= __enables_save.__fpscr; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } return (__m128d)__r; } @@ -159,10 +169,10 @@ extern __inline __m128 __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; #else - __fpscr_save.__fr = __builtin_mffs(); + __fpscr_save.__fr = __builtin_ppc_mffs(); __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; __fpscr_save.__fpscr &= ~0xf8; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); #endif /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule @@ -174,10 +184,15 @@ extern __inline __m128 switch (__rounding) { case _MM_FROUND_TO_NEAREST_INT: - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __attribute__((fallthrough)); case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: - __builtin_set_fpscr_rn(0b00); + __builtin_ppc_set_fpscr_rn(0b00); /* Insert an artificial "read/write" reference to the variable read below, to ensure the compiler does not schedule a read/use of the variable before the FPSCR is modified, above. @@ -193,7 +208,7 @@ extern __inline __m128 This can be removed if and when GCC PR102783 is fixed. */ __asm__("" : : "wa"(__r)); - __builtin_set_fpscr_rn(__fpscr_save.__fpscr); + __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); break; case _MM_FROUND_TO_NEG_INF: case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: @@ -219,9 +234,14 @@ extern __inline __m128 */ __asm__("" : : "wa"(__r)); /* Restore enabled exceptions. */ - __fpscr_save.__fr = __builtin_mffsl(); +#ifdef _ARCH_PWR9 + __fpscr_save.__fr = __builtin_ppc_mffsl(); +#else + __fpscr_save.__fr = __builtin_ppc_mffs(); + __fpscr_save.__fpscr &= 0x70007f0ffL; +#endif __fpscr_save.__fpscr |= __enables_save.__fpscr; - __builtin_mtfsf(0b00000011, __fpscr_save.__fr); + __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); } return (__m128)__r; } diff --git a/lib/include/riscv_bitmanip.h b/lib/include/riscv_bitmanip.h new file mode 100644 index 0000000000..2bc7ee022a --- /dev/null +++ b/lib/include/riscv_bitmanip.h @@ -0,0 +1,195 @@ +/*===---- riscv_bitmanip.h - RISC-V Zb* intrinsics --------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RISCV_BITMANIP_H +#define __RISCV_BITMANIP_H + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +#if defined(__riscv_zbb) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_orc_b_32(uint32_t __x) { + return __builtin_riscv_orc_b_32(__x); +} + +static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) +__riscv_clz_32(uint32_t __x) { + return __builtin_riscv_clz_32(__x); +} + +static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) +__riscv_ctz_32(uint32_t __x) { + return __builtin_riscv_ctz_32(__x); +} + +static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) +__riscv_cpop_32(uint32_t __x) { + return __builtin_popcount(__x); +} + +#if __riscv_xlen == 64 +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_orc_b_64(uint64_t __x) { + return __builtin_riscv_orc_b_64(__x); +} + +static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) +__riscv_clz_64(uint64_t __x) { + return __builtin_riscv_clz_64(__x); +} + +static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) +__riscv_ctz_64(uint64_t __x) { + return __builtin_riscv_ctz_64(__x); +} + +static __inline__ unsigned __attribute__((__always_inline__, __nodebug__)) +__riscv_cpop_64(uint64_t __x) { + return __builtin_popcountll(__x); +} +#endif +#endif // defined(__riscv_zbb) + +#if defined(__riscv_zbb) || defined(__riscv_zbkb) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_rev8_32(uint32_t __x) { + return __builtin_bswap32(__x); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_rol_32(uint32_t __x, uint32_t __y) { + return __builtin_rotateleft32(__x, __y); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_ror_32(uint32_t __x, uint32_t __y) { + return __builtin_rotateright32(__x, __y); +} + +#if __riscv_xlen == 64 +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_rev8_64(uint64_t __x) { + return __builtin_bswap64(__x); +} + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_rol_64(uint64_t __x, uint32_t __y) { + return __builtin_rotateleft64(__x, __y); +} + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_ror_64(uint64_t __x, uint32_t __y) { + return __builtin_rotateright64(__x, __y); +} +#endif +#endif // defined(__riscv_zbb) || defined(__riscv_zbkb) + +#if defined(__riscv_zbkb) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_brev8_32(uint32_t __x) { + return __builtin_riscv_brev8_32(__x); +} + +#if __riscv_xlen == 64 +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_brev8_64(uint64_t __x) { + return __builtin_riscv_brev8_64(__x); +} +#endif + +#if __riscv_xlen == 32 +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_unzip_32(uint32_t __x) { + return __builtin_riscv_unzip_32(__x); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_zip_32(uint32_t __x) { + return __builtin_riscv_zip_32(__x); +} +#endif +#endif // defined(__riscv_zbkb) + +#if defined(__riscv_zbc) +#if __riscv_xlen == 32 +static __inline__ uint32_t __attribute__((__always_inline__, 
__nodebug__)) +__riscv_clmulr_32(uint32_t __x, uint32_t __y) { + return __builtin_riscv_clmulr_32(__x, __y); +} +#endif + +#if __riscv_xlen == 64 +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_clmulr_64(uint64_t __x, uint64_t __y) { + return __builtin_riscv_clmulr_64(__x, __y); +} +#endif +#endif // defined(__riscv_zbc) + +#if defined(__riscv_zbkc) || defined(__riscv_zbc) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_clmul_32(uint32_t __x, uint32_t __y) { + return __builtin_riscv_clmul_32(__x, __y); +} + +#if __riscv_xlen == 32 +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_clmulh_32(uint32_t __x, uint32_t __y) { + return __builtin_riscv_clmulh_32(__x, __y); +} +#endif + +#if __riscv_xlen == 64 +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_clmul_64(uint64_t __x, uint64_t __y) { + return __builtin_riscv_clmul_64(__x, __y); +} + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_clmulh_64(uint64_t __x, uint64_t __y) { + return __builtin_riscv_clmulh_64(__x, __y); +} +#endif +#endif // defined(__riscv_zbkc) || defined(__riscv_zbc) + +#if defined(__riscv_zbkx) +#if __riscv_xlen == 32 +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_xperm4_32(uint32_t __x, uint32_t __y) { + return __builtin_riscv_xperm4_32(__x, __y); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_xperm8_32(uint32_t __x, uint32_t __y) { + return __builtin_riscv_xperm8_32(__x, __y); +} +#endif + +#if __riscv_xlen == 64 +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_xperm4_64(uint64_t __x, uint64_t __y) { + return __builtin_riscv_xperm4_64(__x, __y); +} + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_xperm8_64(uint64_t __x, uint64_t __y) { + return __builtin_riscv_xperm8_64(__x, __y); +} +#endif +#endif // defined(__riscv_zbkx) + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/lib/include/riscv_crypto.h b/lib/include/riscv_crypto.h new file mode 100644 index 0000000000..7cd2a708f5 --- /dev/null +++ b/lib/include/riscv_crypto.h @@ -0,0 +1,170 @@ +/*===---- riscv_crypto.h - RISC-V Zk* intrinsics ---------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RISCV_CRYPTO_H +#define __RISCV_CRYPTO_H + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +#if defined(__riscv_zknd) +#if __riscv_xlen == 32 +#define __riscv_aes32dsi(x, y, bs) __builtin_riscv_aes32dsi(x, y, bs) +#define __riscv_aes32dsmi(x, y, bs) __builtin_riscv_aes32dsmi(x, y, bs) +#endif + +#if __riscv_xlen == 64 +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_aes64ds(uint64_t __x, uint64_t __y) { + return __builtin_riscv_aes64ds(__x, __y); +} + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_aes64dsm(uint64_t __x, uint64_t __y) { + return __builtin_riscv_aes64dsm(__x, __y); +} + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_aes64im(uint64_t __x) { + return __builtin_riscv_aes64im(__x); +} +#endif +#endif // defined(__riscv_zknd) + +#if defined(__riscv_zkne) +#if __riscv_xlen == 32 +#define __riscv_aes32esi(x, y, bs) __builtin_riscv_aes32esi(x, y, bs) +#define __riscv_aes32esmi(x, y, bs) __builtin_riscv_aes32esmi(x, y, bs) +#endif + +#if __riscv_xlen == 64 +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_aes64es(uint64_t __x, uint64_t __y) { + return __builtin_riscv_aes64es(__x, __y); +} + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_aes64esm(uint64_t __x, uint64_t __y) { + return __builtin_riscv_aes64esm(__x, __y); +} +#endif +#endif // defined(__riscv_zkne) + +#if defined(__riscv_zknd) || defined(__riscv_zkne) +#if __riscv_xlen == 64 +#define __riscv_aes64ks1i(x, rnum) __builtin_riscv_aes64ks1i(x, rnum) + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_aes64ks2(uint64_t __x, uint64_t __y) { + return __builtin_riscv_aes64ks2(__x, __y); +} +#endif +#endif // defined(__riscv_zknd) || defined(__riscv_zkne) + +#if defined(__riscv_zknh) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha256sig0(uint32_t __x) { + return __builtin_riscv_sha256sig0(__x); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha256sig1(uint32_t __x) { + return __builtin_riscv_sha256sig1(__x); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha256sum0(uint32_t __x) { + return __builtin_riscv_sha256sum0(__x); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha256sum1(uint32_t __x) { + return __builtin_riscv_sha256sum1(__x); +} + +#if __riscv_xlen == 32 +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha512sig0h(uint32_t __x, uint32_t __y) { + return __builtin_riscv_sha512sig0h(__x, __y); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha512sig0l(uint32_t __x, uint32_t __y) { + return __builtin_riscv_sha512sig0l(__x, __y); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha512sig1h(uint32_t __x, uint32_t __y) { + return __builtin_riscv_sha512sig1h(__x, __y); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha512sig1l(uint32_t __x, uint32_t __y) { + return __builtin_riscv_sha512sig1l(__x, __y); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha512sum0r(uint32_t 
__x, uint32_t __y) { + return __builtin_riscv_sha512sum0r(__x, __y); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha512sum1r(uint32_t __x, uint32_t __y) { + return __builtin_riscv_sha512sum1r(__x, __y); +} +#endif + +#if __riscv_xlen == 64 +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha512sig0(uint64_t __x) { + return __builtin_riscv_sha512sig0(__x); +} + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha512sig1(uint64_t __x) { + return __builtin_riscv_sha512sig1(__x); +} + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha512sum0(uint64_t __x) { + return __builtin_riscv_sha512sum0(__x); +} + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sha512sum1(uint64_t __x) { + return __builtin_riscv_sha512sum1(__x); +} +#endif +#endif // defined(__riscv_zknh) + +#if defined(__riscv_zksh) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sm3p0(uint32_t __x) { + return __builtin_riscv_sm3p0(__x); +} + +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) +__riscv_sm3p1(uint32_t __x) { + return __builtin_riscv_sm3p1(__x); +} +#endif // defined(__riscv_zksh) + +#if defined(__riscv_zksed) +#define __riscv_sm4ed(x, y, bs) __builtin_riscv_sm4ed(x, y, bs); +#define __riscv_sm4ks(x, y, bs) __builtin_riscv_sm4ks(x, y, bs); +#endif // defined(__riscv_zksed) + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/lib/include/riscv_ntlh.h b/lib/include/riscv_ntlh.h index 9ce1709205..c92e580a0a 100644 --- a/lib/include/riscv_ntlh.h +++ b/lib/include/riscv_ntlh.h @@ -21,8 +21,6 @@ enum { __RISCV_NTLH_ALL }; -#define __riscv_ntl_load(PTR, DOMAIN) __builtin_riscv_ntl_load((PTR), (DOMAIN)) -#define __riscv_ntl_store(PTR, VAL, DOMAIN) \ - __builtin_riscv_ntl_store((PTR), (VAL), (DOMAIN)) - -#endif \ No newline at end of file +#define __riscv_ntl_load __builtin_riscv_ntl_load +#define __riscv_ntl_store __builtin_riscv_ntl_store +#endif diff --git a/lib/include/riscv_vector.h b/lib/include/riscv_vector.h index 2a214a194a..083a135877 100644 --- a/lib/include/riscv_vector.h +++ b/lib/include/riscv_vector.h @@ -392,6 +392,37 @@ typedef __rvv_float64m2x4_t vfloat64m2x4_t; typedef __rvv_float64m4_t vfloat64m4_t; typedef __rvv_float64m4x2_t vfloat64m4x2_t; typedef __rvv_float64m8_t vfloat64m8_t; +typedef __rvv_bfloat16mf4_t vbfloat16mf4_t; +typedef __rvv_bfloat16mf4x2_t vbfloat16mf4x2_t; +typedef __rvv_bfloat16mf4x3_t vbfloat16mf4x3_t; +typedef __rvv_bfloat16mf4x4_t vbfloat16mf4x4_t; +typedef __rvv_bfloat16mf4x5_t vbfloat16mf4x5_t; +typedef __rvv_bfloat16mf4x6_t vbfloat16mf4x6_t; +typedef __rvv_bfloat16mf4x7_t vbfloat16mf4x7_t; +typedef __rvv_bfloat16mf4x8_t vbfloat16mf4x8_t; +typedef __rvv_bfloat16mf2_t vbfloat16mf2_t; +typedef __rvv_bfloat16mf2x2_t vbfloat16mf2x2_t; +typedef __rvv_bfloat16mf2x3_t vbfloat16mf2x3_t; +typedef __rvv_bfloat16mf2x4_t vbfloat16mf2x4_t; +typedef __rvv_bfloat16mf2x5_t vbfloat16mf2x5_t; +typedef __rvv_bfloat16mf2x6_t vbfloat16mf2x6_t; +typedef __rvv_bfloat16mf2x7_t vbfloat16mf2x7_t; +typedef __rvv_bfloat16mf2x8_t vbfloat16mf2x8_t; +typedef __rvv_bfloat16m1_t vbfloat16m1_t; +typedef __rvv_bfloat16m1x2_t vbfloat16m1x2_t; +typedef __rvv_bfloat16m1x3_t vbfloat16m1x3_t; +typedef __rvv_bfloat16m1x4_t vbfloat16m1x4_t; +typedef __rvv_bfloat16m1x5_t vbfloat16m1x5_t; +typedef __rvv_bfloat16m1x6_t vbfloat16m1x6_t; +typedef __rvv_bfloat16m1x7_t 
vbfloat16m1x7_t; +typedef __rvv_bfloat16m1x8_t vbfloat16m1x8_t; +typedef __rvv_bfloat16m2_t vbfloat16m2_t; +typedef __rvv_bfloat16m2x2_t vbfloat16m2x2_t; +typedef __rvv_bfloat16m2x3_t vbfloat16m2x3_t; +typedef __rvv_bfloat16m2x4_t vbfloat16m2x4_t; +typedef __rvv_bfloat16m4_t vbfloat16m4_t; +typedef __rvv_bfloat16m4x2_t vbfloat16m4x2_t; +typedef __rvv_bfloat16m8_t vbfloat16m8_t; #define __riscv_v_intrinsic_overloading 1 #ifdef __cplusplus diff --git a/lib/include/smmintrin.h b/lib/include/smmintrin.h index 16d8855a1c..005d7db9c3 100644 --- a/lib/include/smmintrin.h +++ b/lib/include/smmintrin.h @@ -18,8 +18,8 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), \ - __min_vector_width__(128))) + __attribute__((__always_inline__, __nodebug__, \ + __target__("sse4.1,no-evex512"), __min_vector_width__(128))) /* SSE4 Rounding macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 @@ -645,7 +645,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, /// \returns A 128-bit integer vector containing the data stored at the /// specified memory location. static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_stream_load_si128(__m128i const *__V) { +_mm_stream_load_si128(const void *__V) { return (__m128i)__builtin_nontemporal_load((const __v2di *)__V); } diff --git a/lib/include/stdalign.h b/lib/include/stdalign.h index 8ae6e658dd..158508e65d 100644 --- a/lib/include/stdalign.h +++ b/lib/include/stdalign.h @@ -10,10 +10,8 @@ #ifndef __STDALIGN_H #define __STDALIGN_H -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ #if defined(__cplusplus) || \ - (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202000L) + (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L) #ifndef __cplusplus #define alignas _Alignas #define alignof _Alignof diff --git a/lib/include/stdarg.h b/lib/include/stdarg.h index ba978721f1..94b066566f 100644 --- a/lib/include/stdarg.h +++ b/lib/include/stdarg.h @@ -7,45 +7,73 @@ *===-----------------------------------------------------------------------=== */ -#ifndef __STDARG_H +/* + * This header is designed to be included multiple times. If any of the __need_ + * macros are defined, then only that subset of interfaces are provided. This + * can be useful for POSIX headers that need to not expose all of stdarg.h, but + * need to use some of its interfaces. Otherwise this header provides all of + * the expected interfaces. + * + * When clang modules are enabled, this header is a textual header. It ignores + * its header guard so that multiple submodules can export its interfaces. + * Take module SM with submodules A and B, whose headers both include stdarg.h + * When SM.A builds, __STDARG_H will be defined. When SM.B builds, the + * definition from SM.A will leak when building without local submodule + * visibility. stdarg.h wouldn't include any of its implementation headers, and + * SM.B wouldn't import any of the stdarg modules, and SM.B's `export *` + * wouldn't export any stdarg interfaces as expected. However, since stdarg.h + * ignores its header guard when building with modules, it all works as + * expected. + * + * When clang modules are not enabled, the header guards can function in the + * normal simple fashion. 
+ */ +#if !defined(__STDARG_H) || __has_feature(modules) || \ + defined(__need___va_list) || defined(__need_va_list) || \ + defined(__need_va_arg) || defined(__need___va_copy) || \ + defined(__need_va_copy) -#ifndef __GNUC_VA_LIST -#define __GNUC_VA_LIST -typedef __builtin_va_list __gnuc_va_list; -#endif - -#ifdef __need___va_list -#undef __need___va_list -#else +#if !defined(__need___va_list) && !defined(__need_va_list) && \ + !defined(__need_va_arg) && !defined(__need___va_copy) && \ + !defined(__need_va_copy) #define __STDARG_H -#ifndef _VA_LIST -typedef __builtin_va_list va_list; -#define _VA_LIST -#endif - -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L -/* C2x does not require the second parameter for va_start. */ -#define va_start(ap, ...) __builtin_va_start(ap, 0) -#else -/* Versions before C2x do require the second parameter. */ -#define va_start(ap, param) __builtin_va_start(ap, param) -#endif -#define va_end(ap) __builtin_va_end(ap) -#define va_arg(ap, type) __builtin_va_arg(ap, type) - +#define __need___va_list +#define __need_va_list +#define __need_va_arg +#define __need___va_copy /* GCC always defines __va_copy, but does not define va_copy unless in c99 mode * or -ansi is not specified, since it was not part of C90. */ -#define __va_copy(d,s) __builtin_va_copy(d,s) - #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ !defined(__STRICT_ANSI__) -#define va_copy(dest, src) __builtin_va_copy(dest, src) +#define __need_va_copy +#endif #endif -#endif /* __STDARG_H */ +#ifdef __need___va_list +#include <__stdarg___gnuc_va_list.h> +#undef __need___va_list +#endif /* defined(__need___va_list) */ -#endif /* not __STDARG_H */ +#ifdef __need_va_list +#include <__stdarg_va_list.h> +#undef __need_va_list +#endif /* defined(__need_va_list) */ + +#ifdef __need_va_arg +#include <__stdarg_va_arg.h> +#undef __need_va_arg +#endif /* defined(__need_va_arg) */ + +#ifdef __need___va_copy +#include <__stdarg___va_copy.h> +#undef __need___va_copy +#endif /* defined(__need___va_copy) */ + +#ifdef __need_va_copy +#include <__stdarg_va_copy.h> +#undef __need_va_copy +#endif /* defined(__need_va_copy) */ + +#endif diff --git a/lib/include/stdatomic.h b/lib/include/stdatomic.h index aed33d4333..521c473dd1 100644 --- a/lib/include/stdatomic.h +++ b/lib/include/stdatomic.h @@ -45,16 +45,14 @@ extern "C" { #define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE /* 7.17.2 Initialization */ -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202000L) || \ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L) || \ defined(__cplusplus) -/* ATOMIC_VAR_INIT was removed in C2x, but still remains in C++23. */ +/* ATOMIC_VAR_INIT was removed in C23, but still remains in C++23. */ #define ATOMIC_VAR_INIT(value) (value) #endif #if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201710L && \ - __STDC_VERSION__ < 202000L) || \ + __STDC_VERSION__ < 202311L) || \ (defined(__cplusplus) && __cplusplus >= 202002L)) && \ !defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS) /* ATOMIC_VAR_INIT was deprecated in C17 and C++20. 
*/ diff --git a/lib/include/stdckdint.h b/lib/include/stdckdint.h new file mode 100644 index 0000000000..20bc34ffb3 --- /dev/null +++ b/lib/include/stdckdint.h @@ -0,0 +1,42 @@ +/*===---- stdckdint.h - Standard header for checking integer----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDCKDINT_H +#define __STDCKDINT_H + +/* If we're hosted, fall back to the system's stdckdint.h. FreeBSD, for + * example, already has a Clang-compatible stdckdint.h header. + * + * The `stdckdint.h` header requires C 23 or newer. + */ +#if __STDC_HOSTED__ && __has_include_next() +#include_next +#else + +/* C23 7.20.1 Defines several macros for performing checked integer arithmetic*/ + +#define __STDC_VERSION_STDCKDINT_H__ 202311L + +// Both A and B shall be any integer type other than "plain" char, bool, a bit- +// precise integer type, or an enumerated type, and they need not be the same. + +// R shall be a modifiable lvalue of any integer type other than "plain" char, +// bool, a bit-precise integer type, or an enumerated type. It shouldn't be +// short type, either. Otherwise, it may be unable to hold two the result of +// operating two 'int's. + +// A diagnostic message will be produced if A or B are not suitable integer +// types, or if R is not a modifiable lvalue of a suitable integer type or R +// is short type. +#define ckd_add(R, A, B) __builtin_add_overflow((A), (B), (R)) +#define ckd_sub(R, A, B) __builtin_sub_overflow((A), (B), (R)) +#define ckd_mul(R, A, B) __builtin_mul_overflow((A), (B), (R)) + +#endif /* __STDC_HOSTED__ */ +#endif /* __STDCKDINT_H */ diff --git a/lib/include/stddef.h b/lib/include/stddef.h index 539541f0ed..e0ad7b8d17 100644 --- a/lib/include/stddef.h +++ b/lib/include/stddef.h @@ -7,126 +7,116 @@ *===-----------------------------------------------------------------------=== */ -#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \ - defined(__need_size_t) || defined(__need_wchar_t) || \ - defined(__need_NULL) || defined(__need_wint_t) +/* + * This header is designed to be included multiple times. If any of the __need_ + * macros are defined, then only that subset of interfaces are provided. This + * can be useful for POSIX headers that need to not expose all of stddef.h, but + * need to use some of its interfaces. Otherwise this header provides all of + * the expected interfaces. + * + * When clang modules are enabled, this header is a textual header. It ignores + * its header guard so that multiple submodules can export its interfaces. + * Take module SM with submodules A and B, whose headers both include stddef.h + * When SM.A builds, __STDDEF_H will be defined. When SM.B builds, the + * definition from SM.A will leak when building without local submodule + * visibility. stddef.h wouldn't include any of its implementation headers, and + * SM.B wouldn't import any of the stddef modules, and SM.B's `export *` + * wouldn't export any stddef interfaces as expected. However, since stddef.h + * ignores its header guard when building with modules, it all works as + * expected. + * + * When clang modules are not enabled, the header guards can function in the + * normal simple fashion. 
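The new stdckdint.h above maps the C23 checked-arithmetic macros directly onto the overflow builtins; each macro returns true when the mathematical result does not fit in the object R points to. A small usage sketch (scale is an illustrative name):

#include <stdckdint.h>

/* Returns 0 on overflow instead of a wrapped value. */
int scale(int value, int factor) {
    int product;
    if (ckd_mul(&product, value, factor))
        return 0;
    return product;
}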
+ */ +#if !defined(__STDDEF_H) || __has_feature(modules) || \ + (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1) || \ + defined(__need_ptrdiff_t) || defined(__need_size_t) || \ + defined(__need_rsize_t) || defined(__need_wchar_t) || \ + defined(__need_NULL) || defined(__need_nullptr_t) || \ + defined(__need_unreachable) || defined(__need_max_align_t) || \ + defined(__need_offsetof) || defined(__need_wint_t) #if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \ - !defined(__need_wchar_t) && !defined(__need_NULL) && \ - !defined(__need_wint_t) -/* Always define miscellaneous pieces when modules are available. */ -#if !__has_feature(modules) + !defined(__need_rsize_t) && !defined(__need_wchar_t) && \ + !defined(__need_NULL) && !defined(__need_nullptr_t) && \ + !defined(__need_unreachable) && !defined(__need_max_align_t) && \ + !defined(__need_offsetof) && !defined(__need_wint_t) #define __STDDEF_H -#endif #define __need_ptrdiff_t #define __need_size_t +/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is + * enabled. */ +#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 +#define __need_rsize_t +#endif #define __need_wchar_t #define __need_NULL -#define __need_STDDEF_H_misc -/* __need_wint_t is intentionally not defined here. */ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ + defined(__cplusplus) +#define __need_nullptr_t +#endif +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +#define __need_unreachable +#endif +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ + (defined(__cplusplus) && __cplusplus >= 201103L) +#define __need_max_align_t +#endif +#define __need_offsetof +/* wint_t is provided by and not . It's here + * for compatibility, but must be explicitly requested. Therefore + * __need_wint_t is intentionally not defined here. */ #endif #if defined(__need_ptrdiff_t) -#if !defined(_PTRDIFF_T) || __has_feature(modules) -/* Always define ptrdiff_t when modules are available. */ -#if !__has_feature(modules) -#define _PTRDIFF_T -#endif -typedef __PTRDIFF_TYPE__ ptrdiff_t; -#endif +#include <__stddef_ptrdiff_t.h> #undef __need_ptrdiff_t #endif /* defined(__need_ptrdiff_t) */ #if defined(__need_size_t) -#if !defined(_SIZE_T) || __has_feature(modules) -/* Always define size_t when modules are available. */ -#if !__has_feature(modules) -#define _SIZE_T -#endif -typedef __SIZE_TYPE__ size_t; -#endif +#include <__stddef_size_t.h> #undef __need_size_t #endif /*defined(__need_size_t) */ -#if defined(__need_STDDEF_H_misc) -/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is - * enabled. */ -#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \ - !defined(_RSIZE_T)) || __has_feature(modules) -/* Always define rsize_t when modules are available. */ -#if !__has_feature(modules) -#define _RSIZE_T -#endif -typedef __SIZE_TYPE__ rsize_t; -#endif -#endif /* defined(__need_STDDEF_H_misc) */ +#if defined(__need_rsize_t) +#include <__stddef_rsize_t.h> +#undef __need_rsize_t +#endif /* defined(__need_rsize_t) */ #if defined(__need_wchar_t) -#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED) -/* Always define wchar_t when modules are available. 
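The rsize_t hunk above keeps the Annex K type opt-in; a sketch of that opt-in, assuming the macro is defined before the first include of the header:

#define __STDC_WANT_LIB_EXT1__ 1  /* request C11 Annex K additions        */
#include <stddef.h>               /* now also defines rsize_t             */

rsize_t count = 8;                /* same underlying type as size_t here  */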
*/ -#if !defined(_WCHAR_T) || __has_feature(modules) -#if !__has_feature(modules) -#define _WCHAR_T -#if defined(_MSC_EXTENSIONS) -#define _WCHAR_T_DEFINED -#endif -#endif -typedef __WCHAR_TYPE__ wchar_t; -#endif -#endif +#include <__stddef_wchar_t.h> #undef __need_wchar_t #endif /* defined(__need_wchar_t) */ #if defined(__need_NULL) -#undef NULL -#ifdef __cplusplus -# if !defined(__MINGW32__) && !defined(_MSC_VER) -# define NULL __null -# else -# define NULL 0 -# endif -#else -# define NULL ((void*)0) -#endif -#ifdef __cplusplus -#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED) -namespace std { typedef decltype(nullptr) nullptr_t; } -using ::std::nullptr_t; -#endif -#endif +#include <__stddef_null.h> #undef __need_NULL #endif /* defined(__need_NULL) */ -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L -typedef typeof(nullptr) nullptr_t; -#endif /* defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L */ +#if defined(__need_nullptr_t) +#include <__stddef_nullptr_t.h> +#undef __need_nullptr_t +#endif /* defined(__need_nullptr_t) */ -#if defined(__need_STDDEF_H_misc) && defined(__STDC_VERSION__) && \ - __STDC_VERSION__ >= 202000L -#define unreachable() __builtin_unreachable() -#endif /* defined(__need_STDDEF_H_misc) && >= C23 */ +#if defined(__need_unreachable) +#include <__stddef_unreachable.h> +#undef __need_unreachable +#endif /* defined(__need_unreachable) */ -#if defined(__need_STDDEF_H_misc) -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ - (defined(__cplusplus) && __cplusplus >= 201103L) -#include "__stddef_max_align_t.h" -#endif -#define offsetof(t, d) __builtin_offsetof(t, d) -#undef __need_STDDEF_H_misc -#endif /* defined(__need_STDDEF_H_misc) */ +#if defined(__need_max_align_t) +#include <__stddef_max_align_t.h> +#undef __need_max_align_t +#endif /* defined(__need_max_align_t) */ + +#if defined(__need_offsetof) +#include <__stddef_offsetof.h> +#undef __need_offsetof +#endif /* defined(__need_offsetof) */ /* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use __WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ #if defined(__need_wint_t) -/* Always define wint_t when modules are available. */ -#if !defined(_WINT_T) || __has_feature(modules) -#if !__has_feature(modules) -#define _WINT_T -#endif -typedef __WINT_TYPE__ wint_t; -#endif +#include <__stddef_wint_t.h> #undef __need_wint_t #endif /* __need_wint_t */ diff --git a/lib/include/stdint.h b/lib/include/stdint.h index a47e91be18..b6699b6ca3 100644 --- a/lib/include/stdint.h +++ b/lib/include/stdint.h @@ -499,9 +499,8 @@ typedef __UINTMAX_TYPE__ uintmax_t; # define INT64_MAX INT64_C( 9223372036854775807) # define INT64_MIN (-INT64_C( 9223372036854775807)-1) # define UINT64_MAX UINT64_C(18446744073709551615) -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. 
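The C23-only pieces stddef.h now forwards to (__stddef_nullptr_t.h, __stddef_unreachable.h) correspond, per the lines removed above, to typeof(nullptr) and __builtin_unreachable(). A sketch assuming -std=c23; last_bit is an illustrative name:

#include <stddef.h>

nullptr_t none = nullptr;          /* C23: typeof(nullptr) */

unsigned last_bit(unsigned x) {
    switch (x & 1u) {
    case 0u: return 0u;
    case 1u: return 1u;
    default: unreachable();        /* C23: __builtin_unreachable() */
    }
}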
*/ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT64_WIDTH 64 # define INT64_WIDTH UINT64_WIDTH @@ -545,9 +544,7 @@ typedef __UINTMAX_TYPE__ uintmax_t; # define INT_FAST64_MAX __INT_LEAST64_MAX # define UINT_FAST64_MAX __UINT_LEAST64_MAX -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT_LEAST64_WIDTH __UINT_LEAST64_WIDTH # define INT_LEAST64_WIDTH UINT_LEAST64_WIDTH # define UINT_FAST64_WIDTH __UINT_LEAST64_WIDTH @@ -586,9 +583,7 @@ typedef __UINTMAX_TYPE__ uintmax_t; # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT56_MAX -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT56_WIDTH 56 # define INT56_WIDTH UINT56_WIDTH # define UINT_LEAST56_WIDTH UINT56_WIDTH @@ -635,9 +630,7 @@ typedef __UINTMAX_TYPE__ uintmax_t; # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT48_MAX -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L #define UINT48_WIDTH 48 #define INT48_WIDTH UINT48_WIDTH #define UINT_LEAST48_WIDTH UINT48_WIDTH @@ -684,9 +677,7 @@ typedef __UINTMAX_TYPE__ uintmax_t; # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT40_MAX -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT40_WIDTH 40 # define INT40_WIDTH UINT40_WIDTH # define UINT_LEAST40_WIDTH UINT40_WIDTH @@ -727,9 +718,7 @@ typedef __UINTMAX_TYPE__ uintmax_t; # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT32_MAX -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT32_WIDTH 32 # define INT32_WIDTH UINT32_WIDTH # undef __UINT_LEAST32_WIDTH @@ -749,9 +738,7 @@ typedef __UINTMAX_TYPE__ uintmax_t; # define INT_FAST32_MAX __INT_LEAST32_MAX # define UINT_FAST32_MAX __UINT_LEAST32_MAX -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. 
*/ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT_LEAST32_WIDTH __UINT_LEAST32_WIDTH # define INT_LEAST32_WIDTH UINT_LEAST32_WIDTH # define UINT_FAST32_WIDTH __UINT_LEAST32_WIDTH @@ -784,9 +771,7 @@ typedef __UINTMAX_TYPE__ uintmax_t; # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT24_MAX -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT24_WIDTH 24 # define INT24_WIDTH UINT24_WIDTH # define UINT_LEAST24_WIDTH UINT24_WIDTH @@ -819,9 +804,7 @@ typedef __UINTMAX_TYPE__ uintmax_t; # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT16_MAX -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT16_WIDTH 16 # define INT16_WIDTH UINT16_WIDTH # undef __UINT_LEAST16_WIDTH @@ -839,9 +822,7 @@ typedef __UINTMAX_TYPE__ uintmax_t; # define INT_FAST16_MAX __INT_LEAST16_MAX # define UINT_FAST16_MAX __UINT_LEAST16_MAX -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT_LEAST16_WIDTH __UINT_LEAST16_WIDTH # define INT_LEAST16_WIDTH UINT_LEAST16_WIDTH # define UINT_FAST16_WIDTH __UINT_LEAST16_WIDTH @@ -862,9 +843,7 @@ typedef __UINTMAX_TYPE__ uintmax_t; # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT8_MAX -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT8_WIDTH 8 # define INT8_WIDTH UINT8_WIDTH # undef __UINT_LEAST8_WIDTH @@ -880,9 +859,7 @@ typedef __UINTMAX_TYPE__ uintmax_t; # define INT_FAST8_MAX __INT_LEAST8_MAX # define UINT_FAST8_MAX __UINT_LEAST8_MAX -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define UINT_LEAST8_WIDTH __UINT_LEAST8_WIDTH # define INT_LEAST8_WIDTH UINT_LEAST8_WIDTH # define UINT_FAST8_WIDTH __UINT_LEAST8_WIDTH @@ -907,10 +884,8 @@ typedef __UINTMAX_TYPE__ uintmax_t; #define PTRDIFF_MAX __PTRDIFF_MAX__ #define SIZE_MAX __SIZE_MAX__ -/* C2x 7.20.2.4 Width of integer types capable of holding object pointers. */ -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +/* C23 7.22.2.4 Width of integer types capable of holding object pointers. */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L /* NB: The C standard requires that these be the same value, but the compiler exposes separate internal width macros. 
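With the guards renumbered to 202311L, the *_WIDTH macros become visible exactly in C23 mode; a small compile-time check as a sketch:

#include <stdint.h>

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
static_assert(UINT64_WIDTH == 64, "exact-width types have fixed widths");
static_assert(SIZE_WIDTH >= 16, "size_t is at least 16 bits wide");
#endif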
*/ #define INTPTR_WIDTH __INTPTR_WIDTH__ @@ -928,10 +903,8 @@ typedef __UINTMAX_TYPE__ uintmax_t; #define INTMAX_MAX __INTMAX_MAX__ #define UINTMAX_MAX __UINTMAX_MAX__ -/* C2x 7.20.2.5 Width of greatest-width integer types. */ -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +/* C23 7.22.2.5 Width of greatest-width integer types. */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L /* NB: The C standard requires that these be the same value, but the compiler exposes separate internal width macros. */ #define INTMAX_WIDTH __INTMAX_WIDTH__ @@ -964,10 +937,8 @@ typedef __UINTMAX_TYPE__ uintmax_t; #define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__) #define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__) -/* C2x 7.20.3.x Width of other integer types. */ -/* FIXME: This is using the placeholder dates Clang produces for these macros - in C2x mode; switch to the correct values once they've been published. */ -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L +/* C23 7.22.3.x Width of other integer types. */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L #define PTRDIFF_WIDTH __PTRDIFF_WIDTH__ #define SIG_ATOMIC_WIDTH __SIG_ATOMIC_WIDTH__ #define SIZE_WIDTH __SIZE_WIDTH__ diff --git a/lib/include/stdnoreturn.h b/lib/include/stdnoreturn.h index 967be94762..c90bf77e84 100644 --- a/lib/include/stdnoreturn.h +++ b/lib/include/stdnoreturn.h @@ -15,8 +15,8 @@ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) && \ !defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS) -/* The noreturn macro is deprecated in C2x. We do not mark it as such because - including the header file in C2x is also deprecated and we do not want to +/* The noreturn macro is deprecated in C23. We do not mark it as such because + including the header file in C23 is also deprecated and we do not want to issue a confusing diagnostic for code which includes followed by code that writes [[noreturn]]. The issue with such code is not with the attribute, or the use of 'noreturn', but the inclusion of the diff --git a/lib/include/tmmintrin.h b/lib/include/tmmintrin.h index cb9be2349d..7d8dc46c57 100644 --- a/lib/include/tmmintrin.h +++ b/lib/include/tmmintrin.h @@ -17,8 +17,13 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64))) -#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("ssse3,no-evex512"), __min_vector_width__(64))) +#define __DEFAULT_FN_ATTRS_MMX \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("mmx,ssse3,no-evex512"), \ + __min_vector_width__(64))) /// Computes the absolute value of each of the packed 8-bit signed /// integers in the source operand and stores the 8-bit unsigned integer diff --git a/lib/include/usermsrintrin.h b/lib/include/usermsrintrin.h new file mode 100644 index 0000000000..6138837670 --- /dev/null +++ b/lib/include/usermsrintrin.h @@ -0,0 +1,51 @@ +/*===--------------- usermsrintrin.h - USERMSR intrinsics -----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
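A sketch of the migration the stdnoreturn.h comment above points at (die is an illustrative name):

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
/* C23: use the attribute; including <stdnoreturn.h> is itself deprecated. */
[[noreturn]] void die(const char *msg);
#else
/* C11/C17: the header provides the 'noreturn' convenience macro. */
#include <stdnoreturn.h>
noreturn void die(const char *msg);
#endif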
+ * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __X86GPRINTRIN_H +#error "Never use directly; include instead." +#endif // __X86GPRINTRIN_H + +#ifndef __USERMSRINTRIN_H +#define __USERMSRINTRIN_H +#ifdef __x86_64__ + +/// Reads the contents of a 64-bit MSR specified in \a __A into \a dst. +/// +/// This intrinsic corresponds to the URDMSR instruction. +/// \param __A +/// An unsigned long long. +/// +/// \code{.operation} +/// DEST := MSR[__A] +/// \endcode +static __inline__ unsigned long long + __attribute__((__always_inline__, __nodebug__, __target__("usermsr"))) + _urdmsr(unsigned long long __A) { + return __builtin_ia32_urdmsr(__A); +} + +/// Writes the contents of \a __B into the 64-bit MSR specified in \a __A. +/// +/// This intrinsic corresponds to the UWRMSR instruction. +/// +/// \param __A +/// An unsigned long long. +/// \param __B +/// An unsigned long long. +/// +/// \code{.operation} +/// MSR[__A] := __B +/// \endcode +static __inline__ void + __attribute__((__always_inline__, __nodebug__, __target__("usermsr"))) + _uwrmsr(unsigned long long __A, unsigned long long __B) { + return __builtin_ia32_uwrmsr(__A, __B); +} + +#endif // __x86_64__ +#endif // __USERMSRINTRIN_H diff --git a/lib/include/vaesintrin.h b/lib/include/vaesintrin.h index 294dcff2ad..d7c162f5c0 100644 --- a/lib/include/vaesintrin.h +++ b/lib/include/vaesintrin.h @@ -18,8 +18,10 @@ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256))) /* Default attributes for ZMM forms. */ -#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512))) - +#define __DEFAULT_FN_ATTRS_F \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512f,evex512,vaes"), \ + __min_vector_width__(512))) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_aesenc_epi128(__m256i __A, __m256i __B) diff --git a/lib/include/vecintrin.h b/lib/include/vecintrin.h index ec1dbfd015..1f51e32c0d 100644 --- a/lib/include/vecintrin.h +++ b/lib/include/vecintrin.h @@ -1543,7 +1543,7 @@ vec_load_len(const double *__ptr, unsigned int __len) { #if __ARCH__ >= 12 static inline __ATTRS_ai __vector unsigned char vec_load_len_r(const unsigned char *__ptr, unsigned int __len) { - return (__vector unsigned char)__builtin_s390_vlrl(__len, __ptr); + return (__vector unsigned char)__builtin_s390_vlrlr(__len, __ptr); } #endif @@ -1617,7 +1617,7 @@ vec_store_len(__vector double __vec, double *__ptr, static inline __ATTRS_ai void vec_store_len_r(__vector unsigned char __vec, unsigned char *__ptr, unsigned int __len) { - __builtin_s390_vstrl((__vector signed char)__vec, __len, __ptr); + __builtin_s390_vstrlr((__vector signed char)__vec, __len, __ptr); } #endif @@ -2689,7 +2689,8 @@ vec_cmplt(__vector double __a, __vector double __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector signed char __a, __vector signed char __b) { int __cc; - __builtin_s390_vceqbs(__a, __b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc == 0; } @@ -2697,7 +2698,8 @@ vec_all_eq(__vector signed char __a, __vector signed char __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc); + 
__builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc == 0; } @@ -2705,15 +2707,15 @@ vec_all_eq(__vector signed char __a, __vector __bool char __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs(__a, __b, &__cc); return __cc == 0; } @@ -2721,8 +2723,7 @@ vec_all_eq(__vector unsigned char __a, __vector unsigned char __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs(__a, (__vector unsigned char)__b, &__cc); return __cc == 0; } @@ -2730,23 +2731,23 @@ vec_all_eq(__vector unsigned char __a, __vector __bool char __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector signed short __a, __vector signed short __b) { int __cc; - __builtin_s390_vceqhs(__a, __b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc == 0; } @@ -2754,7 +2755,8 @@ vec_all_eq(__vector signed short __a, __vector signed short __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc == 0; } @@ -2762,15 +2764,15 @@ vec_all_eq(__vector signed short __a, __vector __bool short __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs(__a, __b, &__cc); return __cc == 0; } @@ -2778,8 +2780,7 @@ vec_all_eq(__vector unsigned short __a, __vector unsigned short __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc); return __cc == 0; } @@ -2787,23 +2788,23 @@ vec_all_eq(__vector unsigned short __a, __vector __bool short __b) { static 
inline __ATTRS_o_ai int vec_all_eq(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector signed int __a, __vector signed int __b) { int __cc; - __builtin_s390_vceqfs(__a, __b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc == 0; } @@ -2811,7 +2812,8 @@ vec_all_eq(__vector signed int __a, __vector signed int __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs(__a, (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc == 0; } @@ -2819,15 +2821,15 @@ vec_all_eq(__vector signed int __a, __vector __bool int __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs(__a, __b, &__cc); return __cc == 0; } @@ -2835,8 +2837,7 @@ vec_all_eq(__vector unsigned int __a, __vector unsigned int __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc); return __cc == 0; } @@ -2844,23 +2845,23 @@ vec_all_eq(__vector unsigned int __a, __vector __bool int __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector signed long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vceqgs(__a, __b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc == 0; } @@ -2868,7 +2869,8 @@ vec_all_eq(__vector signed long long __a, __vector signed long long __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc == 0; } @@ 
-2876,15 +2878,15 @@ vec_all_eq(__vector signed long long __a, __vector __bool long long __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs(__a, __b, &__cc); return __cc == 0; } @@ -2892,8 +2894,7 @@ vec_all_eq(__vector unsigned long long __a, __vector unsigned long long __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc); return __cc == 0; } @@ -2901,16 +2902,15 @@ vec_all_eq(__vector unsigned long long __a, __vector __bool long long __b) { static inline __ATTRS_o_ai int vec_all_eq(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int vec_all_eq(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc == 0; } @@ -2935,7 +2935,8 @@ vec_all_eq(__vector double __a, __vector double __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector signed char __a, __vector signed char __b) { int __cc; - __builtin_s390_vceqbs(__a, __b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc == 3; } @@ -2943,7 +2944,8 @@ vec_all_ne(__vector signed char __a, __vector signed char __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc == 3; } @@ -2951,15 +2953,16 @@ vec_all_ne(__vector signed char __a, __vector __bool char __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc == 3; } @@ -2967,8 +2970,7 @@ vec_all_ne(__vector unsigned char __a, __vector unsigned char __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs(__a, (__vector 
unsigned char)__b, &__cc); return __cc == 3; } @@ -2976,23 +2978,23 @@ vec_all_ne(__vector unsigned char __a, __vector __bool char __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector signed short __a, __vector signed short __b) { int __cc; - __builtin_s390_vceqhs(__a, __b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc == 3; } @@ -3000,7 +3002,8 @@ vec_all_ne(__vector signed short __a, __vector signed short __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc == 3; } @@ -3008,15 +3011,15 @@ vec_all_ne(__vector signed short __a, __vector __bool short __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs(__a, __b, &__cc); return __cc == 3; } @@ -3024,8 +3027,7 @@ vec_all_ne(__vector unsigned short __a, __vector unsigned short __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc); return __cc == 3; } @@ -3033,23 +3035,23 @@ vec_all_ne(__vector unsigned short __a, __vector __bool short __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector signed int __a, __vector signed int __b) { int __cc; - __builtin_s390_vceqfs(__a, __b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc == 3; } @@ -3057,7 +3059,8 @@ vec_all_ne(__vector signed int __a, __vector signed int __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs(__a, 
(__vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc == 3; } @@ -3065,15 +3068,15 @@ vec_all_ne(__vector signed int __a, __vector __bool int __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs(__a, __b, &__cc); return __cc == 3; } @@ -3081,8 +3084,7 @@ vec_all_ne(__vector unsigned int __a, __vector unsigned int __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc); return __cc == 3; } @@ -3090,23 +3092,23 @@ vec_all_ne(__vector unsigned int __a, __vector __bool int __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector signed long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vceqgs(__a, __b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc == 3; } @@ -3114,7 +3116,8 @@ vec_all_ne(__vector signed long long __a, __vector signed long long __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc == 3; } @@ -3122,15 +3125,15 @@ vec_all_ne(__vector signed long long __a, __vector __bool long long __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs(__a, __b, &__cc); return __cc == 3; } @@ -3138,8 +3141,7 @@ vec_all_ne(__vector unsigned long long __a, __vector unsigned long long __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc); 
return __cc == 3; } @@ -3147,16 +3149,15 @@ vec_all_ne(__vector unsigned long long __a, __vector __bool long long __b) { static inline __ATTRS_o_ai int vec_all_ne(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int vec_all_ne(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc == 3; } @@ -4241,7 +4242,8 @@ vec_all_numeric(__vector double __a) { static inline __ATTRS_o_ai int vec_any_eq(__vector signed char __a, __vector signed char __b) { int __cc; - __builtin_s390_vceqbs(__a, __b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc <= 1; } @@ -4249,7 +4251,8 @@ vec_any_eq(__vector signed char __a, __vector signed char __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc <= 1; } @@ -4257,15 +4260,15 @@ vec_any_eq(__vector signed char __a, __vector __bool char __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs(__a, __b, &__cc); return __cc <= 1; } @@ -4273,8 +4276,7 @@ vec_any_eq(__vector unsigned char __a, __vector unsigned char __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs(__a, (__vector unsigned char)__b, &__cc); return __cc <= 1; } @@ -4282,23 +4284,23 @@ vec_any_eq(__vector unsigned char __a, __vector __bool char __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((__vector signed char)__a, - (__vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector signed short __a, __vector signed short __b) { int __cc; - __builtin_s390_vceqhs(__a, __b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc <= 1; } @@ -4306,7 +4308,8 @@ vec_any_eq(__vector signed short __a, __vector signed short __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector signed short 
__a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc <= 1; } @@ -4314,15 +4317,15 @@ vec_any_eq(__vector signed short __a, __vector __bool short __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs(__a, __b, &__cc); return __cc <= 1; } @@ -4330,8 +4333,7 @@ vec_any_eq(__vector unsigned short __a, __vector unsigned short __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc); return __cc <= 1; } @@ -4339,23 +4341,23 @@ vec_any_eq(__vector unsigned short __a, __vector __bool short __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((__vector signed short)__a, - (__vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector signed int __a, __vector signed int __b) { int __cc; - __builtin_s390_vceqfs(__a, __b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc <= 1; } @@ -4363,7 +4365,8 @@ vec_any_eq(__vector signed int __a, __vector signed int __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs(__a, (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc <= 1; } @@ -4371,15 +4374,15 @@ vec_any_eq(__vector signed int __a, __vector __bool int __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs(__a, __b, &__cc); return __cc <= 1; } @@ -4387,8 +4390,7 @@ vec_any_eq(__vector unsigned int __a, __vector unsigned int __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc); return __cc <= 1; } @@ -4396,23 
+4398,23 @@ vec_any_eq(__vector unsigned int __a, __vector __bool int __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((__vector signed int)__a, - (__vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector signed long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vceqgs(__a, __b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc <= 1; } @@ -4420,7 +4422,8 @@ vec_any_eq(__vector signed long long __a, __vector signed long long __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc <= 1; } @@ -4428,15 +4431,15 @@ vec_any_eq(__vector signed long long __a, __vector __bool long long __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs(__a, __b, &__cc); return __cc <= 1; } @@ -4444,8 +4447,7 @@ vec_any_eq(__vector unsigned long long __a, __vector unsigned long long __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc); return __cc <= 1; } @@ -4453,16 +4455,15 @@ vec_any_eq(__vector unsigned long long __a, __vector __bool long long __b) { static inline __ATTRS_o_ai int vec_any_eq(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int vec_any_eq(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((__vector signed long long)__a, - (__vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc <= 1; } @@ -4487,7 +4488,8 @@ vec_any_eq(__vector double __a, __vector double __b) { static inline __ATTRS_o_ai int vec_any_ne(__vector signed char __a, __vector signed char __b) { int __cc; - __builtin_s390_vceqbs(__a, __b, &__cc); + __builtin_s390_vceqbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc != 0; } @@ -4495,7 +4497,8 @@ vec_any_ne(__vector 
signed char __a, __vector signed char __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector signed char __a, __vector __bool char __b) {
   int __cc;
-  __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc);
+  __builtin_s390_vceqbs((__vector unsigned char)__a,
+                        (__vector unsigned char)__b, &__cc);
   return __cc != 0;
 }
 
@@ -4503,15 +4506,15 @@ vec_any_ne(__vector signed char __a, __vector __bool char __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool char __a, __vector signed char __b) {
   int __cc;
-  __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc);
+  __builtin_s390_vceqbs((__vector unsigned char)__a,
+                        (__vector unsigned char)__b, &__cc);
   return __cc != 0;
 }
 
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector unsigned char __a, __vector unsigned char __b) {
   int __cc;
-  __builtin_s390_vceqbs((__vector signed char)__a,
-                        (__vector signed char)__b, &__cc);
+  __builtin_s390_vceqbs(__a, __b, &__cc);
   return __cc != 0;
 }
 
@@ -4519,8 +4522,7 @@ vec_any_ne(__vector unsigned char __a, __vector unsigned char __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector unsigned char __a, __vector __bool char __b) {
   int __cc;
-  __builtin_s390_vceqbs((__vector signed char)__a,
-                        (__vector signed char)__b, &__cc);
+  __builtin_s390_vceqbs(__a, (__vector unsigned char)__b, &__cc);
   return __cc != 0;
 }
 
@@ -4528,23 +4530,23 @@ vec_any_ne(__vector unsigned char __a, __vector __bool char __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool char __a, __vector unsigned char __b) {
   int __cc;
-  __builtin_s390_vceqbs((__vector signed char)__a,
-                        (__vector signed char)__b, &__cc);
+  __builtin_s390_vceqbs((__vector unsigned char)__a, __b, &__cc);
   return __cc != 0;
 }
 
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool char __a, __vector __bool char __b) {
   int __cc;
-  __builtin_s390_vceqbs((__vector signed char)__a,
-                        (__vector signed char)__b, &__cc);
+  __builtin_s390_vceqbs((__vector unsigned char)__a,
+                        (__vector unsigned char)__b, &__cc);
   return __cc != 0;
 }
 
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector signed short __a, __vector signed short __b) {
   int __cc;
-  __builtin_s390_vceqhs(__a, __b, &__cc);
+  __builtin_s390_vceqhs((__vector unsigned short)__a,
+                        (__vector unsigned short)__b, &__cc);
   return __cc != 0;
 }
 
@@ -4552,7 +4554,8 @@ vec_any_ne(__vector signed short __a, __vector signed short __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector signed short __a, __vector __bool short __b) {
   int __cc;
-  __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc);
+  __builtin_s390_vceqhs((__vector unsigned short)__a,
+                        (__vector unsigned short)__b, &__cc);
   return __cc != 0;
 }
 
@@ -4560,15 +4563,15 @@ vec_any_ne(__vector signed short __a, __vector __bool short __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool short __a, __vector signed short __b) {
   int __cc;
-  __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc);
+  __builtin_s390_vceqhs((__vector unsigned short)__a,
+                        (__vector unsigned short)__b, &__cc);
   return __cc != 0;
 }
 
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector unsigned short __a, __vector unsigned short __b) {
   int __cc;
-  __builtin_s390_vceqhs((__vector signed short)__a,
-                        (__vector signed short)__b, &__cc);
+  __builtin_s390_vceqhs(__a, __b, &__cc);
   return __cc != 0;
 }
 
@@ -4576,8 +4579,7 @@ vec_any_ne(__vector unsigned short __a, __vector unsigned short __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector unsigned short __a, __vector __bool short __b) {
   int __cc;
-  __builtin_s390_vceqhs((__vector signed short)__a,
-                        (__vector signed short)__b, &__cc);
+  __builtin_s390_vceqhs(__a, (__vector unsigned short)__b, &__cc);
   return __cc != 0;
 }
 
@@ -4585,23 +4587,23 @@ vec_any_ne(__vector unsigned short __a, __vector __bool short __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool short __a, __vector unsigned short __b) {
   int __cc;
-  __builtin_s390_vceqhs((__vector signed short)__a,
-                        (__vector signed short)__b, &__cc);
+  __builtin_s390_vceqhs((__vector unsigned short)__a, __b, &__cc);
   return __cc != 0;
 }
 
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool short __a, __vector __bool short __b) {
   int __cc;
-  __builtin_s390_vceqhs((__vector signed short)__a,
-                        (__vector signed short)__b, &__cc);
+  __builtin_s390_vceqhs((__vector unsigned short)__a,
+                        (__vector unsigned short)__b, &__cc);
   return __cc != 0;
 }
 
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector signed int __a, __vector signed int __b) {
   int __cc;
-  __builtin_s390_vceqfs(__a, __b, &__cc);
+  __builtin_s390_vceqfs((__vector unsigned int)__a,
+                        (__vector unsigned int)__b, &__cc);
   return __cc != 0;
 }
 
@@ -4609,7 +4611,8 @@ vec_any_ne(__vector signed int __a, __vector signed int __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector signed int __a, __vector __bool int __b) {
   int __cc;
-  __builtin_s390_vceqfs(__a, (__vector signed int)__b, &__cc);
+  __builtin_s390_vceqfs((__vector unsigned int)__a,
+                        (__vector unsigned int)__b, &__cc);
   return __cc != 0;
 }
 
@@ -4617,15 +4620,15 @@ vec_any_ne(__vector signed int __a, __vector __bool int __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool int __a, __vector signed int __b) {
   int __cc;
-  __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc);
+  __builtin_s390_vceqfs((__vector unsigned int)__a,
+                        (__vector unsigned int)__b, &__cc);
   return __cc != 0;
 }
 
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector unsigned int __a, __vector unsigned int __b) {
   int __cc;
-  __builtin_s390_vceqfs((__vector signed int)__a,
-                        (__vector signed int)__b, &__cc);
+  __builtin_s390_vceqfs(__a, __b, &__cc);
   return __cc != 0;
 }
 
@@ -4633,8 +4636,7 @@ vec_any_ne(__vector unsigned int __a, __vector unsigned int __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector unsigned int __a, __vector __bool int __b) {
   int __cc;
-  __builtin_s390_vceqfs((__vector signed int)__a,
-                        (__vector signed int)__b, &__cc);
+  __builtin_s390_vceqfs(__a, (__vector unsigned int)__b, &__cc);
   return __cc != 0;
 }
 
@@ -4642,23 +4644,23 @@ vec_any_ne(__vector unsigned int __a, __vector __bool int __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool int __a, __vector unsigned int __b) {
   int __cc;
-  __builtin_s390_vceqfs((__vector signed int)__a,
-                        (__vector signed int)__b, &__cc);
+  __builtin_s390_vceqfs((__vector unsigned int)__a, __b, &__cc);
   return __cc != 0;
 }
 
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool int __a, __vector __bool int __b) {
   int __cc;
-  __builtin_s390_vceqfs((__vector signed int)__a,
-                        (__vector signed int)__b, &__cc);
+  __builtin_s390_vceqfs((__vector unsigned int)__a,
+                        (__vector unsigned int)__b, &__cc);
   return __cc != 0;
 }
 
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector signed long long __a, __vector signed long long __b) {
   int __cc;
-  __builtin_s390_vceqgs(__a, __b, &__cc);
+  __builtin_s390_vceqgs((__vector unsigned long long)__a,
+                        (__vector unsigned long long)__b, &__cc);
   return __cc != 0;
 }
 
@@ -4666,7 +4668,8 @@ vec_any_ne(__vector signed long long __a, __vector signed long long __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector signed long long __a, __vector __bool long long __b) {
   int __cc;
-  __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc);
+  __builtin_s390_vceqgs((__vector unsigned long long)__a,
+                        (__vector unsigned long long)__b, &__cc);
   return __cc != 0;
 }
 
@@ -4674,15 +4677,15 @@ vec_any_ne(__vector signed long long __a, __vector __bool long long __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool long long __a, __vector signed long long __b) {
   int __cc;
-  __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc);
+  __builtin_s390_vceqgs((__vector unsigned long long)__a,
+                        (__vector unsigned long long)__b, &__cc);
   return __cc != 0;
 }
 
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector unsigned long long __a, __vector unsigned long long __b) {
   int __cc;
-  __builtin_s390_vceqgs((__vector signed long long)__a,
-                        (__vector signed long long)__b, &__cc);
+  __builtin_s390_vceqgs(__a, __b, &__cc);
   return __cc != 0;
 }
 
@@ -4690,8 +4693,7 @@ vec_any_ne(__vector unsigned long long __a, __vector unsigned long long __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector unsigned long long __a, __vector __bool long long __b) {
   int __cc;
-  __builtin_s390_vceqgs((__vector signed long long)__a,
-                        (__vector signed long long)__b, &__cc);
+  __builtin_s390_vceqgs(__a, (__vector unsigned long long)__b, &__cc);
   return __cc != 0;
 }
 
@@ -4699,16 +4701,15 @@ vec_any_ne(__vector unsigned long long __a, __vector __bool long long __b) {
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool long long __a, __vector unsigned long long __b) {
   int __cc;
-  __builtin_s390_vceqgs((__vector signed long long)__a,
-                        (__vector signed long long)__b, &__cc);
+  __builtin_s390_vceqgs((__vector unsigned long long)__a, __b, &__cc);
   return __cc != 0;
 }
 
 static inline __ATTRS_o_ai int
 vec_any_ne(__vector __bool long long __a, __vector __bool long long __b) {
   int __cc;
-  __builtin_s390_vceqgs((__vector signed long long)__a,
-                        (__vector signed long long)__b, &__cc);
+  __builtin_s390_vceqgs((__vector unsigned long long)__a,
+                        (__vector unsigned long long)__b, &__cc);
   return __cc != 0;
 }
 
@@ -6565,45 +6566,45 @@ vec_rl(__vector unsigned long long __a, __vector unsigned long long __b) {
 static inline __ATTRS_o_ai __vector signed char
 vec_rli(__vector signed char __a, unsigned long __b) {
   return (__vector signed char)__builtin_s390_verllb(
-    (__vector unsigned char)__a, (int)__b);
+    (__vector unsigned char)__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector unsigned char
 vec_rli(__vector unsigned char __a, unsigned long __b) {
-  return __builtin_s390_verllb(__a, (int)__b);
+  return __builtin_s390_verllb(__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector signed short
 vec_rli(__vector signed short __a, unsigned long __b) {
   return (__vector signed short)__builtin_s390_verllh(
-    (__vector unsigned short)__a, (int)__b);
+    (__vector unsigned short)__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector unsigned short
 vec_rli(__vector unsigned short __a, unsigned long __b) {
-  return __builtin_s390_verllh(__a, (int)__b);
+  return __builtin_s390_verllh(__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector signed int
 vec_rli(__vector signed int __a, unsigned long __b) {
   return (__vector signed int)__builtin_s390_verllf(
-    (__vector unsigned int)__a, (int)__b);
+    (__vector unsigned int)__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector unsigned int
 vec_rli(__vector unsigned int __a, unsigned long __b) {
-  return __builtin_s390_verllf(__a, (int)__b);
+  return __builtin_s390_verllf(__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector signed long long
 vec_rli(__vector signed long long __a, unsigned long __b) {
   return (__vector signed long long)__builtin_s390_verllg(
-    (__vector unsigned long long)__a, (int)__b);
+    (__vector unsigned long long)__a, (unsigned char)__b);
 }
 
 static inline __ATTRS_o_ai __vector unsigned long long
 vec_rli(__vector unsigned long long __a, unsigned long __b) {
-  return __builtin_s390_verllg(__a, (int)__b);
+  return __builtin_s390_verllg(__a, (unsigned char)__b);
 }
 
 /*-- vec_rl_mask ------------------------------------------------------------*/
@@ -8358,7 +8359,7 @@ vec_min(__vector double __a, __vector double __b) {
 
 static inline __ATTRS_ai __vector unsigned char
 vec_add_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return __builtin_s390_vaq(__a, __b);
+  return (__vector unsigned char)((__int128)__a + (__int128)__b);
 }
 
 /*-- vec_addc ---------------------------------------------------------------*/
@@ -8387,7 +8388,8 @@ vec_addc(__vector unsigned long long __a, __vector unsigned long long __b) {
 
 static inline __ATTRS_ai __vector unsigned char
 vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return __builtin_s390_vaccq(__a, __b);
+  return (__vector unsigned char)
+    __builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b);
 }
 
 /*-- vec_adde_u128 ----------------------------------------------------------*/
@@ -8395,7 +8397,9 @@ vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) {
 static inline __ATTRS_ai __vector unsigned char
 vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b,
               __vector unsigned char __c) {
-  return __builtin_s390_vacq(__a, __b, __c);
+  return (__vector unsigned char)
+    __builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b,
+                        (unsigned __int128)__c);
 }
 
 /*-- vec_addec_u128 ---------------------------------------------------------*/
@@ -8403,7 +8407,9 @@ vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b,
 static inline __ATTRS_ai __vector unsigned char
 vec_addec_u128(__vector unsigned char __a, __vector unsigned char __b,
               __vector unsigned char __c) {
-  return __builtin_s390_vacccq(__a, __b, __c);
+  return (__vector unsigned char)
+    __builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b,
+                          (unsigned __int128)__c);
 }
 
 /*-- vec_avg ----------------------------------------------------------------*/
@@ -8477,7 +8483,7 @@ vec_gfmsum(__vector unsigned int __a, __vector unsigned int __b) {
 static inline __ATTRS_o_ai __vector unsigned char
 vec_gfmsum_128(__vector unsigned long long __a,
                __vector unsigned long long __b) {
-  return __builtin_s390_vgfmg(__a, __b);
+  return (__vector unsigned char)__builtin_s390_vgfmg(__a, __b);
 }
 
 /*-- vec_gfmsum_accum -------------------------------------------------------*/
@@ -8506,7 +8512,8 @@ static inline __ATTRS_o_ai __vector unsigned char
 vec_gfmsum_accum_128(__vector unsigned long long __a,
                      __vector unsigned long long __b,
                      __vector unsigned char __c) {
-  return __builtin_s390_vgfmag(__a, __b, __c);
+  return (__vector unsigned char)
+    __builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c);
 }
 
 /*-- vec_mladd --------------------------------------------------------------*/
@@ -8796,15 +8803,21 @@ vec_mulo(__vector unsigned int __a, __vector unsigned int __b) {
 /*-- vec_msum_u128 ----------------------------------------------------------*/
 
 #if __ARCH__ >= 12
+extern __ATTRS_o __vector unsigned char
+vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b,
+              __vector unsigned char __c, int __d)
+  __constant_range(__d, 0, 15);
+
 #define vec_msum_u128(X, Y, Z, W) \
-  ((__vector unsigned char)__builtin_s390_vmslg((X), (Y), (Z), (W)));
+  ((__typeof__((vec_msum_u128)((X), (Y), (Z), (W)))) \
+   __builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W)))
 #endif
 
 /*-- vec_sub_u128 -----------------------------------------------------------*/
 
 static inline __ATTRS_ai __vector unsigned char
 vec_sub_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return __builtin_s390_vsq(__a, __b);
+  return (__vector unsigned char)((__int128)__a - (__int128)__b);
 }
 
 /*-- vec_subc ---------------------------------------------------------------*/
@@ -8833,7 +8846,8 @@ vec_subc(__vector unsigned long long __a, __vector unsigned long long __b) {
 
 static inline __ATTRS_ai __vector unsigned char
 vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return __builtin_s390_vscbiq(__a, __b);
+  return (__vector unsigned char)
+    __builtin_s390_vscbiq((unsigned __int128)__a, (unsigned __int128)__b);
 }
 
 /*-- vec_sube_u128 ----------------------------------------------------------*/
@@ -8841,7 +8855,9 @@ vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) {
 static inline __ATTRS_ai __vector unsigned char
 vec_sube_u128(__vector unsigned char __a, __vector unsigned char __b,
              __vector unsigned char __c) {
-  return __builtin_s390_vsbiq(__a, __b, __c);
+  return (__vector unsigned char)
+    __builtin_s390_vsbiq((unsigned __int128)__a, (unsigned __int128)__b,
+                         (unsigned __int128)__c);
 }
 
 /*-- vec_subec_u128 ---------------------------------------------------------*/
@@ -8849,7 +8865,9 @@ vec_sube_u128(__vector unsigned char __a, __vector unsigned char __b,
 static inline __ATTRS_ai __vector unsigned char
 vec_subec_u128(__vector unsigned char __a, __vector unsigned char __b,
               __vector unsigned char __c) {
-  return __builtin_s390_vsbcbiq(__a, __b, __c);
+  return (__vector unsigned char)
+    __builtin_s390_vsbcbiq((unsigned __int128)__a, (unsigned __int128)__b,
+                           (unsigned __int128)__c);
 }
 
 /*-- vec_sum2 ---------------------------------------------------------------*/
@@ -8868,12 +8886,12 @@ vec_sum2(__vector unsigned int __a, __vector unsigned int __b) {
 
 static inline __ATTRS_o_ai __vector unsigned char
 vec_sum_u128(__vector unsigned int __a, __vector unsigned int __b) {
-  return __builtin_s390_vsumqf(__a, __b);
+  return (__vector unsigned char)__builtin_s390_vsumqf(__a, __b);
}
 
 static inline __ATTRS_o_ai __vector unsigned char
 vec_sum_u128(__vector unsigned long long __a, __vector unsigned long long __b) {
-  return __builtin_s390_vsumqg(__a, __b);
+  return (__vector unsigned char)__builtin_s390_vsumqg(__a, __b);
 }
 
 /*-- vec_sum4 ---------------------------------------------------------------*/
diff --git a/lib/include/x86gprintrin.h b/lib/include/x86gprintrin.h
index f9a765be43..ed141879fb 100644
--- a/lib/include/x86gprintrin.h
+++ b/lib/include/x86gprintrin.h
@@ -20,6 +20,11 @@
 #include 
 #endif
 
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
+    defined(__USERMSR__)
+#include 
+#endif
+
 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
     defined(__CRC32__)
 #include 
diff --git a/lib/include/xmmintrin.h b/lib/include/xmmintrin.h
index 80aa2a817f..47368f3c23 100644
--- a/lib/include/xmmintrin.h
+++ b/lib/include/xmmintrin.h
@@ -32,8 +32,12 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16)));
 #endif
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse"), __min_vector_width__(64)))
+#define __DEFAULT_FN_ATTRS \
+  __attribute__((__always_inline__, __nodebug__, __target__("sse,no-evex512"), \
+                 __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS_MMX \
+  __attribute__((__always_inline__, __nodebug__, \
+                 __target__("mmx,sse,no-evex512"), __min_vector_width__(64)))
 
 /// Adds the 32-bit float values in the low-order bits of the operands.
 ///
@@ -2121,9 +2125,9 @@ _mm_storer_ps(float *__p, __m128 __a)
 /// \param __a
 ///    A 64-bit integer containing the value to be stored.
 static __inline__ void __DEFAULT_FN_ATTRS_MMX
-_mm_stream_pi(__m64 *__p, __m64 __a)
+_mm_stream_pi(void *__p, __m64 __a)
 {
-  __builtin_ia32_movntq(__p, __a);
+  __builtin_ia32_movntq((__m64 *)__p, __a);
 }
 
 /// Moves packed float values from a 128-bit vector of [4 x float] to a
@@ -2140,7 +2144,7 @@ _mm_stream_pi(__m64 *__p, __m64 __a)
 /// \param __a
 ///    A 128-bit vector of [4 x float] containing the values to be moved.
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_stream_ps(float *__p, __m128 __a)
+_mm_stream_ps(void *__p, __m128 __a)
 {
   __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);
 }