update C language headers to LLVM 18

release/18.x branch, commit 78b99c73ee4b96fe9ce0e294d4632326afb2db42
This commit is contained in:
Andrew Kelley 2024-04-25 14:59:29 -07:00
parent e4029b9943
commit 70c85b1bf1
103 changed files with 21299 additions and 2595 deletions

View File

@ -502,8 +502,8 @@ __DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }
__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }
__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }
__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }
__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }
__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }
__DEVICE__ int __popc(unsigned int __a) { return __nv_popc(__a); }
__DEVICE__ int __popcll(unsigned long long __a) { return __nv_popcll(__a); }
__DEVICE__ float __powf(float __a, float __b) {
return __nv_fast_powf(__a, __b);
}

View File

@ -285,8 +285,8 @@ __DEVICE__ double __nv_normcdfinv(double __a);
__DEVICE__ float __nv_normcdfinvf(float __a);
__DEVICE__ float __nv_normf(int __a, const float *__b);
__DEVICE__ double __nv_norm(int __a, const double *__b);
__DEVICE__ int __nv_popc(int __a);
__DEVICE__ int __nv_popcll(long long __a);
__DEVICE__ int __nv_popc(unsigned int __a);
__DEVICE__ int __nv_popcll(unsigned long long __a);
__DEVICE__ double __nv_pow(double __a, double __b);
__DEVICE__ float __nv_powf(float __a, float __b);
__DEVICE__ double __nv_powi(double __a, int __b);

View File

@ -36,7 +36,7 @@
// because the OpenMP overlay requires constexpr functions here but prior to
// c++14 void return functions could not be constexpr.
#pragma push_macro("__DEVICE_VOID__")
#ifdef __OPENMP_NVPTX__ && defined(__cplusplus) && __cplusplus < 201402L
#if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L
#define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
#else
#define __DEVICE_VOID__ __DEVICE__
@ -45,9 +45,9 @@
// libdevice provides fast low precision and slow full-recision implementations
// for some functions. Which one gets selected depends on
// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if
// -ffast-math or -fcuda-approx-transcendentals are in effect.
// -ffast-math or -fgpu-approx-transcendentals are in effect.
#pragma push_macro("__FAST_OR_SLOW")
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
#define __FAST_OR_SLOW(fast, slow) fast
#else
#define __FAST_OR_SLOW(fast, slow) slow

View File

@ -196,12 +196,12 @@ inline __host__ double __signbitd(double x) {
// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we
// get the slow-but-accurate or fast-but-inaccurate versions of functions like
// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals.
// sin and exp. This is controlled in clang by -fgpu-approx-transcendentals.
//
// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.
// slow divides), so we need to scope our define carefully here.
#pragma push_macro("__USE_FAST_MATH__")
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
#define __USE_FAST_MATH__ 1
#endif

View File

@ -14,9 +14,6 @@
#endif
#if !defined(__HIPCC_RTC__)
#if defined(__cplusplus)
#include <algorithm>
#endif
#include <limits.h>
#include <stdint.h>
#ifdef __OPENMP_AMDGCN__
@ -32,6 +29,17 @@
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
#endif
// Device library provides fast low precision and slow full-recision
// implementations for some functions. Which one gets selected depends on
// __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if
// -ffast-math or -fgpu-approx-transcendentals are in effect.
#pragma push_macro("__FAST_OR_SLOW")
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
#define __FAST_OR_SLOW(fast, slow) fast
#else
#define __FAST_OR_SLOW(fast, slow) slow
#endif
// A few functions return bool type starting only in C++11.
#pragma push_macro("__RETURN_TYPE")
#ifdef __OPENMP_AMDGCN__
@ -139,21 +147,180 @@ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
}
// BEGIN FLOAT
// BEGIN INTRINSICS
__DEVICE__
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
__DEVICE__
float __exp10f(float __x) {
const float __log2_10 = 0x1.a934f0p+1f;
return __builtin_amdgcn_exp2f(__log2_10 * __x);
}
__DEVICE__
float __expf(float __x) {
const float __log2_e = 0x1.715476p+0;
return __builtin_amdgcn_exp2f(__log2_e * __x);
}
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
__DEVICE__
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
__DEVICE__
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
__DEVICE__
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fadd_rn(float __x, float __y) { return __x + __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
__DEVICE__
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
__DEVICE__
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __x / __y; }
#endif
__DEVICE__
float __fdividef(float __x, float __y) { return __x / __y; }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmaf_rd(float __x, float __y, float __z) {
return __ocml_fma_rtn_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __ocml_fma_rte_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_ru(float __x, float __y, float __z) {
return __ocml_fma_rtp_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rz(float __x, float __y, float __z) {
return __ocml_fma_rtz_f32(__x, __y, __z);
}
#else
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __builtin_fmaf(__x, __y, __z);
}
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
__DEVICE__
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
__DEVICE__
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
__DEVICE__
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fmul_rn(float __x, float __y) { return __x * __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
__DEVICE__
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
__DEVICE__
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
__DEVICE__
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
#else
__DEVICE__
float __frcp_rn(float __x) { return 1.0f / __x; }
#endif
__DEVICE__
float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
__DEVICE__
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
__DEVICE__
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
#else
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
__DEVICE__
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
__DEVICE__
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
__DEVICE__
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fsub_rn(float __x, float __y) { return __x - __y; }
#endif
__DEVICE__
float __log10f(float __x) { return __builtin_log10f(__x); }
__DEVICE__
float __log2f(float __x) { return __builtin_amdgcn_logf(__x); }
__DEVICE__
float __logf(float __x) { return __builtin_logf(__x); }
__DEVICE__
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
__DEVICE__
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
__DEVICE__
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
*__sinptr = __ocml_native_sin_f32(__x);
*__cosptr = __ocml_native_cos_f32(__x);
}
__DEVICE__
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
__DEVICE__
float __tanf(float __x) {
return __sinf(__x) * __builtin_amdgcn_rcpf(__cosf(__x));
}
// END INTRINSICS
#if defined(__cplusplus)
__DEVICE__
int abs(int __x) {
int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
return __builtin_abs(__x);
}
__DEVICE__
long labs(long __x) {
long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
return __builtin_labs(__x);
}
__DEVICE__
long long llabs(long long __x) {
long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
return __builtin_llabs(__x);
}
#endif
@ -188,7 +355,7 @@ __DEVICE__
float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); }
__DEVICE__
float cosf(float __x) { return __ocml_cos_f32(__x); }
float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); }
__DEVICE__
float coshf(float __x) { return __ocml_cosh_f32(__x); }
@ -321,13 +488,13 @@ __DEVICE__
float log1pf(float __x) { return __ocml_log1p_f32(__x); }
__DEVICE__
float log2f(float __x) { return __builtin_log2f(__x); }
float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); }
__DEVICE__
float logbf(float __x) { return __ocml_logb_f32(__x); }
__DEVICE__
float logf(float __x) { return __builtin_logf(__x); }
float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); }
__DEVICE__
long int lrintf(float __x) { return __builtin_rintf(__x); }
@ -401,7 +568,7 @@ float normf(int __dim,
++__a;
}
return __ocml_sqrt_f32(__r);
return __builtin_sqrtf(__r);
}
__DEVICE__
@ -483,9 +650,13 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
#ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__
__sincosf(__x, __sinptr, __cosptr);
#else
*__sinptr =
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
*__cosptr = __tmp;
#endif
}
__DEVICE__
@ -500,7 +671,7 @@ void sincospif(float __x, float *__sinptr, float *__cosptr) {
}
__DEVICE__
float sinf(float __x) { return __ocml_sin_f32(__x); }
float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); }
__DEVICE__
float sinhf(float __x) { return __ocml_sinh_f32(__x); }
@ -509,7 +680,7 @@ __DEVICE__
float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
__DEVICE__
float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
float sqrtf(float __x) { return __builtin_sqrtf(__x); }
__DEVICE__
float tanf(float __x) { return __ocml_tan_f32(__x); }
@ -551,158 +722,7 @@ float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
return __x1;
}
// BEGIN INTRINSICS
__DEVICE__
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
__DEVICE__
float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
__DEVICE__
float __expf(float __x) { return __ocml_native_exp_f32(__x); }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
__DEVICE__
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
__DEVICE__
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
__DEVICE__
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fadd_rn(float __x, float __y) { return __x + __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
__DEVICE__
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
__DEVICE__
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __x / __y; }
#endif
__DEVICE__
float __fdividef(float __x, float __y) { return __x / __y; }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmaf_rd(float __x, float __y, float __z) {
return __ocml_fma_rtn_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __ocml_fma_rte_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_ru(float __x, float __y, float __z) {
return __ocml_fma_rtp_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rz(float __x, float __y, float __z) {
return __ocml_fma_rtz_f32(__x, __y, __z);
}
#else
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __builtin_fmaf(__x, __y, __z);
}
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
__DEVICE__
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
__DEVICE__
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
__DEVICE__
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fmul_rn(float __x, float __y) { return __x * __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
__DEVICE__
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
__DEVICE__
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
__DEVICE__
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
#else
__DEVICE__
float __frcp_rn(float __x) { return 1.0f / __x; }
#endif
__DEVICE__
float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
__DEVICE__
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
__DEVICE__
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
#else
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
__DEVICE__
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
__DEVICE__
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
__DEVICE__
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fsub_rn(float __x, float __y) { return __x - __y; }
#endif
__DEVICE__
float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
__DEVICE__
float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
__DEVICE__
float __logf(float __x) { return __ocml_native_log_f32(__x); }
__DEVICE__
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
__DEVICE__
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
__DEVICE__
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
*__sinptr = __ocml_native_sin_f32(__x);
*__cosptr = __ocml_native_cos_f32(__x);
}
__DEVICE__
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
__DEVICE__
float __tanf(float __x) { return __ocml_tan_f32(__x); }
// END INTRINSICS
// END FLOAT
// BEGIN DOUBLE
@ -941,7 +961,7 @@ double norm(int __dim,
++__a;
}
return __ocml_sqrt_f64(__r);
return __builtin_sqrt(__r);
}
__DEVICE__
@ -1064,7 +1084,7 @@ __DEVICE__
double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
__DEVICE__
double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
double sqrt(double __x) { return __builtin_sqrt(__x); }
__DEVICE__
double tan(double __x) { return __ocml_tan_f64(__x); }
@ -1198,7 +1218,7 @@ __DEVICE__
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
#else
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
@ -1288,16 +1308,17 @@ double min(double __x, double __y) { return __builtin_fmin(__x, __y); }
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
__host__ inline static int min(int __arg1, int __arg2) {
return std::min(__arg1, __arg2);
return __arg1 < __arg2 ? __arg1 : __arg2;
}
__host__ inline static int max(int __arg1, int __arg2) {
return std::max(__arg1, __arg2);
return __arg1 > __arg2 ? __arg1 : __arg2;
}
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif
#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__RETURN_TYPE")
#pragma pop_macro("__FAST_OR_SLOW")
#endif // __CLANG_HIP_MATH_H__

View File

@ -46,6 +46,67 @@ extern "C" {
}
#endif //__cplusplus
#if !defined(__HIPCC_RTC__)
#if __has_include("hip/hip_version.h")
#include "hip/hip_version.h"
#endif // __has_include("hip/hip_version.h")
#endif // __HIPCC_RTC__
typedef __SIZE_TYPE__ __hip_size_t;
#ifdef __cplusplus
extern "C" {
#endif //__cplusplus
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
__device__ void __ockl_dm_dealloc(unsigned long long __addr);
#if __has_feature(address_sanitizer)
__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
unsigned long long __pc);
__device__ void __asan_free_impl(unsigned long long __addr,
unsigned long long __pc);
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
return (void *)__asan_malloc_impl(__size, __pc);
}
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
__asan_free_impl((unsigned long long)__ptr, __pc);
}
#else // __has_feature(address_sanitizer)
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
return (void *) __ockl_dm_alloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__ockl_dm_dealloc((unsigned long long)__ptr);
}
#endif // __has_feature(address_sanitizer)
#else // HIP version check
#if __HIP_ENABLE_DEVICE_MALLOC__
__device__ void *__hip_malloc(__hip_size_t __size);
__device__ void *__hip_free(void *__ptr);
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
return __hip_malloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__hip_free(__ptr);
}
#else // __HIP_ENABLE_DEVICE_MALLOC__
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
__builtin_trap();
return (void *)0;
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__builtin_trap();
}
#endif // __HIP_ENABLE_DEVICE_MALLOC__
#endif // HIP version check
#ifdef __cplusplus
} // extern "C"
#endif //__cplusplus
#if !defined(__HIPCC_RTC__)
#include <cmath>
#include <cstdlib>
@ -71,59 +132,6 @@ typedef __SIZE_TYPE__ size_t;
#define INT_MAX __INTMAX_MAX__
#endif // __HIPCC_RTC__
typedef __SIZE_TYPE__ __hip_size_t;
#ifdef __cplusplus
extern "C" {
#endif //__cplusplus
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
#if __has_feature(address_sanitizer)
extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc);
extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc);
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
return (void *)__asan_malloc_impl(__size, __pc);
}
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
__asan_free_impl((unsigned long long)__ptr, __pc);
}
#else
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
return (void *) __ockl_dm_alloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__ockl_dm_dealloc((unsigned long long)__ptr);
}
#endif // __has_feature(address_sanitizer)
#else // HIP version check
#if __HIP_ENABLE_DEVICE_MALLOC__
__device__ void *__hip_malloc(__hip_size_t __size);
__device__ void *__hip_free(void *__ptr);
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
return __hip_malloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__hip_free(__ptr);
}
#else
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
__builtin_trap();
return (void *)0;
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__builtin_trap();
}
#endif
#endif // HIP version check
#ifdef __cplusplus
} // extern "C"
#endif //__cplusplus
#include <__clang_hip_libdevice_declares.h>
#include <__clang_hip_math.h>
#include <__clang_hip_stdlib.h>

13
lib/include/__stdarg___gnuc_va_list.h vendored Normal file
View File

@ -0,0 +1,13 @@
/*===---- __stdarg___gnuc_va_list.h - Definition of __gnuc_va_list ---------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __GNUC_VA_LIST
#define __GNUC_VA_LIST
typedef __builtin_va_list __gnuc_va_list;
#endif

12
lib/include/__stdarg___va_copy.h vendored Normal file
View File

@ -0,0 +1,12 @@
/*===---- __stdarg___va_copy.h - Definition of __va_copy -------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __va_copy
#define __va_copy(d, s) __builtin_va_copy(d, s)
#endif

22
lib/include/__stdarg_va_arg.h vendored Normal file
View File

@ -0,0 +1,22 @@
/*===---- __stdarg_va_arg.h - Definitions of va_start, va_arg, va_end-------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef va_arg
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
/* C23 does not require the second parameter for va_start. */
#define va_start(ap, ...) __builtin_va_start(ap, 0)
#else
/* Versions before C23 do require the second parameter. */
#define va_start(ap, param) __builtin_va_start(ap, param)
#endif
#define va_end(ap) __builtin_va_end(ap)
#define va_arg(ap, type) __builtin_va_arg(ap, type)
#endif

12
lib/include/__stdarg_va_copy.h vendored Normal file
View File

@ -0,0 +1,12 @@
/*===---- __stdarg_va_copy.h - Definition of va_copy------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef va_copy
#define va_copy(dest, src) __builtin_va_copy(dest, src)
#endif

13
lib/include/__stdarg_va_list.h vendored Normal file
View File

@ -0,0 +1,13 @@
/*===---- __stdarg_va_list.h - Definition of va_list -----------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef _VA_LIST
#define _VA_LIST
typedef __builtin_va_list va_list;
#endif

View File

@ -1,4 +1,4 @@
/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===
/*===---- __stddef_max_align_t.h - Definition of max_align_t ---------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.

29
lib/include/__stddef_null.h vendored Normal file
View File

@ -0,0 +1,29 @@
/*===---- __stddef_null.h - Definition of NULL -----------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#if !defined(NULL) || !__building_module(_Builtin_stddef)
/* linux/stddef.h will define NULL to 0. glibc (and other) headers then define
* __need_NULL and rely on stddef.h to redefine NULL to the correct value again.
* Modules don't support redefining macros like that, but support that pattern
* in the non-modules case.
*/
#undef NULL
#ifdef __cplusplus
#if !defined(__MINGW32__) && !defined(_MSC_VER)
#define NULL __null
#else
#define NULL 0
#endif
#else
#define NULL ((void*)0)
#endif
#endif

29
lib/include/__stddef_nullptr_t.h vendored Normal file
View File

@ -0,0 +1,29 @@
/*===---- __stddef_nullptr_t.h - Definition of nullptr_t -------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_NULLPTR_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _NULLPTR_T
#ifdef __cplusplus
#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
namespace std {
typedef decltype(nullptr) nullptr_t;
}
using ::std::nullptr_t;
#endif
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
typedef typeof(nullptr) nullptr_t;
#endif
#endif

17
lib/include/__stddef_offsetof.h vendored Normal file
View File

@ -0,0 +1,17 @@
/*===---- __stddef_offsetof.h - Definition of offsetof ---------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(offsetof) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define offsetof(t, d) __builtin_offsetof(t, d)
#endif

20
lib/include/__stddef_ptrdiff_t.h vendored Normal file
View File

@ -0,0 +1,20 @@
/*===---- __stddef_ptrdiff_t.h - Definition of ptrdiff_t -------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_PTRDIFF_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _PTRDIFF_T
typedef __PTRDIFF_TYPE__ ptrdiff_t;
#endif

20
lib/include/__stddef_rsize_t.h vendored Normal file
View File

@ -0,0 +1,20 @@
/*===---- __stddef_rsize_t.h - Definition of rsize_t -----------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_RSIZE_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _RSIZE_T
typedef __SIZE_TYPE__ rsize_t;
#endif

20
lib/include/__stddef_size_t.h vendored Normal file
View File

@ -0,0 +1,20 @@
/*===---- __stddef_size_t.h - Definition of size_t -------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_SIZE_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _SIZE_T
typedef __SIZE_TYPE__ size_t;
#endif

21
lib/include/__stddef_unreachable.h vendored Normal file
View File

@ -0,0 +1,21 @@
/*===---- __stddef_unreachable.h - Definition of unreachable ---------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __cplusplus
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(unreachable) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define unreachable() __builtin_unreachable()
#endif
#endif

28
lib/include/__stddef_wchar_t.h vendored Normal file
View File

@ -0,0 +1,28 @@
/*===---- __stddef_wchar.h - Definition of wchar_t -------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED)
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_WCHAR_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _WCHAR_T
#ifdef _MSC_EXTENSIONS
#define _WCHAR_T_DEFINED
#endif
typedef __WCHAR_TYPE__ wchar_t;
#endif
#endif

15
lib/include/__stddef_wint_t.h vendored Normal file
View File

@ -0,0 +1,15 @@
/*===---- __stddef_wint.h - Definition of wint_t ---------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef _WINT_T
#define _WINT_T
typedef __WINT_TYPE__ wint_t;
#endif

160
lib/include/adcintrin.h vendored Normal file
View File

@ -0,0 +1,160 @@
/*===---- adcintrin.h - ADC intrinsics -------------------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __ADCINTRIN_H
#define __ADCINTRIN_H
#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
/* Use C++ inline semantics in C++, GNU inline for C mode. */
#if defined(__cplusplus)
#define __INLINE __inline
#else
#define __INLINE static __inline
#endif
#if defined(__cplusplus)
extern "C" {
#endif
/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 32-bit unsigned addend.
/// \param __y
/// A 32-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
unsigned int *__p) {
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
}
/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c SBB instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// The 32-bit unsigned minuend.
/// \param __y
/// The 32-bit unsigned subtrahend.
/// \param __p
/// Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
unsigned int *__p) {
return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
}
#ifdef __x86_64__
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 64-bit unsigned addend.
/// \param __y
/// A 64-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_addcarry_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// The 64-bit unsigned minuend.
/// \param __y
/// The 64-bit unsigned subtrahend.
/// \param __p
/// Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_subborrow_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
}
#endif
#if defined(__cplusplus)
}
#endif
#undef __INLINE
#undef __DEFAULT_FN_ATTRS
#endif /* __ADCINTRIN_H */

View File

@ -15,7 +15,8 @@
#define __ADXINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
/* Use C++ inline semantics in C++, GNU inline for C mode. */
#if defined(__cplusplus)
@ -53,9 +54,9 @@ extern "C" {
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarryx_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
unsigned int *__p) {
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
}
@ -84,144 +85,18 @@ __INLINE unsigned char
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
__INLINE unsigned char __DEFAULT_FN_ATTRS
_addcarryx_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
#endif
/* Intrinsics that are also available if __ADX__ is undefined. */
/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 32-bit unsigned addend.
/// \param __y
/// A 32-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
unsigned int *__p) {
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
}
#ifdef __x86_64__
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 64-bit unsigned addend.
/// \param __y
/// A 64-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_addcarry_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
#endif
/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c SBB instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// The 32-bit unsigned minuend.
/// \param __y
/// The 32-bit unsigned subtrahend.
/// \param __p
/// Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
unsigned int *__p) {
return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
}
#ifdef __x86_64__
/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// The 64-bit unsigned minuend.
/// \param __y
/// The 64-bit unsigned subtrahend.
/// \param __p
/// Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_subborrow_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
}
#endif
#if defined(__cplusplus)
}
#endif
#undef __INLINE
#undef __DEFAULT_FN_ATTRS
#endif /* __ADXINTRIN_H */

43
lib/include/altivec.h vendored
View File

@ -14647,67 +14647,86 @@ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,
static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a,
int __b) {
vector signed char __res = (vector signed char)(0);
__res[__b & 0x7] = __a;
const vector signed char __zero = (vector signed char)0;
vector signed char __res =
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0xf] = __a;
return __res;
}
static __inline__ vector unsigned char __ATTRS_o_ai
vec_promote(unsigned char __a, int __b) {
vector unsigned char __res = (vector unsigned char)(0);
__res[__b & 0x7] = __a;
const vector unsigned char __zero = (vector unsigned char)(0);
vector unsigned char __res =
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0xf] = __a;
return __res;
}
static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) {
vector short __res = (vector short)(0);
const vector short __zero = (vector short)(0);
vector short __res =
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0x7] = __a;
return __res;
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_promote(unsigned short __a, int __b) {
vector unsigned short __res = (vector unsigned short)(0);
const vector unsigned short __zero = (vector unsigned short)(0);
vector unsigned short __res =
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0x7] = __a;
return __res;
}
static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) {
vector int __res = (vector int)(0);
const vector int __zero = (vector int)(0);
vector int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a,
int __b) {
vector unsigned int __res = (vector unsigned int)(0);
const vector unsigned int __zero = (vector unsigned int)(0);
vector unsigned int __res =
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
vector float __res = (vector float)(0);
const vector float __zero = (vector float)(0);
vector float __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
#ifdef __VSX__
static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) {
vector double __res = (vector double)(0);
const vector double __zero = (vector double)(0);
vector double __res = __builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}
static __inline__ vector signed long long __ATTRS_o_ai
vec_promote(signed long long __a, int __b) {
vector signed long long __res = (vector signed long long)(0);
const vector signed long long __zero = (vector signed long long)(0);
vector signed long long __res =
__builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_promote(unsigned long long __a, int __b) {
vector unsigned long long __res = (vector unsigned long long)(0);
const vector unsigned long long __zero = (vector unsigned long long)(0);
vector unsigned long long __res =
__builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}

View File

@ -155,9 +155,9 @@ _mm_insert_si64(__m128i __x, __m128i __y)
/// \param __a
/// The 64-bit double-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(double *__p, __m128d __a)
_mm_stream_sd(void *__p, __m128d __a)
{
__builtin_ia32_movntsd(__p, (__v2df)__a);
__builtin_ia32_movntsd((double *)__p, (__v2df)__a);
}
/// Stores a 32-bit single-precision floating-point value in a 32-bit
@ -173,9 +173,9 @@ _mm_stream_sd(double *__p, __m128d __a)
/// \param __a
/// The 32-bit single-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(float *__p, __m128 __a)
_mm_stream_ss(void *__p, __m128 __a)
{
__builtin_ia32_movntss(__p, (__v4sf)__a);
__builtin_ia32_movntss((float *)__p, (__v4sf)__a);
}
#undef __DEFAULT_FN_ATTRS

142
lib/include/arm_acle.h vendored
View File

@ -4,6 +4,13 @@
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* The Arm C Language Extensions specifications can be found in the following
* link: https://github.com/ARM-software/acle/releases
*
* The ACLE section numbers are subject to change. When consulting the
* specifications, it is recommended to search using section titles if
* the section numbers look outdated.
*
*===-----------------------------------------------------------------------===
*/
@ -20,8 +27,8 @@
extern "C" {
#endif
/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
/* 8.3 Memory barriers */
/* 7 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
/* 7.3 Memory barriers */
#if !__has_builtin(__dmb)
#define __dmb(i) __builtin_arm_dmb(i)
#endif
@ -32,7 +39,7 @@ extern "C" {
#define __isb(i) __builtin_arm_isb(i)
#endif
/* 8.4 Hints */
/* 7.4 Hints */
#if !__has_builtin(__wfi)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
@ -68,7 +75,7 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v
#define __dbg(t) __builtin_arm_dbg(t)
#endif
/* 8.5 Swap */
/* 7.5 Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
uint32_t v;
@ -78,8 +85,8 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
return v;
}
/* 8.6 Memory prefetch intrinsics */
/* 8.6.1 Data prefetch */
/* 7.6 Memory prefetch intrinsics */
/* 7.6.1 Data prefetch */
#define __pld(addr) __pldx(0, 0, 0, addr)
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
@ -90,7 +97,7 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
__builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
#endif
/* 8.6.2 Instruction prefetch */
/* 7.6.2 Instruction prefetch */
#define __pli(addr) __plix(0, 0, addr)
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
@ -101,15 +108,15 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
__builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
#endif
/* 8.7 NOP */
/* 7.7 NOP */
#if !defined(_MSC_VER) || !defined(__aarch64__)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
__builtin_arm_nop();
}
#endif
/* 9 DATA-PROCESSING INTRINSICS */
/* 9.2 Miscellaneous data-processing intrinsics */
/* 8 DATA-PROCESSING INTRINSICS */
/* 8.2 Miscellaneous data-processing intrinsics */
/* ROR */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
@ -248,9 +255,7 @@ __rbitl(unsigned long __t) {
#endif
}
/*
* 9.3 16-bit multiplications
*/
/* 8.3 16-bit multiplications */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbb(int32_t __a, int32_t __b) {
@ -279,18 +284,18 @@ __smulwt(int32_t __a, int32_t __b) {
#endif
/*
* 9.4 Saturating intrinsics
* 8.4 Saturating intrinsics
*
* FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag
* intrinsics are implemented and the flag is enabled.
*/
/* 9.4.1 Width-specified saturation intrinsics */
/* 8.4.1 Width-specified saturation intrinsics */
#if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
#endif
/* 9.4.2 Saturating addition and subtraction intrinsics */
/* 8.4.2 Saturating addition and subtraction intrinsics */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
@ -308,7 +313,7 @@ __qdbl(int32_t __t) {
}
#endif
/* 9.4.3 Accumultating multiplications */
/* 8.4.3 Accumultating multiplications */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
@ -337,13 +342,13 @@ __smlawt(int32_t __a, int32_t __b, int32_t __c) {
#endif
/* 9.5.4 Parallel 16-bit saturation */
/* 8.5.4 Parallel 16-bit saturation */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif
/* 9.5.5 Packing and unpacking */
/* 8.5.5 Packing and unpacking */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
@ -368,7 +373,7 @@ __uxtb16(int8x4_t __a) {
}
#endif
/* 9.5.6 Parallel selection */
/* 8.5.6 Parallel selection */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
@ -376,7 +381,7 @@ __sel(uint8x4_t __a, uint8x4_t __b) {
}
#endif
/* 9.5.7 Parallel 8-bit addition and subtraction */
/* 8.5.7 Parallel 8-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
@ -428,7 +433,7 @@ __usub8(uint8x4_t __a, uint8x4_t __b) {
}
#endif
/* 9.5.8 Sum of 8-bit absolute differences */
/* 8.5.8 Sum of 8-bit absolute differences */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
@ -440,7 +445,7 @@ __usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
}
#endif
/* 9.5.9 Parallel 16-bit addition and subtraction */
/* 8.5.9 Parallel 16-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
@ -540,7 +545,7 @@ __usub16(uint16x2_t __a, uint16x2_t __b) {
}
#endif
/* 9.5.10 Parallel 16-bit multiplications */
/* 8.5.10 Parallel 16-bit multiplications */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
@ -592,7 +597,22 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
}
#endif
/* 9.7 CRC32 intrinsics */
/* 8.6 Floating-point data-processing intrinsics */
#if (defined(__ARM_FEATURE_DIRECTED_ROUNDING) && \
(__ARM_FEATURE_DIRECTED_ROUNDING)) && \
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ double __attribute__((__always_inline__, __nodebug__))
__rintn(double __a) {
return __builtin_roundeven(__a);
}
static __inline__ float __attribute__((__always_inline__, __nodebug__))
__rintnf(float __a) {
return __builtin_roundevenf(__a);
}
#endif
/* 8.8 CRC32 intrinsics */
#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
@ -636,6 +656,7 @@ __crc32cd(uint32_t __a, uint64_t __b) {
}
#endif
/* 8.6 Floating-point data-processing intrinsics */
/* Armv8.3-A Javascript conversion intrinsic */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
@ -687,7 +708,7 @@ __rint64x(double __a) {
}
#endif
/* Armv8.7-A load/store 64-byte intrinsics */
/* 8.9 Armv8.7-A load/store 64-byte intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
typedef struct {
uint64_t val[8];
@ -713,7 +734,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
}
#endif
/* 10.1 Special register intrinsics */
/* 11.1 Special register intrinsics */
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
#define __arm_rsr128(sysreg) __builtin_arm_rsr128(sysreg)
@ -727,7 +748,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))
/* Memory Tagging Extensions (MTE) Intrinsics */
/* 10.3 Memory Tagging Extensions (MTE) Intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
@ -736,12 +757,71 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
/* Memory Operations Intrinsics */
/* 18 Memory Operations Intrinsics */
#define __arm_mops_memset_tag(__tagged_address, __value, __size) \
__builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
#endif
/* Transactional Memory Extension (TME) Intrinsics */
/* 11.3 Coprocessor Intrinsics */
#if defined(__ARM_FEATURE_COPROC)
#if (__ARM_FEATURE_COPROC & 0x1)
#if (__ARM_ARCH < 8)
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
__builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
#endif /* __ARM_ARCH < 8 */
#define __arm_ldc(coproc, CRd, p) __builtin_arm_ldc(coproc, CRd, p)
#define __arm_stc(coproc, CRd, p) __builtin_arm_stc(coproc, CRd, p)
#define __arm_mcr(coproc, opc1, value, CRn, CRm, opc2) \
__builtin_arm_mcr(coproc, opc1, value, CRn, CRm, opc2)
#define __arm_mrc(coproc, opc1, CRn, CRm, opc2) \
__builtin_arm_mrc(coproc, opc1, CRn, CRm, opc2)
#if (__ARM_ARCH != 4) && (__ARM_ARCH < 8)
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
#endif /* (__ARM_ARCH != 4) && (__ARM_ARCH != 8) */
#if (__ARM_ARCH_8M_MAIN__) || (__ARM_ARCH_8_1M_MAIN__)
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
__builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
#endif /* ___ARM_ARCH_8M_MAIN__ */
#endif /* __ARM_FEATURE_COPROC & 0x1 */
#if (__ARM_FEATURE_COPROC & 0x2)
#define __arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) \
__builtin_arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2)
#define __arm_ldc2(coproc, CRd, p) __builtin_arm_ldc2(coproc, CRd, p)
#define __arm_stc2(coproc, CRd, p) __builtin_arm_stc2(coproc, CRd, p)
#define __arm_ldc2l(coproc, CRd, p) __builtin_arm_ldc2l(coproc, CRd, p)
#define __arm_stc2l(coproc, CRd, p) __builtin_arm_stc2l(coproc, CRd, p)
#define __arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) \
__builtin_arm_mcr2(coproc, opc1, value, CRn, CRm, opc2)
#define __arm_mrc2(coproc, opc1, CRn, CRm, opc2) \
__builtin_arm_mrc2(coproc, opc1, CRn, CRm, opc2)
#endif
#if (__ARM_FEATURE_COPROC & 0x4)
#define __arm_mcrr(coproc, opc1, value, CRm) \
__builtin_arm_mcrr(coproc, opc1, value, CRm)
#define __arm_mrrc(coproc, opc1, CRm) __builtin_arm_mrrc(coproc, opc1, CRm)
#endif
#if (__ARM_FEATURE_COPROC & 0x8)
#define __arm_mcrr2(coproc, opc1, value, CRm) \
__builtin_arm_mcrr2(coproc, opc1, value, CRm)
#define __arm_mrrc2(coproc, opc1, CRm) __builtin_arm_mrrc2(coproc, opc1, CRm)
#endif
#endif // __ARM_FEATURE_COPROC
/* 17 Transactional Memory Extension (TME) Intrinsics */
#if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME
#define _TMFAILURE_REASON 0x00007fffu
@ -763,7 +843,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#endif /* __ARM_FEATURE_TME */
/* Armv8.5-A Random number generation intrinsics */
/* 8.7 Armv8.5-A Random number generation intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndr(uint64_t *__p) {

412
lib/include/arm_neon.h vendored
View File

@ -35,12 +35,7 @@
#include <stdint.h>
#include <arm_bf16.h>
typedef float float32_t;
typedef __fp16 float16_t;
#ifdef __aarch64__
typedef double float64_t;
#endif
#include <arm_vector_types.h>
#ifdef __aarch64__
typedef uint8_t poly8_t;
typedef uint16_t poly16_t;
@ -51,30 +46,6 @@ typedef int8_t poly8_t;
typedef int16_t poly16_t;
typedef int64_t poly64_t;
#endif
typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
#ifdef __aarch64__
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
#endif
typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
@ -82,96 +53,6 @@ typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
typedef __attribute__((neon_polyvector_type(1))) poly64_t poly64x1_t;
typedef __attribute__((neon_polyvector_type(2))) poly64_t poly64x2_t;
typedef struct int8x8x2_t {
int8x8_t val[2];
} int8x8x2_t;
typedef struct int8x16x2_t {
int8x16_t val[2];
} int8x16x2_t;
typedef struct int16x4x2_t {
int16x4_t val[2];
} int16x4x2_t;
typedef struct int16x8x2_t {
int16x8_t val[2];
} int16x8x2_t;
typedef struct int32x2x2_t {
int32x2_t val[2];
} int32x2x2_t;
typedef struct int32x4x2_t {
int32x4_t val[2];
} int32x4x2_t;
typedef struct int64x1x2_t {
int64x1_t val[2];
} int64x1x2_t;
typedef struct int64x2x2_t {
int64x2_t val[2];
} int64x2x2_t;
typedef struct uint8x8x2_t {
uint8x8_t val[2];
} uint8x8x2_t;
typedef struct uint8x16x2_t {
uint8x16_t val[2];
} uint8x16x2_t;
typedef struct uint16x4x2_t {
uint16x4_t val[2];
} uint16x4x2_t;
typedef struct uint16x8x2_t {
uint16x8_t val[2];
} uint16x8x2_t;
typedef struct uint32x2x2_t {
uint32x2_t val[2];
} uint32x2x2_t;
typedef struct uint32x4x2_t {
uint32x4_t val[2];
} uint32x4x2_t;
typedef struct uint64x1x2_t {
uint64x1_t val[2];
} uint64x1x2_t;
typedef struct uint64x2x2_t {
uint64x2_t val[2];
} uint64x2x2_t;
typedef struct float16x4x2_t {
float16x4_t val[2];
} float16x4x2_t;
typedef struct float16x8x2_t {
float16x8_t val[2];
} float16x8x2_t;
typedef struct float32x2x2_t {
float32x2_t val[2];
} float32x2x2_t;
typedef struct float32x4x2_t {
float32x4_t val[2];
} float32x4x2_t;
#ifdef __aarch64__
typedef struct float64x1x2_t {
float64x1_t val[2];
} float64x1x2_t;
typedef struct float64x2x2_t {
float64x2_t val[2];
} float64x2x2_t;
#endif
typedef struct poly8x8x2_t {
poly8x8_t val[2];
} poly8x8x2_t;
@ -196,96 +77,6 @@ typedef struct poly64x2x2_t {
poly64x2_t val[2];
} poly64x2x2_t;
typedef struct int8x8x3_t {
int8x8_t val[3];
} int8x8x3_t;
typedef struct int8x16x3_t {
int8x16_t val[3];
} int8x16x3_t;
typedef struct int16x4x3_t {
int16x4_t val[3];
} int16x4x3_t;
typedef struct int16x8x3_t {
int16x8_t val[3];
} int16x8x3_t;
typedef struct int32x2x3_t {
int32x2_t val[3];
} int32x2x3_t;
typedef struct int32x4x3_t {
int32x4_t val[3];
} int32x4x3_t;
typedef struct int64x1x3_t {
int64x1_t val[3];
} int64x1x3_t;
typedef struct int64x2x3_t {
int64x2_t val[3];
} int64x2x3_t;
typedef struct uint8x8x3_t {
uint8x8_t val[3];
} uint8x8x3_t;
typedef struct uint8x16x3_t {
uint8x16_t val[3];
} uint8x16x3_t;
typedef struct uint16x4x3_t {
uint16x4_t val[3];
} uint16x4x3_t;
typedef struct uint16x8x3_t {
uint16x8_t val[3];
} uint16x8x3_t;
typedef struct uint32x2x3_t {
uint32x2_t val[3];
} uint32x2x3_t;
typedef struct uint32x4x3_t {
uint32x4_t val[3];
} uint32x4x3_t;
typedef struct uint64x1x3_t {
uint64x1_t val[3];
} uint64x1x3_t;
typedef struct uint64x2x3_t {
uint64x2_t val[3];
} uint64x2x3_t;
typedef struct float16x4x3_t {
float16x4_t val[3];
} float16x4x3_t;
typedef struct float16x8x3_t {
float16x8_t val[3];
} float16x8x3_t;
typedef struct float32x2x3_t {
float32x2_t val[3];
} float32x2x3_t;
typedef struct float32x4x3_t {
float32x4_t val[3];
} float32x4x3_t;
#ifdef __aarch64__
typedef struct float64x1x3_t {
float64x1_t val[3];
} float64x1x3_t;
typedef struct float64x2x3_t {
float64x2_t val[3];
} float64x2x3_t;
#endif
typedef struct poly8x8x3_t {
poly8x8_t val[3];
} poly8x8x3_t;
@ -310,96 +101,6 @@ typedef struct poly64x2x3_t {
poly64x2_t val[3];
} poly64x2x3_t;
typedef struct int8x8x4_t {
int8x8_t val[4];
} int8x8x4_t;
typedef struct int8x16x4_t {
int8x16_t val[4];
} int8x16x4_t;
typedef struct int16x4x4_t {
int16x4_t val[4];
} int16x4x4_t;
typedef struct int16x8x4_t {
int16x8_t val[4];
} int16x8x4_t;
typedef struct int32x2x4_t {
int32x2_t val[4];
} int32x2x4_t;
typedef struct int32x4x4_t {
int32x4_t val[4];
} int32x4x4_t;
typedef struct int64x1x4_t {
int64x1_t val[4];
} int64x1x4_t;
typedef struct int64x2x4_t {
int64x2_t val[4];
} int64x2x4_t;
typedef struct uint8x8x4_t {
uint8x8_t val[4];
} uint8x8x4_t;
typedef struct uint8x16x4_t {
uint8x16_t val[4];
} uint8x16x4_t;
typedef struct uint16x4x4_t {
uint16x4_t val[4];
} uint16x4x4_t;
typedef struct uint16x8x4_t {
uint16x8_t val[4];
} uint16x8x4_t;
typedef struct uint32x2x4_t {
uint32x2_t val[4];
} uint32x2x4_t;
typedef struct uint32x4x4_t {
uint32x4_t val[4];
} uint32x4x4_t;
typedef struct uint64x1x4_t {
uint64x1_t val[4];
} uint64x1x4_t;
typedef struct uint64x2x4_t {
uint64x2_t val[4];
} uint64x2x4_t;
typedef struct float16x4x4_t {
float16x4_t val[4];
} float16x4x4_t;
typedef struct float16x8x4_t {
float16x8_t val[4];
} float16x8x4_t;
typedef struct float32x2x4_t {
float32x2_t val[4];
} float32x2x4_t;
typedef struct float32x4x4_t {
float32x4_t val[4];
} float32x4x4_t;
#ifdef __aarch64__
typedef struct float64x1x4_t {
float64x1_t val[4];
} float64x1x4_t;
typedef struct float64x2x4_t {
float64x2_t val[4];
} float64x2x4_t;
#endif
typedef struct poly8x8x4_t {
poly8x8_t val[4];
} poly8x8x4_t;
@ -424,33 +125,6 @@ typedef struct poly64x2x4_t {
poly64x2_t val[4];
} poly64x2x4_t;
typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
typedef struct bfloat16x4x2_t {
bfloat16x4_t val[2];
} bfloat16x4x2_t;
typedef struct bfloat16x8x2_t {
bfloat16x8_t val[2];
} bfloat16x8x2_t;
typedef struct bfloat16x4x3_t {
bfloat16x4_t val[3];
} bfloat16x4x3_t;
typedef struct bfloat16x8x3_t {
bfloat16x8_t val[3];
} bfloat16x8x3_t;
typedef struct bfloat16x4x4_t {
bfloat16x4_t val[4];
} bfloat16x4x4_t;
typedef struct bfloat16x8x4_t {
bfloat16x8_t val[4];
} bfloat16x8x4_t;
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
#ifdef __LITTLE_ENDIAN__
@ -66600,6 +66274,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32x_f32(float32x2_t __p0)
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) {
float64x2_t __ret;
__ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__p0, 42);
return __ret;
}
#else
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) {
float64x2_t __ret;
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
__ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__rev0, 42);
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
return __ret;
}
#endif
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32x_f64(float64x1_t __p0) {
float64x1_t __ret;
__ret = (float64x1_t) __builtin_neon_vrnd32x_f64((int8x8_t)__p0, 10);
return __ret;
}
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd32zq_f32(float32x4_t __p0) {
float32x4_t __ret;
@ -66632,6 +66327,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32z_f32(float32x2_t __p0)
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) {
float64x2_t __ret;
__ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__p0, 42);
return __ret;
}
#else
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) {
float64x2_t __ret;
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
__ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__rev0, 42);
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
return __ret;
}
#endif
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32z_f64(float64x1_t __p0) {
float64x1_t __ret;
__ret = (float64x1_t) __builtin_neon_vrnd32z_f64((int8x8_t)__p0, 10);
return __ret;
}
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd64xq_f32(float32x4_t __p0) {
float32x4_t __ret;
@ -66664,6 +66380,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64x_f32(float32x2_t __p0)
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) {
float64x2_t __ret;
__ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__p0, 42);
return __ret;
}
#else
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) {
float64x2_t __ret;
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
__ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__rev0, 42);
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
return __ret;
}
#endif
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64x_f64(float64x1_t __p0) {
float64x1_t __ret;
__ret = (float64x1_t) __builtin_neon_vrnd64x_f64((int8x8_t)__p0, 10);
return __ret;
}
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd64zq_f32(float32x4_t __p0) {
float32x4_t __ret;
@ -66696,6 +66433,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64z_f32(float32x2_t __p0)
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) {
float64x2_t __ret;
__ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__p0, 42);
return __ret;
}
#else
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) {
float64x2_t __ret;
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
__ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__rev0, 42);
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
return __ret;
}
#endif
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64z_f64(float64x1_t __p0) {
float64x1_t __ret;
__ret = (float64x1_t) __builtin_neon_vrnd64z_f64((int8x8_t)__p0, 10);
return __ret;
}
#endif
#if defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)
#ifdef __LITTLE_ENDIAN__

2412
lib/include/arm_sme.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,642 +0,0 @@
/*===---- arm_sme_draft_spec_subject_to_change.h - ARM SME intrinsics ------===
*
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __ARM_SME_H
#define __ARM_SME_H
#if !defined(__LITTLE_ENDIAN__)
#error "Big endian is currently not supported for arm_sme_draft_spec_subject_to_change.h"
#endif
#include <arm_sve.h>
/* Function attributes */
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
#define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__))
#ifdef __cplusplus
extern "C" {
#endif
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za))
void svaddha_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za))
void svaddha_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za))
void svaddva_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za))
void svaddva_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsb), arm_streaming_compatible, arm_preserves_za))
uint64_t svcntsb(void);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsd), arm_streaming_compatible, arm_preserves_za))
uint64_t svcntsd(void);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsh), arm_streaming_compatible, arm_preserves_za))
uint64_t svcntsh(void);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsw), arm_streaming_compatible, arm_preserves_za))
uint64_t svcntsw(void);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za128), arm_streaming, arm_shared_za))
void svld1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za16), arm_streaming, arm_shared_za))
void svld1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za32), arm_streaming, arm_shared_za))
void svld1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za64), arm_streaming, arm_shared_za))
void svld1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za8), arm_streaming, arm_shared_za))
void svld1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za128), arm_streaming, arm_shared_za))
void svld1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za16), arm_streaming, arm_shared_za))
void svld1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za32), arm_streaming, arm_shared_za))
void svld1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za64), arm_streaming, arm_shared_za))
void svld1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za8), arm_streaming, arm_shared_za))
void svld1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za128), arm_streaming, arm_shared_za))
void svld1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za16), arm_streaming, arm_shared_za))
void svld1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za32), arm_streaming, arm_shared_za))
void svld1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za64), arm_streaming, arm_shared_za))
void svld1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za8), arm_streaming, arm_shared_za))
void svld1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za128), arm_streaming, arm_shared_za))
void svld1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za16), arm_streaming, arm_shared_za))
void svld1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za32), arm_streaming, arm_shared_za))
void svld1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za64), arm_streaming, arm_shared_za))
void svld1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za8), arm_streaming, arm_shared_za))
void svld1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za))
void svmopa_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za))
void svmopa_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za))
void svmopa_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za))
void svmopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za))
void svmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za))
void svmops_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za))
void svmops_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za))
void svmops_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za))
void svmops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za))
void svmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_hor_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_hor_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_hor_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_hor_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_hor_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_hor_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_hor_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_hor_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_hor_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_hor_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_hor_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_hor_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_hor_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_hor_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_hor_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_hor_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_hor_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_hor_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_hor_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_hor_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_hor_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_hor_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_hor_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_hor_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_ver_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_ver_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_ver_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_ver_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_ver_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_ver_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_ver_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_ver_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_ver_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_ver_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_ver_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_ver_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_ver_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_ver_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_ver_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_ver_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_ver_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_ver_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_ver_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_ver_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_ver_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_ver_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_ver_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_ver_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za128), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za16), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za32), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za64), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za8), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za128), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za16), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za32), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za64), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za8), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za))
void svsumopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za))
void svsumops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za))
void svusmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za))
void svusmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za), arm_streaming_compatible, arm_shared_za))
void svzero_mask_za(uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za), arm_streaming_compatible, arm_shared_za))
void svzero_za();
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za))
void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za))
void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za))
void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za))
void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za))
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za))
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za))
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za))
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za))
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za))
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za))
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za))
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za))
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za))
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_hor_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_hor_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_hor_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_hor_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_hor_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_hor_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_hor_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_hor_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_hor_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_hor_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_hor_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_hor_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_hor_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_hor_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_hor_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_hor_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_hor_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_hor_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_hor_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_hor_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_hor_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_hor_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_hor_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_hor_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_ver_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_ver_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_ver_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_ver_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_ver_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_ver_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_ver_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_ver_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_ver_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_ver_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_ver_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_ver_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_ver_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_ver_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_ver_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_ver_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_ver_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_ver_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_ver_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_ver_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_ver_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_ver_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_ver_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_ver_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za))
void svsumopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za))
void svsumops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za))
void svusmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za))
void svusmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za))
void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za))
void svmops_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za))
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za))
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za))
void svaddha_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za))
void svaddha_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za))
void svaddva_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za))
void svaddva_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za))
void svmopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za))
void svmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za))
void svmops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za))
void svmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za))
void svsumopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za))
void svsumops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za))
void svusmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za))
void svusmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za))
void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za))
void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za))
void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za))
void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za))
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za))
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za))
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za))
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za))
void svsumopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za))
void svsumops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za))
void svusmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za))
void svusmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_vnum_za), arm_streaming_compatible, arm_shared_za))
void svldr_vnum_za(uint32_t, uint64_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_vnum_za), arm_streaming_compatible, arm_shared_za, arm_preserves_za))
void svstr_vnum_za(uint32_t, uint64_t, void *);
#ifdef __cplusplus
} // extern "C"
#endif
#undef __ai
#endif /* __ARM_SME_H */

7829
lib/include/arm_sve.h vendored

File diff suppressed because it is too large Load Diff

345
lib/include/arm_vector_types.h vendored Normal file
View File

@ -0,0 +1,345 @@
/*===---- arm_vector_types - ARM vector type ------===
*
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)
#error "This file should not be used standalone. Please include arm_neon.h or arm_sve.h instead"
#endif
#ifndef __ARM_NEON_TYPES_H
#define __ARM_NEON_TYPES_H
typedef float float32_t;
typedef __fp16 float16_t;
#ifdef __aarch64__
typedef double float64_t;
#endif
typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
#ifdef __aarch64__
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
#endif
typedef struct int8x8x2_t {
int8x8_t val[2];
} int8x8x2_t;
typedef struct int8x16x2_t {
int8x16_t val[2];
} int8x16x2_t;
typedef struct int16x4x2_t {
int16x4_t val[2];
} int16x4x2_t;
typedef struct int16x8x2_t {
int16x8_t val[2];
} int16x8x2_t;
typedef struct int32x2x2_t {
int32x2_t val[2];
} int32x2x2_t;
typedef struct int32x4x2_t {
int32x4_t val[2];
} int32x4x2_t;
typedef struct int64x1x2_t {
int64x1_t val[2];
} int64x1x2_t;
typedef struct int64x2x2_t {
int64x2_t val[2];
} int64x2x2_t;
typedef struct uint8x8x2_t {
uint8x8_t val[2];
} uint8x8x2_t;
typedef struct uint8x16x2_t {
uint8x16_t val[2];
} uint8x16x2_t;
typedef struct uint16x4x2_t {
uint16x4_t val[2];
} uint16x4x2_t;
typedef struct uint16x8x2_t {
uint16x8_t val[2];
} uint16x8x2_t;
typedef struct uint32x2x2_t {
uint32x2_t val[2];
} uint32x2x2_t;
typedef struct uint32x4x2_t {
uint32x4_t val[2];
} uint32x4x2_t;
typedef struct uint64x1x2_t {
uint64x1_t val[2];
} uint64x1x2_t;
typedef struct uint64x2x2_t {
uint64x2_t val[2];
} uint64x2x2_t;
typedef struct float16x4x2_t {
float16x4_t val[2];
} float16x4x2_t;
typedef struct float16x8x2_t {
float16x8_t val[2];
} float16x8x2_t;
typedef struct float32x2x2_t {
float32x2_t val[2];
} float32x2x2_t;
typedef struct float32x4x2_t {
float32x4_t val[2];
} float32x4x2_t;
#ifdef __aarch64__
typedef struct float64x1x2_t {
float64x1_t val[2];
} float64x1x2_t;
typedef struct float64x2x2_t {
float64x2_t val[2];
} float64x2x2_t;
#endif
typedef struct int8x8x3_t {
int8x8_t val[3];
} int8x8x3_t;
typedef struct int8x16x3_t {
int8x16_t val[3];
} int8x16x3_t;
typedef struct int16x4x3_t {
int16x4_t val[3];
} int16x4x3_t;
typedef struct int16x8x3_t {
int16x8_t val[3];
} int16x8x3_t;
typedef struct int32x2x3_t {
int32x2_t val[3];
} int32x2x3_t;
typedef struct int32x4x3_t {
int32x4_t val[3];
} int32x4x3_t;
typedef struct int64x1x3_t {
int64x1_t val[3];
} int64x1x3_t;
typedef struct int64x2x3_t {
int64x2_t val[3];
} int64x2x3_t;
typedef struct uint8x8x3_t {
uint8x8_t val[3];
} uint8x8x3_t;
typedef struct uint8x16x3_t {
uint8x16_t val[3];
} uint8x16x3_t;
typedef struct uint16x4x3_t {
uint16x4_t val[3];
} uint16x4x3_t;
typedef struct uint16x8x3_t {
uint16x8_t val[3];
} uint16x8x3_t;
typedef struct uint32x2x3_t {
uint32x2_t val[3];
} uint32x2x3_t;
typedef struct uint32x4x3_t {
uint32x4_t val[3];
} uint32x4x3_t;
typedef struct uint64x1x3_t {
uint64x1_t val[3];
} uint64x1x3_t;
typedef struct uint64x2x3_t {
uint64x2_t val[3];
} uint64x2x3_t;
typedef struct float16x4x3_t {
float16x4_t val[3];
} float16x4x3_t;
typedef struct float16x8x3_t {
float16x8_t val[3];
} float16x8x3_t;
typedef struct float32x2x3_t {
float32x2_t val[3];
} float32x2x3_t;
typedef struct float32x4x3_t {
float32x4_t val[3];
} float32x4x3_t;
#ifdef __aarch64__
typedef struct float64x1x3_t {
float64x1_t val[3];
} float64x1x3_t;
typedef struct float64x2x3_t {
float64x2_t val[3];
} float64x2x3_t;
#endif
typedef struct int8x8x4_t {
int8x8_t val[4];
} int8x8x4_t;
typedef struct int8x16x4_t {
int8x16_t val[4];
} int8x16x4_t;
typedef struct int16x4x4_t {
int16x4_t val[4];
} int16x4x4_t;
typedef struct int16x8x4_t {
int16x8_t val[4];
} int16x8x4_t;
typedef struct int32x2x4_t {
int32x2_t val[4];
} int32x2x4_t;
typedef struct int32x4x4_t {
int32x4_t val[4];
} int32x4x4_t;
typedef struct int64x1x4_t {
int64x1_t val[4];
} int64x1x4_t;
typedef struct int64x2x4_t {
int64x2_t val[4];
} int64x2x4_t;
typedef struct uint8x8x4_t {
uint8x8_t val[4];
} uint8x8x4_t;
typedef struct uint8x16x4_t {
uint8x16_t val[4];
} uint8x16x4_t;
typedef struct uint16x4x4_t {
uint16x4_t val[4];
} uint16x4x4_t;
typedef struct uint16x8x4_t {
uint16x8_t val[4];
} uint16x8x4_t;
typedef struct uint32x2x4_t {
uint32x2_t val[4];
} uint32x2x4_t;
typedef struct uint32x4x4_t {
uint32x4_t val[4];
} uint32x4x4_t;
typedef struct uint64x1x4_t {
uint64x1_t val[4];
} uint64x1x4_t;
typedef struct uint64x2x4_t {
uint64x2_t val[4];
} uint64x2x4_t;
typedef struct float16x4x4_t {
float16x4_t val[4];
} float16x4x4_t;
typedef struct float16x8x4_t {
float16x8_t val[4];
} float16x8x4_t;
typedef struct float32x2x4_t {
float32x2_t val[4];
} float32x2x4_t;
typedef struct float32x4x4_t {
float32x4_t val[4];
} float32x4x4_t;
#ifdef __aarch64__
typedef struct float64x1x4_t {
float64x1_t val[4];
} float64x1x4_t;
typedef struct float64x2x4_t {
float64x2_t val[4];
} float64x2x4_t;
#endif
typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
typedef struct bfloat16x4x2_t {
bfloat16x4_t val[2];
} bfloat16x4x2_t;
typedef struct bfloat16x8x2_t {
bfloat16x8_t val[2];
} bfloat16x8x2_t;
typedef struct bfloat16x4x3_t {
bfloat16x4_t val[3];
} bfloat16x4x3_t;
typedef struct bfloat16x8x3_t {
bfloat16x8_t val[3];
} bfloat16x8x3_t;
typedef struct bfloat16x4x4_t {
bfloat16x4_t val[4];
} bfloat16x4x4_t;
typedef struct bfloat16x8x4_t {
bfloat16x8_t val[4];
} bfloat16x8x4_t;
#endif // __ARM_NEON_TYPES_H

View File

@ -15,8 +15,12 @@
#define __AVX2INTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx2,no-evex512"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx2,no-evex512"), __min_vector_width__(128)))
/* SSE4 Multiple Packed Sums of Absolute Difference. */
/// Computes sixteen sum of absolute difference (SAD) operations on sets of
@ -1307,6 +1311,23 @@ _mm256_min_epu32(__m256i __a, __m256i __b)
return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b);
}
/// Creates a 32-bit integer mask from the most significant bit of each byte
/// in the 256-bit integer vector in \a __a and returns the result.
///
/// \code{.operation}
/// FOR i := 0 TO 31
/// j := i*8
/// result[i] := __a[j+7]
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPMOVMSKB instruction.
///
/// \param __a
/// A 256-bit integer vector containing the source bytes.
/// \returns The 32-bit integer mask.
static __inline__ int __DEFAULT_FN_ATTRS256
_mm256_movemask_epi8(__m256i __a)
{
@ -2962,7 +2983,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b)
/// A pointer to the 32-byte aligned memory containing the vector to load.
/// \returns A 256-bit integer vector loaded from memory.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_stream_load_si256(__m256i const *__V)
_mm256_stream_load_si256(const void *__V)
{
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);

View File

@ -20,10 +20,11 @@ typedef __bf16 __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead")));
#define __DEFAULT_FN_ATTRS512 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16,evex512"), \
__min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bf16,no-evex512")))
/// Convert One BF16 Data to One Single Float Data.
///

View File

@ -15,7 +15,10 @@
#define __AVX512BITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bitalg,evex512"), \
__min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi16(__m512i __A)

View File

@ -18,8 +18,12 @@ typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
#define __DEFAULT_FN_ATTRS512 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,no-evex512")))
static __inline __mmask32 __DEFAULT_FN_ATTRS
_knot_mask32(__mmask32 __M)
@ -27,9 +31,7 @@ _knot_mask32(__mmask32 __M)
return __builtin_ia32_knotsi(__M);
}
static __inline __mmask64 __DEFAULT_FN_ATTRS
_knot_mask64(__mmask64 __M)
{
static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) {
return __builtin_ia32_knotdi(__M);
}
@ -39,9 +41,8 @@ _kand_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kand_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
}
@ -51,9 +52,8 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kandn_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
}
@ -63,9 +63,8 @@ _kor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kor_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
}
@ -75,9 +74,8 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kxnor_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
}
@ -87,9 +85,8 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kxor_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
}
@ -112,14 +109,12 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}
@ -148,14 +143,12 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
}
@ -171,9 +164,8 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kadd_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
}
@ -214,8 +206,7 @@ _load_mask32(__mmask32 *__A) {
return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_load_mask64(__mmask64 *__A) {
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) {
return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
}
@ -224,8 +215,8 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) {
*(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
}
static __inline__ void __DEFAULT_FN_ATTRS
_store_mask64(__mmask64 *__A, __mmask64 __B) {
static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
__mmask64 __B) {
*(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
}
@ -1714,9 +1705,8 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
(__v64qi) _mm512_setzero_si512());
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
__mmask64 __B) {
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
(__mmask64) __B);
}

View File

@ -15,7 +15,9 @@
#define __AVX512CDINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512cd,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_conflict_epi64 (__m512i __A)

View File

@ -15,8 +15,10 @@
#define __AVX512DQINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512dq,no-evex512")))
static __inline __mmask8 __DEFAULT_FN_ATTRS
_knot_mask8(__mmask8 __M)

View File

@ -167,9 +167,13 @@ typedef enum
} _MM_MANTISSA_SIGN_ENUM;
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512f,no-evex512"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512f,no-evex512")))
/* Create vectors with repeated elements */

View File

@ -22,13 +22,15 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
__min_vector_width__(512)))
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512fp16,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512fp16,no-evex512"), \
__min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512fp16,no-evex512"), \
__min_vector_width__(128)))
static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {

View File

@ -15,7 +15,9 @@
#define __IFMAINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512ifma,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)

View File

@ -15,8 +15,14 @@
#define __IFMAVLINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512ifma,avx512vl,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512ifma,avx512vl,no-evex512"), \
__min_vector_width__(256)))
#define _mm_madd52hi_epu64(X, Y, Z) \
((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \

View File

@ -14,9 +14,6 @@
#ifndef __AVX512PFINTRIN_H
#define __AVX512PFINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf")))
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(void const *)(addr), (int)(scale), \
@ -92,6 +89,4 @@
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(void *)(addr), (int)(scale), (int)(hint))
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -15,7 +15,7 @@
#define __AVX512VBMI2INTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS

View File

@ -15,8 +15,9 @@
#define __VBMIINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B)

View File

@ -15,9 +15,14 @@
#define __VBMIVLINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)

View File

@ -17,10 +17,12 @@
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl, avx512bf16"), __min_vector_width__(128)))
__target__("avx512vl,avx512bf16,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl, avx512bf16"), __min_vector_width__(256)))
__target__("avx512vl,avx512bf16,no-evex512"), \
__min_vector_width__(256)))
/// Convert Two Packed Single Data to One Packed BF16 Data.
///

View File

@ -15,8 +15,14 @@
#define __AVX512VLBITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bitalg,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bitalg,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi16(__m256i __A)

View File

@ -15,8 +15,14 @@
#define __AVX512VLBWINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bw,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bw,no-evex512"), \
__min_vector_width__(256)))
/* Integer compare */

View File

@ -14,9 +14,14 @@
#define __AVX512VLCDINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512cd,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512cd,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_broadcastmb_epi64 (__mmask8 __A)

View File

@ -15,8 +15,14 @@
#define __AVX512VLDQINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512dq,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512dq,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {

View File

@ -19,11 +19,11 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512fp16, avx512vl"), \
__target__("avx512fp16,avx512vl,no-evex512"), \
__min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512fp16, avx512vl"), \
__target__("avx512fp16,avx512vl,no-evex512"), \
__min_vector_width__(128)))
static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {

View File

@ -14,8 +14,14 @@
#ifndef __AVX512VLINTRIN_H
#define __AVX512VLINTRIN_H
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,no-evex512"), \
__min_vector_width__(256)))
typedef short __v2hi __attribute__((__vector_size__(4)));
typedef char __v4qi __attribute__((__vector_size__(4)));

View File

@ -15,8 +15,14 @@
#define __AVX512VLVBMI2INTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vbmi2,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vbmi2,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)

View File

@ -15,8 +15,14 @@
#define __AVX512VLVNNIINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vnni,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vnni,no-evex512"), \
__min_vector_width__(256)))
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed

View File

@ -29,11 +29,13 @@
#define _AVX512VLVP2INTERSECT_H
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vp2intersect,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vp2intersect,no-evex512"), \
__min_vector_width__(256)))
/// Store, in an even/odd pair of mask registers, the indicators of the
/// locations of value matches between dwords in operands __a and __b.

View File

@ -15,8 +15,9 @@
#define __AVX512VNNIINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vnni,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)

View File

@ -29,7 +29,8 @@
#define _AVX512VP2INTERSECT_H
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vp2intersect"), \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vp2intersect,evex512"), \
__min_vector_width__(512)))
/// Store, in an even/odd pair of mask registers, the indicators of the

View File

@ -17,7 +17,9 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512)))
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vpopcntdq,evex512"), \
__min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);

View File

@ -17,9 +17,13 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128)))
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vpopcntdq,avx512vl,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256)))
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vpopcntdq,avx512vl,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi64(__m128i __A) {

View File

@ -50,8 +50,12 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
#endif
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
__min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
__min_vector_width__(128)))
/* Arithmetic */
/// Adds two 256-bit vectors of [4 x double].
@ -3563,7 +3567,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
/// \param __b
/// A 256-bit integer vector containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_si256(__m256i *__a, __m256i __b)
_mm256_stream_si256(void *__a, __m256i __b)
{
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
@ -3583,7 +3587,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x double] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_pd(double *__a, __m256d __b)
_mm256_stream_pd(void *__a, __m256d __b)
{
typedef __v4df __v4df_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
@ -3604,7 +3608,7 @@ _mm256_stream_pd(double *__a, __m256d __b)
/// \param __a
/// A 256-bit vector of [8 x float] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_ps(float *__p, __m256 __a)
_mm256_stream_ps(void *__p, __m256 __a)
{
typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);

View File

@ -19,18 +19,17 @@
to use it as a potentially faster version of BSF. */
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#define _tzcnt_u16(a) (__tzcnt_u16((a)))
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 16-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 16-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see _tzcnt_u16
static __inline__ unsigned short __RELAXED_FN_ATTRS
__tzcnt_u16(unsigned short __X)
{
@ -41,13 +40,30 @@ __tzcnt_u16(unsigned short __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
/// \code
/// unsigned short _tzcnt_u16(unsigned short __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 16-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 16-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see __tzcnt_u16
#define _tzcnt_u16 __tzcnt_u16
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 32-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see _mm_tzcnt_32
/// \see { _mm_tzcnt_32 _tzcnt_u32 }
static __inline__ unsigned int __RELAXED_FN_ATTRS
__tzcnt_u32(unsigned int __X)
{
@ -58,20 +74,35 @@ __tzcnt_u32(unsigned int __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns An 32-bit integer containing the number of trailing zero bits in
/// \returns A 32-bit integer containing the number of trailing zero bits in
/// the operand.
/// \see __tzcnt_u32
/// \see { __tzcnt_u32 _tzcnt_u32 }
static __inline__ int __RELAXED_FN_ATTRS
_mm_tzcnt_32(unsigned int __X)
{
return (int)__builtin_ia32_tzcnt_u32(__X);
}
#define _tzcnt_u32(a) (__tzcnt_u32((a)))
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _tzcnt_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 32-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see { _mm_tzcnt_32 __tzcnt_u32 }
#define _tzcnt_u32 __tzcnt_u32
#ifdef __x86_64__
@ -79,13 +110,13 @@ _mm_tzcnt_32(unsigned int __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see _mm_tzcnt_64
/// \see { _mm_tzcnt_64 _tzcnt_u64 }
static __inline__ unsigned long long __RELAXED_FN_ATTRS
__tzcnt_u64(unsigned long long __X)
{
@ -96,20 +127,35 @@ __tzcnt_u64(unsigned long long __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An 64-bit integer containing the number of trailing zero bits in
/// the operand.
/// \see __tzcnt_u64
/// \see { __tzcnt_u64 _tzcnt_u64 }
static __inline__ long long __RELAXED_FN_ATTRS
_mm_tzcnt_64(unsigned long long __X)
{
return (long long)__builtin_ia32_tzcnt_u64(__X);
}
#define _tzcnt_u64(a) (__tzcnt_u64((a)))
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _tzcnt_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see { _mm_tzcnt_64 __tzcnt_u64
#define _tzcnt_u64 __tzcnt_u64
#endif /* __x86_64__ */
@ -121,21 +167,12 @@ _mm_tzcnt_64(unsigned long long __X)
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
#define _andn_u32(a, b) (__andn_u32((a), (b)))
/* _bextr_u32 != __bextr_u32 */
#define _blsi_u32(a) (__blsi_u32((a)))
#define _blsmsk_u32(a) (__blsmsk_u32((a)))
#define _blsr_u32(a) (__blsr_u32((a)))
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> ANDN </c> instruction.
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
/// An unsigned integer containing one of the operands.
@ -143,19 +180,40 @@ _mm_tzcnt_64(unsigned long long __X)
/// An unsigned integer containing one of the operands.
/// \returns An unsigned integer containing the bitwise AND of the second
/// operand with the one's complement of the first operand.
/// \see _andn_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__andn_u32(unsigned int __X, unsigned int __Y)
{
return ~__X & __Y;
}
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _andn_u32(unsigned int __X, unsigned int __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
/// An unsigned integer containing one of the operands.
/// \param __Y
/// An unsigned integer containing one of the operands.
/// \returns An unsigned integer containing the bitwise AND of the second
/// operand with the one's complement of the first operand.
/// \see __andn_u32
#define _andn_u32 __andn_u32
/* AMD-specified, double-leading-underscore version of BEXTR */
/// Extracts the specified bits from the first operand and returns them
/// in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be extracted.
@ -178,7 +236,7 @@ __bextr_u32(unsigned int __X, unsigned int __Y)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be extracted.
@ -203,7 +261,7 @@ _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be extracted.
@ -224,33 +282,89 @@ _bextr2_u32(unsigned int __X, unsigned int __Y) {
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSI </c> instruction.
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be cleared.
/// \returns An unsigned integer containing the result of clearing the bits from
/// the source operand.
/// \see _blsi_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsi_u32(unsigned int __X)
{
return __X & -__X;
}
/// Clears all bits in the source except for the least significant bit
/// containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsi_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be cleared.
/// \returns An unsigned integer containing the result of clearing the bits from
/// the source operand.
/// \see __blsi_u32
#define _blsi_u32 __blsi_u32
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
/// An unsigned integer used to create the mask.
/// \returns An unsigned integer containing the newly created mask.
/// \see _blsmsk_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsmsk_u32(unsigned int __X)
{
return __X ^ (__X - 1);
}
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
/// \code
/// unsigned int _blsmsk_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
/// An unsigned integer used to create the mask.
/// \returns An unsigned integer containing the newly created mask.
/// \see __blsmsk_u32
#define _blsmsk_u32 __blsmsk_u32
/// Clears the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
/// An unsigned integer containing the operand to be cleared.
/// \returns An unsigned integer containing the result of clearing the source
/// operand.
/// \see _blsr_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsmsk_u32(unsigned int __X)
__blsr_u32(unsigned int __X)
{
return __X ^ (__X - 1);
return __X & (__X - 1);
}
/// Clears the least significant bit that is set to 1 in the source
@ -258,35 +372,27 @@ __blsmsk_u32(unsigned int __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSR </c> instruction.
/// \code
/// unsigned int _bls4_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
/// An unsigned integer containing the operand to be cleared.
/// \returns An unsigned integer containing the result of clearing the source
/// operand.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsr_u32(unsigned int __X)
{
return __X & (__X - 1);
}
/// \see __blsr_u32
#define _blsr_u32 __blsr_u32
#ifdef __x86_64__
#define _andn_u64(a, b) (__andn_u64((a), (b)))
/* _bextr_u64 != __bextr_u64 */
#define _blsi_u64(a) (__blsi_u64((a)))
#define _blsmsk_u64(a) (__blsmsk_u64((a)))
#define _blsr_u64(a) (__blsr_u64((a)))
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> ANDN </c> instruction.
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
/// An unsigned 64-bit integer containing one of the operands.
@ -294,19 +400,41 @@ __blsr_u32(unsigned int __X)
/// An unsigned 64-bit integer containing one of the operands.
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
/// operand with the one's complement of the first operand.
/// \see _andn_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__andn_u64 (unsigned long long __X, unsigned long long __Y)
{
return ~__X & __Y;
}
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _andn_u64(unsigned long long __X,
/// unsigned long long __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
/// An unsigned 64-bit integer containing one of the operands.
/// \param __Y
/// An unsigned 64-bit integer containing one of the operands.
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
/// operand with the one's complement of the first operand.
/// \see __andn_u64
#define _andn_u64 __andn_u64
/* AMD-specified, double-leading-underscore version of BEXTR */
/// Extracts the specified bits from the first operand and returns them
/// in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be extracted.
@ -329,7 +457,7 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be extracted.
@ -354,7 +482,7 @@ _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be extracted.
@ -375,33 +503,89 @@ _bextr2_u64(unsigned long long __X, unsigned long long __Y) {
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSI </c> instruction.
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
/// bits from the source operand.
/// \see _blsi_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsi_u64(unsigned long long __X)
{
return __X & -__X;
}
/// Clears all bits in the source except for the least significant bit
/// containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsi_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
/// bits from the source operand.
/// \see __blsi_u64
#define _blsi_u64 __blsi_u64
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
/// An unsigned 64-bit integer used to create the mask.
/// \returns An unsigned 64-bit integer containing the newly created mask.
/// \see _blsmsk_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsmsk_u64(unsigned long long __X)
{
return __X ^ (__X - 1);
}
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
/// \code
/// unsigned long long _blsmsk_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
/// An unsigned 64-bit integer used to create the mask.
/// \returns An unsigned 64-bit integer containing the newly created mask.
/// \see __blsmsk_u64
#define _blsmsk_u64 __blsmsk_u64
/// Clears the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
/// An unsigned 64-bit integer containing the operand to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
/// source operand.
/// \see _blsr_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsmsk_u64(unsigned long long __X)
__blsr_u64(unsigned long long __X)
{
return __X ^ (__X - 1);
return __X & (__X - 1);
}
/// Clears the least significant bit that is set to 1 in the source
@ -409,17 +593,18 @@ __blsmsk_u64(unsigned long long __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSR </c> instruction.
/// \code
/// unsigned long long _blsr_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
/// An unsigned 64-bit integer containing the operand to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
/// source operand.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsr_u64(unsigned long long __X)
{
return __X & (__X - 1);
}
/// \see __blsr_u64
#define _blsr_u64 __blsr_u64
#endif /* __x86_64__ */

View File

@ -0,0 +1,9 @@
// CUDA headers define __noinline__ which interferes with libstdc++'s use of
// `__attribute((__noinline__))`. In order to avoid compilation error,
// temporarily unset __noinline__ when we include affected libstdc++ header.
#pragma push_macro("__noinline__")
#undef __noinline__
#include_next "bits/basic_string.h"
#pragma pop_macro("__noinline__")

View File

@ -0,0 +1,9 @@
// CUDA headers define __noinline__ which interferes with libstdc++'s use of
// `__attribute((__noinline__))`. In order to avoid compilation error,
// temporarily unset __noinline__ when we include affected libstdc++ header.
#pragma push_macro("__noinline__")
#undef __noinline__
#include_next "bits/basic_string.tcc"
#pragma pop_macro("__noinline__")

View File

@ -50,11 +50,11 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
__min_vector_width__(128)))
__attribute__((__always_inline__, __nodebug__, \
__target__("sse2,no-evex512"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_MMX \
__attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), \
__min_vector_width__(64)))
__attribute__((__always_inline__, __nodebug__, \
__target__("mmx,sse2,no-evex512"), __min_vector_width__(64)))
/// Adds lower double-precision values in both operands and returns the
/// sum in the lower 64 bits of the result. The upper 64 bits of the result
@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p,
/// A pointer to the 128-bit aligned memory location used to store the value.
/// \param __a
/// A vector of [2 x double] containing the 64-bit values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p,
__m128d __a) {
__builtin_nontemporal_store((__v2df)__a, (__v2df *)__p);
}
@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
/// A pointer to the 128-bit aligned memory location used to store the value.
/// \param __a
/// A 128-bit integer vector containing the values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p,
__m128i __a) {
__builtin_nontemporal_store((__v2di)__a, (__v2di *)__p);
}
@ -3983,8 +3983,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
/// A 32-bit integer containing the value to be stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si32(int *__p, int __a) {
__builtin_ia32_movnti(__p, __a);
_mm_stream_si32(void *__p, int __a) {
__builtin_ia32_movnti((int *)__p, __a);
}
#ifdef __x86_64__
@ -4003,8 +4003,8 @@ static __inline__ void
/// A 64-bit integer containing the value to be stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si64(long long *__p, long long __a) {
__builtin_ia32_movnti64(__p, __a);
_mm_stream_si64(void *__p, long long __a) {
__builtin_ia32_movnti64((long long *)__p, __a);
}
#endif

View File

@ -15,19 +15,36 @@
#define __GFNIINTRIN_H
/* Default attributes for simple form (no masking). */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("gfni,no-evex512"), __min_vector_width__(128)))
/* Default attributes for YMM unmasked form. */
#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS_Y \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx,gfni,no-evex512"), \
__min_vector_width__(256)))
/* Default attributes for ZMM unmasked forms. */
#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS_Z \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512f,evex512,gfni"), \
__min_vector_width__(512)))
/* Default attributes for ZMM masked forms. */
#define __DEFAULT_FN_ATTRS_Z_MASK __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS_Z_MASK \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,evex512,gfni"), \
__min_vector_width__(512)))
/* Default attributes for VLX masked forms. */
#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS_VL128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,avx512vl,gfni,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_VL256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,avx512vl,gfni,no-evex512"), \
__min_vector_width__(256)))
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \

File diff suppressed because it is too large Load Diff

View File

@ -291,11 +291,13 @@
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDPID__)
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit contents of the MSR.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
return __builtin_ia32_rdpid();
@ -488,6 +490,15 @@ _writegsbase_u64(unsigned long long __V)
* field inside of it.
*/
/// Load a 16-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the 16-bit value to load.
/// \returns The byte-swapped value.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
struct __loadu_i16 {
@ -496,6 +507,16 @@ _loadbe_i16(void const * __P) {
return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
/// Swap the bytes of a 16-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the memory for storing the swapped value.
/// \param __D
/// The 16-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
struct __storeu_i16 {
@ -504,6 +525,15 @@ _storebe_i16(void * __P, short __D) {
((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
}
/// Load a 32-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the 32-bit value to load.
/// \returns The byte-swapped value.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
struct __loadu_i32 {
@ -512,6 +542,16 @@ _loadbe_i32(void const * __P) {
return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
/// Swap the bytes of a 32-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the memory for storing the swapped value.
/// \param __D
/// The 32-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
struct __storeu_i32 {
@ -521,6 +561,15 @@ _storebe_i32(void * __P, int __D) {
}
#ifdef __x86_64__
/// Load a 64-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the 64-bit value to load.
/// \returns The byte-swapped value.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
struct __loadu_i64 {
@ -529,6 +578,16 @@ _loadbe_i64(void const * __P) {
return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
/// Swap the bytes of a 64-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the memory for storing the swapped value.
/// \param __D
/// The 64-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
struct __storeu_i64 {
@ -578,9 +637,13 @@ _storebe_i64(void * __P, long long __D) {
#include <cetintrin.h>
#endif
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
* whereas others are also available at all times. */
/* Intrinsics inside adcintrin.h are available at all times. */
#include <adcintrin.h>
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__ADX__)
#include <adxintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDSEED__)

16
lib/include/intrin.h vendored
View File

@ -572,6 +572,22 @@ unsigned char __readx18byte(unsigned long offset);
unsigned short __readx18word(unsigned long offset);
unsigned long __readx18dword(unsigned long offset);
unsigned __int64 __readx18qword(unsigned long offset);
double _CopyDoubleFromInt64(__int64);
float _CopyFloatFromInt32(__int32);
__int32 _CopyInt32FromFloat(float);
__int64 _CopyInt64FromDouble(double);
unsigned int _CountLeadingOnes(unsigned long);
unsigned int _CountLeadingOnes64(unsigned __int64);
unsigned int _CountLeadingSigns(long);
unsigned int _CountLeadingSigns64(__int64);
unsigned int _CountLeadingZeros(unsigned long);
unsigned int _CountLeadingZeros64(unsigned _int64);
unsigned int _CountOneBits(unsigned long);
unsigned int _CountOneBits64(unsigned __int64);
void __cdecl __prefetch(void *);
#endif
/*----------------------------------------------------------------------------*\

View File

@ -156,7 +156,7 @@ extern __inline unsigned char
return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1);
}
extern __inline unsigned char
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__iocsrrd_h(unsigned int _1) {
return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1);
@ -228,6 +228,18 @@ extern __inline void
((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2)))
#endif
#define __frecipe_s(/*float*/ _1) \
(float)__builtin_loongarch_frecipe_s((float)_1)
#define __frecipe_d(/*double*/ _1) \
(double)__builtin_loongarch_frecipe_d((double)_1)
#define __frsqrte_s(/*float*/ _1) \
(float)__builtin_loongarch_frsqrte_s((float)_1)
#define __frsqrte_d(/*double*/ _1) \
(double)__builtin_loongarch_frsqrte_d((double)_1)
#ifdef __cplusplus
}
#endif

3884
lib/include/lasxintrin.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -66,10 +66,8 @@
#define CHAR_BIT __CHAR_BIT__
/* C2x 5.2.4.2.1 */
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
/* C23 5.2.4.2.1 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define BOOL_WIDTH __BOOL_WIDTH__
#define CHAR_WIDTH CHAR_BIT
#define SCHAR_WIDTH CHAR_BIT

34
lib/include/llvm_libc_wrappers/assert.h vendored Normal file
View File

@ -0,0 +1,34 @@
//===-- Wrapper for C standard assert.h declarations on the GPU ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
#define __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
#error "This file is for GPU offloading compilation only"
#endif
#include_next <assert.h>
#if __has_include(<llvm-libc-decls/assert.h>)
#if defined(__HIP__) || defined(__CUDA__)
#define __LIBC_ATTRS __attribute__((device))
#endif
#pragma omp begin declare target
#include <llvm-libc-decls/assert.h>
#pragma omp end declare target
#undef __LIBC_ATTRS
#endif
#endif // __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__

View File

@ -13,8 +13,19 @@
#error "This file is for GPU offloading compilation only"
#endif
// The GNU headers like to define 'toupper' and 'tolower' redundantly. This is
// necessary to prevent it from doing that and remapping our implementation.
#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
#pragma push_macro("__USE_EXTERN_INLINES")
#undef __USE_EXTERN_INLINES
#endif
#include_next <ctype.h>
#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
#pragma pop_macro("__USE_EXTERN_INLINES")
#endif
#if __has_include(<llvm-libc-decls/ctype.h>)
#if defined(__HIP__) || defined(__CUDA__)
@ -26,6 +37,7 @@
#pragma push_macro("isalnum")
#pragma push_macro("isalpha")
#pragma push_macro("isascii")
#pragma push_macro("isblank")
#pragma push_macro("iscntrl")
#pragma push_macro("isdigit")
@ -36,11 +48,13 @@
#pragma push_macro("isspace")
#pragma push_macro("isupper")
#pragma push_macro("isxdigit")
#pragma push_macro("toascii")
#pragma push_macro("tolower")
#pragma push_macro("toupper")
#undef isalnum
#undef isalpha
#undef isascii
#undef iscntrl
#undef isdigit
#undef islower
@ -51,6 +65,7 @@
#undef isupper
#undef isblank
#undef isxdigit
#undef toascii
#undef tolower
#undef toupper
@ -64,6 +79,7 @@
#if !defined(__NVPTX__) && !defined(__AMDGPU__)
#pragma pop_macro("isalnum")
#pragma pop_macro("isalpha")
#pragma pop_macro("isascii")
#pragma pop_macro("isblank")
#pragma pop_macro("iscntrl")
#pragma pop_macro("isdigit")
@ -74,6 +90,7 @@
#pragma pop_macro("isspace")
#pragma pop_macro("isupper")
#pragma pop_macro("isxdigit")
#pragma pop_macro("toascii")
#pragma pop_macro("tolower")
#pragma pop_macro("toupper")
#endif

View File

@ -6,21 +6,58 @@
//
//===----------------------------------------------------------------------===//
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
#error "This file is for GPU offloading compilation only"
#endif
#include_next <stdio.h>
// In some old versions of glibc, other standard headers sometimes define
// special macros (e.g., __need_FILE) before including stdio.h to cause stdio.h
// to produce special definitions. Future includes of stdio.h when those
// special macros are undefined are expected to produce the normal definitions
// from stdio.h.
//
// We do not apply our include guard (__CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__)
// unconditionally to the above include_next. Otherwise, after an occurrence of
// the first glibc stdio.h use case described above, the include_next would be
// skipped for remaining includes of stdio.h, leaving required symbols
// undefined.
//
// We make the following assumptions to handle all use cases:
//
// 1. If the above include_next produces special glibc definitions, then (a) it
// does not produce the normal definitions that we must intercept below, (b)
// the current file was included from a glibc header that already defined
// __GLIBC__ (usually by including glibc's <features.h>), and (c) the above
// include_next does not define _STDIO_H. In that case, we skip the rest of
// the current file and don't guard against future includes.
// 2. If the above include_next produces the normal stdio.h definitions, then
// either (a) __GLIBC__ is not defined because C headers are from some other
// libc implementation or (b) the above include_next defines _STDIO_H to
// prevent the above include_next from having any effect in the future.
#if !defined(__GLIBC__) || defined(_STDIO_H)
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
#if __has_include(<llvm-libc-decls/stdio.h>)
#if defined(__HIP__) || defined(__CUDA__)
#define __LIBC_ATTRS __attribute__((device))
#endif
// Some headers provide these as macros. Temporarily undefine them so they do
// not conflict with any definitions for the GPU.
#pragma push_macro("stdout")
#pragma push_macro("stdin")
#pragma push_macro("stderr")
#undef stdout
#undef stderr
#undef stdin
#pragma omp begin declare target
#include <llvm-libc-decls/stdio.h>
@ -29,6 +66,15 @@
#undef __LIBC_ATTRS
// Restore the original macros when compiling on the host.
#if !defined(__NVPTX__) && !defined(__AMDGPU__)
#pragma pop_macro("stdout")
#pragma pop_macro("stderr")
#pragma pop_macro("stdin")
#endif
#endif
#endif // __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
#endif

View File

@ -23,8 +23,11 @@
#pragma omp begin declare target
// The LLVM C library uses this type so we forward declare it.
// The LLVM C library uses these named types so we forward declare them.
typedef void (*__atexithandler_t)(void);
typedef int (*__bsearchcompare_t)(const void *, const void *);
typedef int (*__qsortcompare_t)(const void *, const void *);
typedef int (*__qsortrcompare_t)(const void *, const void *, void *);
// Enforce ABI compatibility with the structs used by the LLVM C library.
_Static_assert(__builtin_offsetof(div_t, quot) == 0, "ABI mismatch!");

View File

@ -13,9 +13,6 @@
#error "This file is for GPU offloading compilation only"
#endif
// FIXME: The GNU headers provide C++ standard compliant headers when in C++
// mode and the LLVM libc does not. We cannot enable memchr, strchr, strchrnul,
// strpbrk, strrchr, strstr, or strcasestr until this is addressed.
#include_next <string.h>
#if __has_include(<llvm-libc-decls/string.h>)
@ -26,8 +23,70 @@
#pragma omp begin declare target
// The GNU headers provide C++ standard compliant headers when in C++ mode and
// the LLVM libc does not. We need to manually provide the definitions using the
// same prototypes.
#if defined(__cplusplus) && defined(__GLIBC__) && \
defined(__CORRECT_ISO_CPP_STRING_H_PROTO)
#ifndef __LIBC_ATTRS
#define __LIBC_ATTRS
#endif
extern "C" {
void *memccpy(void *__restrict, const void *__restrict, int,
size_t) __LIBC_ATTRS;
int memcmp(const void *, const void *, size_t) __LIBC_ATTRS;
void *memcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
void *memmem(const void *, size_t, const void *, size_t) __LIBC_ATTRS;
void *memmove(void *, const void *, size_t) __LIBC_ATTRS;
void *mempcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
void *memset(void *, int, size_t) __LIBC_ATTRS;
char *stpcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
char *stpncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
char *strcat(char *__restrict, const char *__restrict) __LIBC_ATTRS;
int strcmp(const char *, const char *) __LIBC_ATTRS;
int strcoll(const char *, const char *) __LIBC_ATTRS;
char *strcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
size_t strcspn(const char *, const char *) __LIBC_ATTRS;
char *strdup(const char *) __LIBC_ATTRS;
size_t strlen(const char *) __LIBC_ATTRS;
char *strncat(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
int strncmp(const char *, const char *, size_t) __LIBC_ATTRS;
char *strncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
char *strndup(const char *, size_t) __LIBC_ATTRS;
size_t strnlen(const char *, size_t) __LIBC_ATTRS;
size_t strspn(const char *, const char *) __LIBC_ATTRS;
char *strtok(char *__restrict, const char *__restrict) __LIBC_ATTRS;
char *strtok_r(char *__restrict, const char *__restrict,
char **__restrict) __LIBC_ATTRS;
size_t strxfrm(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
}
extern "C++" {
char *strstr(char *, const char *) noexcept __LIBC_ATTRS;
const char *strstr(const char *, const char *) noexcept __LIBC_ATTRS;
char *strpbrk(char *, const char *) noexcept __LIBC_ATTRS;
const char *strpbrk(const char *, const char *) noexcept __LIBC_ATTRS;
char *strrchr(char *, int) noexcept __LIBC_ATTRS;
const char *strrchr(const char *, int) noexcept __LIBC_ATTRS;
char *strchr(char *, int) noexcept __LIBC_ATTRS;
const char *strchr(const char *, int) noexcept __LIBC_ATTRS;
char *strchrnul(char *, int) noexcept __LIBC_ATTRS;
const char *strchrnul(const char *, int) noexcept __LIBC_ATTRS;
char *strcasestr(char *, const char *) noexcept __LIBC_ATTRS;
const char *strcasestr(const char *, const char *) noexcept __LIBC_ATTRS;
void *memrchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
const void *memrchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
void *memchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
const void *memchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
}
#else
#include <llvm-libc-decls/string.h>
#endif
#pragma omp end declare target
#undef __LIBC_ATTRS

34
lib/include/llvm_libc_wrappers/time.h vendored Normal file
View File

@ -0,0 +1,34 @@
//===-- Wrapper for C standard time.h declarations on the GPU -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
#define __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
#error "This file is for GPU offloading compilation only"
#endif
#include_next <time.h>
#if __has_include(<llvm-libc-decls/time.h>)
#if defined(__HIP__) || defined(__CUDA__)
#define __LIBC_ATTRS __attribute__((device))
#endif
#pragma omp begin declare target
_Static_assert(sizeof(clock_t) == sizeof(long), "ABI mismatch!");
#include <llvm-libc-decls/time.h>
#pragma omp end declare target
#endif
#endif // __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__

3750
lib/include/lsxintrin.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -22,7 +22,9 @@ typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \
__min_vector_width__(64)))
/// Clears the MMX state by setting the state of the x87 stack registers
/// to empty.
@ -31,9 +33,9 @@ typedef char __v8qi __attribute__((__vector_size__(8)));
///
/// This intrinsic corresponds to the <c> EMMS </c> instruction.
///
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
_mm_empty(void)
{
static __inline__ void __attribute__((__always_inline__, __nodebug__,
__target__("mmx,no-evex512")))
_mm_empty(void) {
__builtin_ia32_emms();
}

View File

@ -153,10 +153,163 @@ module _Builtin_intrinsics [system] [extern_c] {
}
}
module _Builtin_stddef_max_align_t [system] [extern_c] {
header "__stddef_max_align_t.h"
// Start -fbuiltin-headers-in-system-modules affected modules
// The following modules all ignore their headers when
// -fbuiltin-headers-in-system-modules is passed, and many of
// those headers join system modules when present.
// e.g. if -fbuiltin-headers-in-system-modules is passed, then
// float.h will not be in the _Builtin_float module (that module
// will be empty). If there is a system module that declares
// `header "float.h"`, then the builtin float.h will join
// that module. The system float.h (if present) will be treated
// as a textual header in the sytem module.
module _Builtin_float [system] {
header "float.h"
export *
}
module _Builtin_inttypes [system] {
header "inttypes.h"
export *
}
module _Builtin_iso646 [system] {
header "iso646.h"
export *
}
module _Builtin_limits [system] {
header "limits.h"
export *
}
module _Builtin_stdalign [system] {
header "stdalign.h"
export *
}
module _Builtin_stdarg [system] {
textual header "stdarg.h"
explicit module __gnuc_va_list {
header "__stdarg___gnuc_va_list.h"
export *
}
explicit module __va_copy {
header "__stdarg___va_copy.h"
export *
}
explicit module va_arg {
header "__stdarg_va_arg.h"
export *
}
explicit module va_copy {
header "__stdarg_va_copy.h"
export *
}
explicit module va_list {
header "__stdarg_va_list.h"
export *
}
}
module _Builtin_stdatomic [system] {
header "stdatomic.h"
export *
}
module _Builtin_stdbool [system] {
header "stdbool.h"
export *
}
module _Builtin_stddef [system] {
textual header "stddef.h"
// __stddef_max_align_t.h is always in this module, even if
// -fbuiltin-headers-in-system-modules is passed.
explicit module max_align_t {
header "__stddef_max_align_t.h"
export *
}
explicit module null {
header "__stddef_null.h"
export *
}
explicit module nullptr_t {
header "__stddef_nullptr_t.h"
export *
}
explicit module offsetof {
header "__stddef_offsetof.h"
export *
}
explicit module ptrdiff_t {
header "__stddef_ptrdiff_t.h"
export *
}
explicit module rsize_t {
header "__stddef_rsize_t.h"
export *
}
explicit module size_t {
header "__stddef_size_t.h"
export *
}
explicit module unreachable {
header "__stddef_unreachable.h"
export *
}
explicit module wchar_t {
header "__stddef_wchar_t.h"
export *
}
}
// wint_t is provided by <wchar.h> and not <stddef.h>. It's here
// for compatibility, but must be explicitly requested. Therefore
// __stddef_wint_t.h is not part of _Builtin_stddef. It is always in
// this module even if -fbuiltin-headers-in-system-modules is passed.
module _Builtin_stddef_wint_t [system] {
header "__stddef_wint_t.h"
export *
}
module _Builtin_stdint [system] {
header "stdint.h"
export *
}
module _Builtin_stdnoreturn [system] {
header "stdnoreturn.h"
export *
}
module _Builtin_tgmath [system] {
header "tgmath.h"
export *
}
module _Builtin_unwind [system] {
header "unwind.h"
export *
}
// End -fbuiltin-headers-in-system-modules affected modules
module opencl_c {
requires opencl
header "opencl-c.h"

View File

@ -45,6 +45,7 @@
#define __opencl_c_ext_fp32_local_atomic_add 1
#define __opencl_c_ext_fp32_global_atomic_min_max 1
#define __opencl_c_ext_fp32_local_atomic_min_max 1
#define __opencl_c_ext_image_raw10_raw12 1
#endif // defined(__SPIR__) || defined(__SPIRV__)
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
@ -477,6 +478,10 @@ typedef enum memory_order
#if __OPENCL_C_VERSION__ >= CL_VERSION_3_0
#define CLK_UNORM_INT_101010_2 0x10E0
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_3_0
#ifdef __opencl_c_ext_image_raw10_raw12
#define CLK_UNSIGNED_INT_RAW10_EXT 0x10E3
#define CLK_UNSIGNED_INT_RAW12_EXT 0x10E4
#endif // __opencl_c_ext_image_raw10_raw12
// Channel order, numbering must be aligned with cl_channel_order in cl.h
//

View File

@ -1,4 +1,4 @@
/*===-- __clang_openmp_device_functions.h - OpenMP math declares ------ c++ -===
/*===-- __clang_openmp_device_functions.h - OpenMP math declares -*- c++ -*-===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.

View File

@ -18,7 +18,8 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))
__attribute__((__always_inline__, __nodebug__, \
__target__("sse3,no-evex512"), __min_vector_width__(128)))
/// Loads data from an unaligned memory location to elements in a 128-bit
/// vector.

26
lib/include/ppc_wrappers/nmmintrin.h vendored Normal file
View File

@ -0,0 +1,26 @@
/*===---- nmmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
makes explicit use of Intel intrinsics to powerpc64le.
It is the user's responsibility to determine if the results are
acceptable and make additional changes as necessary.
Note that much code that uses Intel intrinsics can be rewritten in
standard C or GNU C extensions, which are more portable and better
optimized across multiple targets. */
#endif
#ifndef NMMINTRIN_H_
#define NMMINTRIN_H_
/* We just include SSE4.1 header file. */
#include <smmintrin.h>
#endif /* NMMINTRIN_H_ */

View File

@ -14,7 +14,7 @@
#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
makes explicit use of Intel intrinsics to powerp64/powerpc64le.
makes explicit use of Intel intrinsics to powerpc64/powerpc64le.
It is the user's responsibility to determine if the results are
acceptable and make additional changes as necessary.
@ -68,10 +68,10 @@ extern __inline __m128d
__asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
__fpscr_save.__fr = __builtin_mffs();
__fpscr_save.__fr = __builtin_ppc_mffs();
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
__fpscr_save.__fpscr &= ~0xf8;
__builtin_mtfsf(0b00000011, __fpscr_save.__fr);
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
/* Insert an artificial "read/write" reference to the variable
read below, to ensure the compiler does not schedule
@ -83,10 +83,15 @@ extern __inline __m128d
switch (__rounding) {
case _MM_FROUND_TO_NEAREST_INT:
__fpscr_save.__fr = __builtin_mffsl();
#ifdef _ARCH_PWR9
__fpscr_save.__fr = __builtin_ppc_mffsl();
#else
__fpscr_save.__fr = __builtin_ppc_mffs();
__fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
__attribute__((fallthrough));
case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
__builtin_set_fpscr_rn(0b00);
__builtin_ppc_set_fpscr_rn(0b00);
/* Insert an artificial "read/write" reference to the variable
read below, to ensure the compiler does not schedule
a read/use of the variable before the FPSCR is modified, above.
@ -102,7 +107,7 @@ extern __inline __m128d
This can be removed if and when GCC PR102783 is fixed.
*/
__asm__("" : : "wa"(__r));
__builtin_set_fpscr_rn(__fpscr_save.__fpscr);
__builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
break;
case _MM_FROUND_TO_NEG_INF:
case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
@ -128,9 +133,14 @@ extern __inline __m128d
*/
__asm__("" : : "wa"(__r));
/* Restore enabled exceptions. */
__fpscr_save.__fr = __builtin_mffsl();
#ifdef _ARCH_PWR9
__fpscr_save.__fr = __builtin_ppc_mffsl();
#else
__fpscr_save.__fr = __builtin_ppc_mffs();
__fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
__fpscr_save.__fpscr |= __enables_save.__fpscr;
__builtin_mtfsf(0b00000011, __fpscr_save.__fr);
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
}
return (__m128d)__r;
}
@ -159,10 +169,10 @@ extern __inline __m128
__asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
__fpscr_save.__fr = __builtin_mffs();
__fpscr_save.__fr = __builtin_ppc_mffs();
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
__fpscr_save.__fpscr &= ~0xf8;
__builtin_mtfsf(0b00000011, __fpscr_save.__fr);
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
/* Insert an artificial "read/write" reference to the variable
read below, to ensure the compiler does not schedule
@ -174,10 +184,15 @@ extern __inline __m128
switch (__rounding) {
case _MM_FROUND_TO_NEAREST_INT:
__fpscr_save.__fr = __builtin_mffsl();
#ifdef _ARCH_PWR9
__fpscr_save.__fr = __builtin_ppc_mffsl();
#else
__fpscr_save.__fr = __builtin_ppc_mffs();
__fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
__attribute__((fallthrough));
case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
__builtin_set_fpscr_rn(0b00);
__builtin_ppc_set_fpscr_rn(0b00);
/* Insert an artificial "read/write" reference to the variable
read below, to ensure the compiler does not schedule
a read/use of the variable before the FPSCR is modified, above.
@ -193,7 +208,7 @@ extern __inline __m128
This can be removed if and when GCC PR102783 is fixed.
*/
__asm__("" : : "wa"(__r));
__builtin_set_fpscr_rn(__fpscr_save.__fpscr);
__builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
break;
case _MM_FROUND_TO_NEG_INF:
case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
@ -219,9 +234,14 @@ extern __inline __m128
*/
__asm__("" : : "wa"(__r));
/* Restore enabled exceptions. */
__fpscr_save.__fr = __builtin_mffsl();
#ifdef _ARCH_PWR9
__fpscr_save.__fr = __builtin_ppc_mffsl();
#else
__fpscr_save.__fr = __builtin_ppc_mffs();
__fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
__fpscr_save.__fpscr |= __enables_save.__fpscr;
__builtin_mtfsf(0b00000011, __fpscr_save.__fr);
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
}
return (__m128)__r;
}

195
lib/include/riscv_bitmanip.h vendored Normal file
View File

@ -0,0 +1,195 @@
/*===---- riscv_bitmanip.h - RISC-V Zb* intrinsics --------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __RISCV_BITMANIP_H
#define __RISCV_BITMANIP_H
#include <stdint.h>
#if defined(__cplusplus)
extern "C" {
#endif
#if defined(__riscv_zbb)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_orc_b_32(uint32_t __x) {
return __builtin_riscv_orc_b_32(__x);
}
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
__riscv_clz_32(uint32_t __x) {
return __builtin_riscv_clz_32(__x);
}
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
__riscv_ctz_32(uint32_t __x) {
return __builtin_riscv_ctz_32(__x);
}
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
__riscv_cpop_32(uint32_t __x) {
return __builtin_popcount(__x);
}
#if __riscv_xlen == 64
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_orc_b_64(uint64_t __x) {
return __builtin_riscv_orc_b_64(__x);
}
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
__riscv_clz_64(uint64_t __x) {
return __builtin_riscv_clz_64(__x);
}
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
__riscv_ctz_64(uint64_t __x) {
return __builtin_riscv_ctz_64(__x);
}
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
__riscv_cpop_64(uint64_t __x) {
return __builtin_popcountll(__x);
}
#endif
#endif // defined(__riscv_zbb)
#if defined(__riscv_zbb) || defined(__riscv_zbkb)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_rev8_32(uint32_t __x) {
return __builtin_bswap32(__x);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_rol_32(uint32_t __x, uint32_t __y) {
return __builtin_rotateleft32(__x, __y);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_ror_32(uint32_t __x, uint32_t __y) {
return __builtin_rotateright32(__x, __y);
}
#if __riscv_xlen == 64
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_rev8_64(uint64_t __x) {
return __builtin_bswap64(__x);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_rol_64(uint64_t __x, uint32_t __y) {
return __builtin_rotateleft64(__x, __y);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_ror_64(uint64_t __x, uint32_t __y) {
return __builtin_rotateright64(__x, __y);
}
#endif
#endif // defined(__riscv_zbb) || defined(__riscv_zbkb)
#if defined(__riscv_zbkb)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_brev8_32(uint32_t __x) {
return __builtin_riscv_brev8_32(__x);
}
#if __riscv_xlen == 64
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_brev8_64(uint64_t __x) {
return __builtin_riscv_brev8_64(__x);
}
#endif
#if __riscv_xlen == 32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_unzip_32(uint32_t __x) {
return __builtin_riscv_unzip_32(__x);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_zip_32(uint32_t __x) {
return __builtin_riscv_zip_32(__x);
}
#endif
#endif // defined(__riscv_zbkb)
#if defined(__riscv_zbc)
#if __riscv_xlen == 32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_clmulr_32(uint32_t __x, uint32_t __y) {
return __builtin_riscv_clmulr_32(__x, __y);
}
#endif
#if __riscv_xlen == 64
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_clmulr_64(uint64_t __x, uint64_t __y) {
return __builtin_riscv_clmulr_64(__x, __y);
}
#endif
#endif // defined(__riscv_zbc)
#if defined(__riscv_zbkc) || defined(__riscv_zbc)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_clmul_32(uint32_t __x, uint32_t __y) {
return __builtin_riscv_clmul_32(__x, __y);
}
#if __riscv_xlen == 32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_clmulh_32(uint32_t __x, uint32_t __y) {
return __builtin_riscv_clmulh_32(__x, __y);
}
#endif
#if __riscv_xlen == 64
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_clmul_64(uint64_t __x, uint64_t __y) {
return __builtin_riscv_clmul_64(__x, __y);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_clmulh_64(uint64_t __x, uint64_t __y) {
return __builtin_riscv_clmulh_64(__x, __y);
}
#endif
#endif // defined(__riscv_zbkc) || defined(__riscv_zbc)
#if defined(__riscv_zbkx)
#if __riscv_xlen == 32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_xperm4_32(uint32_t __x, uint32_t __y) {
return __builtin_riscv_xperm4_32(__x, __y);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_xperm8_32(uint32_t __x, uint32_t __y) {
return __builtin_riscv_xperm8_32(__x, __y);
}
#endif
#if __riscv_xlen == 64
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_xperm4_64(uint64_t __x, uint64_t __y) {
return __builtin_riscv_xperm4_64(__x, __y);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_xperm8_64(uint64_t __x, uint64_t __y) {
return __builtin_riscv_xperm8_64(__x, __y);
}
#endif
#endif // defined(__riscv_zbkx)
#if defined(__cplusplus)
}
#endif
#endif

170
lib/include/riscv_crypto.h vendored Normal file
View File

@ -0,0 +1,170 @@
/*===---- riscv_crypto.h - RISC-V Zk* intrinsics ---------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __RISCV_CRYPTO_H
#define __RISCV_CRYPTO_H
#include <stdint.h>
#if defined(__cplusplus)
extern "C" {
#endif
#if defined(__riscv_zknd)
#if __riscv_xlen == 32
#define __riscv_aes32dsi(x, y, bs) __builtin_riscv_aes32dsi(x, y, bs)
#define __riscv_aes32dsmi(x, y, bs) __builtin_riscv_aes32dsmi(x, y, bs)
#endif
#if __riscv_xlen == 64
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_aes64ds(uint64_t __x, uint64_t __y) {
return __builtin_riscv_aes64ds(__x, __y);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_aes64dsm(uint64_t __x, uint64_t __y) {
return __builtin_riscv_aes64dsm(__x, __y);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_aes64im(uint64_t __x) {
return __builtin_riscv_aes64im(__x);
}
#endif
#endif // defined(__riscv_zknd)
#if defined(__riscv_zkne)
#if __riscv_xlen == 32
#define __riscv_aes32esi(x, y, bs) __builtin_riscv_aes32esi(x, y, bs)
#define __riscv_aes32esmi(x, y, bs) __builtin_riscv_aes32esmi(x, y, bs)
#endif
#if __riscv_xlen == 64
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_aes64es(uint64_t __x, uint64_t __y) {
return __builtin_riscv_aes64es(__x, __y);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_aes64esm(uint64_t __x, uint64_t __y) {
return __builtin_riscv_aes64esm(__x, __y);
}
#endif
#endif // defined(__riscv_zkne)
#if defined(__riscv_zknd) || defined(__riscv_zkne)
#if __riscv_xlen == 64
#define __riscv_aes64ks1i(x, rnum) __builtin_riscv_aes64ks1i(x, rnum)
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_aes64ks2(uint64_t __x, uint64_t __y) {
return __builtin_riscv_aes64ks2(__x, __y);
}
#endif
#endif // defined(__riscv_zknd) || defined(__riscv_zkne)
#if defined(__riscv_zknh)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha256sig0(uint32_t __x) {
return __builtin_riscv_sha256sig0(__x);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha256sig1(uint32_t __x) {
return __builtin_riscv_sha256sig1(__x);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha256sum0(uint32_t __x) {
return __builtin_riscv_sha256sum0(__x);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha256sum1(uint32_t __x) {
return __builtin_riscv_sha256sum1(__x);
}
#if __riscv_xlen == 32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha512sig0h(uint32_t __x, uint32_t __y) {
return __builtin_riscv_sha512sig0h(__x, __y);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha512sig0l(uint32_t __x, uint32_t __y) {
return __builtin_riscv_sha512sig0l(__x, __y);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha512sig1h(uint32_t __x, uint32_t __y) {
return __builtin_riscv_sha512sig1h(__x, __y);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha512sig1l(uint32_t __x, uint32_t __y) {
return __builtin_riscv_sha512sig1l(__x, __y);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha512sum0r(uint32_t __x, uint32_t __y) {
return __builtin_riscv_sha512sum0r(__x, __y);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha512sum1r(uint32_t __x, uint32_t __y) {
return __builtin_riscv_sha512sum1r(__x, __y);
}
#endif
#if __riscv_xlen == 64
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha512sig0(uint64_t __x) {
return __builtin_riscv_sha512sig0(__x);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha512sig1(uint64_t __x) {
return __builtin_riscv_sha512sig1(__x);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha512sum0(uint64_t __x) {
return __builtin_riscv_sha512sum0(__x);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha512sum1(uint64_t __x) {
return __builtin_riscv_sha512sum1(__x);
}
#endif
#endif // defined(__riscv_zknh)
#if defined(__riscv_zksh)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sm3p0(uint32_t __x) {
return __builtin_riscv_sm3p0(__x);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sm3p1(uint32_t __x) {
return __builtin_riscv_sm3p1(__x);
}
#endif // defined(__riscv_zksh)
#if defined(__riscv_zksed)
#define __riscv_sm4ed(x, y, bs) __builtin_riscv_sm4ed(x, y, bs);
#define __riscv_sm4ks(x, y, bs) __builtin_riscv_sm4ks(x, y, bs);
#endif // defined(__riscv_zksed)
#if defined(__cplusplus)
}
#endif
#endif

View File

@ -21,8 +21,6 @@ enum {
__RISCV_NTLH_ALL
};
#define __riscv_ntl_load(PTR, DOMAIN) __builtin_riscv_ntl_load((PTR), (DOMAIN))
#define __riscv_ntl_store(PTR, VAL, DOMAIN) \
__builtin_riscv_ntl_store((PTR), (VAL), (DOMAIN))
#define __riscv_ntl_load __builtin_riscv_ntl_load
#define __riscv_ntl_store __builtin_riscv_ntl_store
#endif

View File

@ -392,6 +392,37 @@ typedef __rvv_float64m2x4_t vfloat64m2x4_t;
typedef __rvv_float64m4_t vfloat64m4_t;
typedef __rvv_float64m4x2_t vfloat64m4x2_t;
typedef __rvv_float64m8_t vfloat64m8_t;
typedef __rvv_bfloat16mf4_t vbfloat16mf4_t;
typedef __rvv_bfloat16mf4x2_t vbfloat16mf4x2_t;
typedef __rvv_bfloat16mf4x3_t vbfloat16mf4x3_t;
typedef __rvv_bfloat16mf4x4_t vbfloat16mf4x4_t;
typedef __rvv_bfloat16mf4x5_t vbfloat16mf4x5_t;
typedef __rvv_bfloat16mf4x6_t vbfloat16mf4x6_t;
typedef __rvv_bfloat16mf4x7_t vbfloat16mf4x7_t;
typedef __rvv_bfloat16mf4x8_t vbfloat16mf4x8_t;
typedef __rvv_bfloat16mf2_t vbfloat16mf2_t;
typedef __rvv_bfloat16mf2x2_t vbfloat16mf2x2_t;
typedef __rvv_bfloat16mf2x3_t vbfloat16mf2x3_t;
typedef __rvv_bfloat16mf2x4_t vbfloat16mf2x4_t;
typedef __rvv_bfloat16mf2x5_t vbfloat16mf2x5_t;
typedef __rvv_bfloat16mf2x6_t vbfloat16mf2x6_t;
typedef __rvv_bfloat16mf2x7_t vbfloat16mf2x7_t;
typedef __rvv_bfloat16mf2x8_t vbfloat16mf2x8_t;
typedef __rvv_bfloat16m1_t vbfloat16m1_t;
typedef __rvv_bfloat16m1x2_t vbfloat16m1x2_t;
typedef __rvv_bfloat16m1x3_t vbfloat16m1x3_t;
typedef __rvv_bfloat16m1x4_t vbfloat16m1x4_t;
typedef __rvv_bfloat16m1x5_t vbfloat16m1x5_t;
typedef __rvv_bfloat16m1x6_t vbfloat16m1x6_t;
typedef __rvv_bfloat16m1x7_t vbfloat16m1x7_t;
typedef __rvv_bfloat16m1x8_t vbfloat16m1x8_t;
typedef __rvv_bfloat16m2_t vbfloat16m2_t;
typedef __rvv_bfloat16m2x2_t vbfloat16m2x2_t;
typedef __rvv_bfloat16m2x3_t vbfloat16m2x3_t;
typedef __rvv_bfloat16m2x4_t vbfloat16m2x4_t;
typedef __rvv_bfloat16m4_t vbfloat16m4_t;
typedef __rvv_bfloat16m4x2_t vbfloat16m4x2_t;
typedef __rvv_bfloat16m8_t vbfloat16m8_t;
#define __riscv_v_intrinsic_overloading 1
#ifdef __cplusplus

View File

@ -18,8 +18,8 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), \
__min_vector_width__(128)))
__attribute__((__always_inline__, __nodebug__, \
__target__("sse4.1,no-evex512"), __min_vector_width__(128)))
/* SSE4 Rounding macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
@ -645,7 +645,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
/// \returns A 128-bit integer vector containing the data stored at the
/// specified memory location.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_stream_load_si128(__m128i const *__V) {
_mm_stream_load_si128(const void *__V) {
return (__m128i)__builtin_nontemporal_load((const __v2di *)__V);
}

View File

@ -10,10 +10,8 @@
#ifndef __STDALIGN_H
#define __STDALIGN_H
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__cplusplus) || \
(defined(__STDC_VERSION__) && __STDC_VERSION__ < 202000L)
(defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L)
#ifndef __cplusplus
#define alignas _Alignas
#define alignof _Alignof

90
lib/include/stdarg.h vendored
View File

@ -7,45 +7,73 @@
*===-----------------------------------------------------------------------===
*/
#ifndef __STDARG_H
/*
* This header is designed to be included multiple times. If any of the __need_
* macros are defined, then only that subset of interfaces are provided. This
* can be useful for POSIX headers that need to not expose all of stdarg.h, but
* need to use some of its interfaces. Otherwise this header provides all of
* the expected interfaces.
*
* When clang modules are enabled, this header is a textual header. It ignores
* its header guard so that multiple submodules can export its interfaces.
* Take module SM with submodules A and B, whose headers both include stdarg.h
* When SM.A builds, __STDARG_H will be defined. When SM.B builds, the
* definition from SM.A will leak when building without local submodule
* visibility. stdarg.h wouldn't include any of its implementation headers, and
* SM.B wouldn't import any of the stdarg modules, and SM.B's `export *`
* wouldn't export any stdarg interfaces as expected. However, since stdarg.h
* ignores its header guard when building with modules, it all works as
* expected.
*
* When clang modules are not enabled, the header guards can function in the
* normal simple fashion.
*/
#if !defined(__STDARG_H) || __has_feature(modules) || \
defined(__need___va_list) || defined(__need_va_list) || \
defined(__need_va_arg) || defined(__need___va_copy) || \
defined(__need_va_copy)
#ifndef __GNUC_VA_LIST
#define __GNUC_VA_LIST
typedef __builtin_va_list __gnuc_va_list;
#endif
#ifdef __need___va_list
#undef __need___va_list
#else
#if !defined(__need___va_list) && !defined(__need_va_list) && \
!defined(__need_va_arg) && !defined(__need___va_copy) && \
!defined(__need_va_copy)
#define __STDARG_H
#ifndef _VA_LIST
typedef __builtin_va_list va_list;
#define _VA_LIST
#endif
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
/* C2x does not require the second parameter for va_start. */
#define va_start(ap, ...) __builtin_va_start(ap, 0)
#else
/* Versions before C2x do require the second parameter. */
#define va_start(ap, param) __builtin_va_start(ap, param)
#endif
#define va_end(ap) __builtin_va_end(ap)
#define va_arg(ap, type) __builtin_va_arg(ap, type)
#define __need___va_list
#define __need_va_list
#define __need_va_arg
#define __need___va_copy
/* GCC always defines __va_copy, but does not define va_copy unless in c99 mode
* or -ansi is not specified, since it was not part of C90.
*/
#define __va_copy(d,s) __builtin_va_copy(d,s)
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
(defined(__cplusplus) && __cplusplus >= 201103L) || \
!defined(__STRICT_ANSI__)
#define va_copy(dest, src) __builtin_va_copy(dest, src)
#define __need_va_copy
#endif
#endif
#endif /* __STDARG_H */
#ifdef __need___va_list
#include <__stdarg___gnuc_va_list.h>
#undef __need___va_list
#endif /* defined(__need___va_list) */
#endif /* not __STDARG_H */
#ifdef __need_va_list
#include <__stdarg_va_list.h>
#undef __need_va_list
#endif /* defined(__need_va_list) */
#ifdef __need_va_arg
#include <__stdarg_va_arg.h>
#undef __need_va_arg
#endif /* defined(__need_va_arg) */
#ifdef __need___va_copy
#include <__stdarg___va_copy.h>
#undef __need___va_copy
#endif /* defined(__need___va_copy) */
#ifdef __need_va_copy
#include <__stdarg_va_copy.h>
#undef __need_va_copy
#endif /* defined(__need_va_copy) */
#endif

View File

@ -45,16 +45,14 @@ extern "C" {
#define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE
/* 7.17.2 Initialization */
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202000L) || \
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L) || \
defined(__cplusplus)
/* ATOMIC_VAR_INIT was removed in C2x, but still remains in C++23. */
/* ATOMIC_VAR_INIT was removed in C23, but still remains in C++23. */
#define ATOMIC_VAR_INIT(value) (value)
#endif
#if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201710L && \
__STDC_VERSION__ < 202000L) || \
__STDC_VERSION__ < 202311L) || \
(defined(__cplusplus) && __cplusplus >= 202002L)) && \
!defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS)
/* ATOMIC_VAR_INIT was deprecated in C17 and C++20. */

42
lib/include/stdckdint.h vendored Normal file
View File

@ -0,0 +1,42 @@
/*===---- stdckdint.h - Standard header for checking integer----------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __STDCKDINT_H
#define __STDCKDINT_H
/* If we're hosted, fall back to the system's stdckdint.h. FreeBSD, for
* example, already has a Clang-compatible stdckdint.h header.
*
* The `stdckdint.h` header requires C 23 or newer.
*/
#if __STDC_HOSTED__ && __has_include_next(<stdckdint.h>)
#include_next <stdckdint.h>
#else
/* C23 7.20.1 Defines several macros for performing checked integer arithmetic*/
#define __STDC_VERSION_STDCKDINT_H__ 202311L
// Both A and B shall be any integer type other than "plain" char, bool, a bit-
// precise integer type, or an enumerated type, and they need not be the same.
// R shall be a modifiable lvalue of any integer type other than "plain" char,
// bool, a bit-precise integer type, or an enumerated type. It shouldn't be
// short type, either. Otherwise, it may be unable to hold two the result of
// operating two 'int's.
// A diagnostic message will be produced if A or B are not suitable integer
// types, or if R is not a modifiable lvalue of a suitable integer type or R
// is short type.
#define ckd_add(R, A, B) __builtin_add_overflow((A), (B), (R))
#define ckd_sub(R, A, B) __builtin_sub_overflow((A), (B), (R))
#define ckd_mul(R, A, B) __builtin_mul_overflow((A), (B), (R))
#endif /* __STDC_HOSTED__ */
#endif /* __STDCKDINT_H */

166
lib/include/stddef.h vendored
View File

@ -7,126 +7,116 @@
*===-----------------------------------------------------------------------===
*/
#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \
defined(__need_size_t) || defined(__need_wchar_t) || \
defined(__need_NULL) || defined(__need_wint_t)
/*
* This header is designed to be included multiple times. If any of the __need_
* macros are defined, then only that subset of interfaces are provided. This
* can be useful for POSIX headers that need to not expose all of stddef.h, but
* need to use some of its interfaces. Otherwise this header provides all of
* the expected interfaces.
*
* When clang modules are enabled, this header is a textual header. It ignores
* its header guard so that multiple submodules can export its interfaces.
* Take module SM with submodules A and B, whose headers both include stddef.h
* When SM.A builds, __STDDEF_H will be defined. When SM.B builds, the
* definition from SM.A will leak when building without local submodule
* visibility. stddef.h wouldn't include any of its implementation headers, and
* SM.B wouldn't import any of the stddef modules, and SM.B's `export *`
* wouldn't export any stddef interfaces as expected. However, since stddef.h
* ignores its header guard when building with modules, it all works as
* expected.
*
* When clang modules are not enabled, the header guards can function in the
* normal simple fashion.
*/
#if !defined(__STDDEF_H) || __has_feature(modules) || \
(defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1) || \
defined(__need_ptrdiff_t) || defined(__need_size_t) || \
defined(__need_rsize_t) || defined(__need_wchar_t) || \
defined(__need_NULL) || defined(__need_nullptr_t) || \
defined(__need_unreachable) || defined(__need_max_align_t) || \
defined(__need_offsetof) || defined(__need_wint_t)
#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \
!defined(__need_wchar_t) && !defined(__need_NULL) && \
!defined(__need_wint_t)
/* Always define miscellaneous pieces when modules are available. */
#if !__has_feature(modules)
!defined(__need_rsize_t) && !defined(__need_wchar_t) && \
!defined(__need_NULL) && !defined(__need_nullptr_t) && \
!defined(__need_unreachable) && !defined(__need_max_align_t) && \
!defined(__need_offsetof) && !defined(__need_wint_t)
#define __STDDEF_H
#endif
#define __need_ptrdiff_t
#define __need_size_t
/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
* enabled. */
#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1
#define __need_rsize_t
#endif
#define __need_wchar_t
#define __need_NULL
#define __need_STDDEF_H_misc
/* __need_wint_t is intentionally not defined here. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \
defined(__cplusplus)
#define __need_nullptr_t
#endif
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define __need_unreachable
#endif
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \
(defined(__cplusplus) && __cplusplus >= 201103L)
#define __need_max_align_t
#endif
#define __need_offsetof
/* wint_t is provided by <wchar.h> and not <stddef.h>. It's here
* for compatibility, but must be explicitly requested. Therefore
* __need_wint_t is intentionally not defined here. */
#endif
#if defined(__need_ptrdiff_t)
#if !defined(_PTRDIFF_T) || __has_feature(modules)
/* Always define ptrdiff_t when modules are available. */
#if !__has_feature(modules)
#define _PTRDIFF_T
#endif
typedef __PTRDIFF_TYPE__ ptrdiff_t;
#endif
#include <__stddef_ptrdiff_t.h>
#undef __need_ptrdiff_t
#endif /* defined(__need_ptrdiff_t) */
#if defined(__need_size_t)
#if !defined(_SIZE_T) || __has_feature(modules)
/* Always define size_t when modules are available. */
#if !__has_feature(modules)
#define _SIZE_T
#endif
typedef __SIZE_TYPE__ size_t;
#endif
#include <__stddef_size_t.h>
#undef __need_size_t
#endif /*defined(__need_size_t) */
#if defined(__need_STDDEF_H_misc)
/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
* enabled. */
#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \
!defined(_RSIZE_T)) || __has_feature(modules)
/* Always define rsize_t when modules are available. */
#if !__has_feature(modules)
#define _RSIZE_T
#endif
typedef __SIZE_TYPE__ rsize_t;
#endif
#endif /* defined(__need_STDDEF_H_misc) */
#if defined(__need_rsize_t)
#include <__stddef_rsize_t.h>
#undef __need_rsize_t
#endif /* defined(__need_rsize_t) */
#if defined(__need_wchar_t)
#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED)
/* Always define wchar_t when modules are available. */
#if !defined(_WCHAR_T) || __has_feature(modules)
#if !__has_feature(modules)
#define _WCHAR_T
#if defined(_MSC_EXTENSIONS)
#define _WCHAR_T_DEFINED
#endif
#endif
typedef __WCHAR_TYPE__ wchar_t;
#endif
#endif
#include <__stddef_wchar_t.h>
#undef __need_wchar_t
#endif /* defined(__need_wchar_t) */
#if defined(__need_NULL)
#undef NULL
#ifdef __cplusplus
# if !defined(__MINGW32__) && !defined(_MSC_VER)
# define NULL __null
# else
# define NULL 0
# endif
#else
# define NULL ((void*)0)
#endif
#ifdef __cplusplus
#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
namespace std { typedef decltype(nullptr) nullptr_t; }
using ::std::nullptr_t;
#endif
#endif
#include <__stddef_null.h>
#undef __need_NULL
#endif /* defined(__need_NULL) */
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
typedef typeof(nullptr) nullptr_t;
#endif /* defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L */
#if defined(__need_nullptr_t)
#include <__stddef_nullptr_t.h>
#undef __need_nullptr_t
#endif /* defined(__need_nullptr_t) */
#if defined(__need_STDDEF_H_misc) && defined(__STDC_VERSION__) && \
__STDC_VERSION__ >= 202000L
#define unreachable() __builtin_unreachable()
#endif /* defined(__need_STDDEF_H_misc) && >= C23 */
#if defined(__need_unreachable)
#include <__stddef_unreachable.h>
#undef __need_unreachable
#endif /* defined(__need_unreachable) */
#if defined(__need_STDDEF_H_misc)
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \
(defined(__cplusplus) && __cplusplus >= 201103L)
#include "__stddef_max_align_t.h"
#endif
#define offsetof(t, d) __builtin_offsetof(t, d)
#undef __need_STDDEF_H_misc
#endif /* defined(__need_STDDEF_H_misc) */
#if defined(__need_max_align_t)
#include <__stddef_max_align_t.h>
#undef __need_max_align_t
#endif /* defined(__need_max_align_t) */
#if defined(__need_offsetof)
#include <__stddef_offsetof.h>
#undef __need_offsetof
#endif /* defined(__need_offsetof) */
/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use
__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */
#if defined(__need_wint_t)
/* Always define wint_t when modules are available. */
#if !defined(_WINT_T) || __has_feature(modules)
#if !__has_feature(modules)
#define _WINT_T
#endif
typedef __WINT_TYPE__ wint_t;
#endif
#include <__stddef_wint_t.h>
#undef __need_wint_t
#endif /* __need_wint_t */

67
lib/include/stdint.h vendored
View File

@ -499,9 +499,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# define INT64_MAX INT64_C( 9223372036854775807)
# define INT64_MIN (-INT64_C( 9223372036854775807)-1)
# define UINT64_MAX UINT64_C(18446744073709551615)
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT64_WIDTH 64
# define INT64_WIDTH UINT64_WIDTH
@ -545,9 +544,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# define INT_FAST64_MAX __INT_LEAST64_MAX
# define UINT_FAST64_MAX __UINT_LEAST64_MAX
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT_LEAST64_WIDTH __UINT_LEAST64_WIDTH
# define INT_LEAST64_WIDTH UINT_LEAST64_WIDTH
# define UINT_FAST64_WIDTH __UINT_LEAST64_WIDTH
@ -586,9 +583,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT56_MAX
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT56_WIDTH 56
# define INT56_WIDTH UINT56_WIDTH
# define UINT_LEAST56_WIDTH UINT56_WIDTH
@ -635,9 +630,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT48_MAX
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define UINT48_WIDTH 48
#define INT48_WIDTH UINT48_WIDTH
#define UINT_LEAST48_WIDTH UINT48_WIDTH
@ -684,9 +677,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT40_MAX
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT40_WIDTH 40
# define INT40_WIDTH UINT40_WIDTH
# define UINT_LEAST40_WIDTH UINT40_WIDTH
@ -727,9 +718,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT32_MAX
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT32_WIDTH 32
# define INT32_WIDTH UINT32_WIDTH
# undef __UINT_LEAST32_WIDTH
@ -749,9 +738,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# define INT_FAST32_MAX __INT_LEAST32_MAX
# define UINT_FAST32_MAX __UINT_LEAST32_MAX
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT_LEAST32_WIDTH __UINT_LEAST32_WIDTH
# define INT_LEAST32_WIDTH UINT_LEAST32_WIDTH
# define UINT_FAST32_WIDTH __UINT_LEAST32_WIDTH
@ -784,9 +771,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT24_MAX
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT24_WIDTH 24
# define INT24_WIDTH UINT24_WIDTH
# define UINT_LEAST24_WIDTH UINT24_WIDTH
@ -819,9 +804,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT16_MAX
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT16_WIDTH 16
# define INT16_WIDTH UINT16_WIDTH
# undef __UINT_LEAST16_WIDTH
@ -839,9 +822,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# define INT_FAST16_MAX __INT_LEAST16_MAX
# define UINT_FAST16_MAX __UINT_LEAST16_MAX
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT_LEAST16_WIDTH __UINT_LEAST16_WIDTH
# define INT_LEAST16_WIDTH UINT_LEAST16_WIDTH
# define UINT_FAST16_WIDTH __UINT_LEAST16_WIDTH
@ -862,9 +843,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# undef __UINT_LEAST8_MAX
# define __UINT_LEAST8_MAX UINT8_MAX
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT8_WIDTH 8
# define INT8_WIDTH UINT8_WIDTH
# undef __UINT_LEAST8_WIDTH
@ -880,9 +859,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
# define INT_FAST8_MAX __INT_LEAST8_MAX
# define UINT_FAST8_MAX __UINT_LEAST8_MAX
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
# define UINT_LEAST8_WIDTH __UINT_LEAST8_WIDTH
# define INT_LEAST8_WIDTH UINT_LEAST8_WIDTH
# define UINT_FAST8_WIDTH __UINT_LEAST8_WIDTH
@ -907,10 +884,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
#define PTRDIFF_MAX __PTRDIFF_MAX__
#define SIZE_MAX __SIZE_MAX__
/* C2x 7.20.2.4 Width of integer types capable of holding object pointers. */
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
/* C23 7.22.2.4 Width of integer types capable of holding object pointers. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
/* NB: The C standard requires that these be the same value, but the compiler
exposes separate internal width macros. */
#define INTPTR_WIDTH __INTPTR_WIDTH__
@ -928,10 +903,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
#define INTMAX_MAX __INTMAX_MAX__
#define UINTMAX_MAX __UINTMAX_MAX__
/* C2x 7.20.2.5 Width of greatest-width integer types. */
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
/* C23 7.22.2.5 Width of greatest-width integer types. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
/* NB: The C standard requires that these be the same value, but the compiler
exposes separate internal width macros. */
#define INTMAX_WIDTH __INTMAX_WIDTH__
@ -964,10 +937,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)
#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)
/* C2x 7.20.3.x Width of other integer types. */
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
/* C23 7.22.3.x Width of other integer types. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define PTRDIFF_WIDTH __PTRDIFF_WIDTH__
#define SIG_ATOMIC_WIDTH __SIG_ATOMIC_WIDTH__
#define SIZE_WIDTH __SIZE_WIDTH__

View File

@ -15,8 +15,8 @@
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) && \
!defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS)
/* The noreturn macro is deprecated in C2x. We do not mark it as such because
including the header file in C2x is also deprecated and we do not want to
/* The noreturn macro is deprecated in C23. We do not mark it as such because
including the header file in C23 is also deprecated and we do not want to
issue a confusing diagnostic for code which includes <stdnoreturn.h>
followed by code that writes [[noreturn]]. The issue with such code is not
with the attribute, or the use of 'noreturn', but the inclusion of the

View File

@ -17,8 +17,13 @@
#include <pmmintrin.h>
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("ssse3,no-evex512"), __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX \
__attribute__((__always_inline__, __nodebug__, \
__target__("mmx,ssse3,no-evex512"), \
__min_vector_width__(64)))
/// Computes the absolute value of each of the packed 8-bit signed
/// integers in the source operand and stores the 8-bit unsigned integer

51
lib/include/usermsrintrin.h vendored Normal file
View File

@ -0,0 +1,51 @@
/*===--------------- usermsrintrin.h - USERMSR intrinsics -----------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __X86GPRINTRIN_H
#error "Never use <usermsrintrin.h> directly; include <x86gprintrin.h> instead."
#endif // __X86GPRINTRIN_H
#ifndef __USERMSRINTRIN_H
#define __USERMSRINTRIN_H
#ifdef __x86_64__
/// Reads the contents of a 64-bit MSR specified in \a __A into \a dst.
///
/// This intrinsic corresponds to the <c> URDMSR </c> instruction.
/// \param __A
/// An unsigned long long.
///
/// \code{.operation}
/// DEST := MSR[__A]
/// \endcode
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("usermsr")))
_urdmsr(unsigned long long __A) {
return __builtin_ia32_urdmsr(__A);
}
/// Writes the contents of \a __B into the 64-bit MSR specified in \a __A.
///
/// This intrinsic corresponds to the <c> UWRMSR </c> instruction.
///
/// \param __A
/// An unsigned long long.
/// \param __B
/// An unsigned long long.
///
/// \code{.operation}
/// MSR[__A] := __B
/// \endcode
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("usermsr")))
_uwrmsr(unsigned long long __A, unsigned long long __B) {
return __builtin_ia32_uwrmsr(__A, __B);
}
#endif // __x86_64__
#endif // __USERMSRINTRIN_H

View File

@ -18,8 +18,10 @@
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256)))
/* Default attributes for ZMM forms. */
#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS_F \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512f,evex512,vaes"), \
__min_vector_width__(512)))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesenc_epi128(__m256i __A, __m256i __B)

Some files were not shown because too many files have changed in this diff Show More