mirror of
https://github.com/ziglang/zig.git
synced 2025-12-06 06:13:07 +00:00
update C language headers to LLVM 18
release/18.x branch, commit 78b99c73ee4b96fe9ce0e294d4632326afb2db42
This commit is contained in:
parent
e4029b9943
commit
70c85b1bf1
4
lib/include/__clang_cuda_device_functions.h
vendored
4
lib/include/__clang_cuda_device_functions.h
vendored
@ -502,8 +502,8 @@ __DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }
|
||||
__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }
|
||||
__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }
|
||||
__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }
|
||||
__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }
|
||||
__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }
|
||||
__DEVICE__ int __popc(unsigned int __a) { return __nv_popc(__a); }
|
||||
__DEVICE__ int __popcll(unsigned long long __a) { return __nv_popcll(__a); }
|
||||
__DEVICE__ float __powf(float __a, float __b) {
|
||||
return __nv_fast_powf(__a, __b);
|
||||
}
|
||||
|
||||
@ -285,8 +285,8 @@ __DEVICE__ double __nv_normcdfinv(double __a);
|
||||
__DEVICE__ float __nv_normcdfinvf(float __a);
|
||||
__DEVICE__ float __nv_normf(int __a, const float *__b);
|
||||
__DEVICE__ double __nv_norm(int __a, const double *__b);
|
||||
__DEVICE__ int __nv_popc(int __a);
|
||||
__DEVICE__ int __nv_popcll(long long __a);
|
||||
__DEVICE__ int __nv_popc(unsigned int __a);
|
||||
__DEVICE__ int __nv_popcll(unsigned long long __a);
|
||||
__DEVICE__ double __nv_pow(double __a, double __b);
|
||||
__DEVICE__ float __nv_powf(float __a, float __b);
|
||||
__DEVICE__ double __nv_powi(double __a, int __b);
|
||||
|
||||
6
lib/include/__clang_cuda_math.h
vendored
6
lib/include/__clang_cuda_math.h
vendored
@ -36,7 +36,7 @@
|
||||
// because the OpenMP overlay requires constexpr functions here but prior to
|
||||
// c++14 void return functions could not be constexpr.
|
||||
#pragma push_macro("__DEVICE_VOID__")
|
||||
#ifdef __OPENMP_NVPTX__ && defined(__cplusplus) && __cplusplus < 201402L
|
||||
#if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L
|
||||
#define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
|
||||
#else
|
||||
#define __DEVICE_VOID__ __DEVICE__
|
||||
@ -45,9 +45,9 @@
|
||||
// libdevice provides fast low precision and slow full-precision implementations
|
||||
// for some functions. Which one gets selected depends on
|
||||
// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if
|
||||
// -ffast-math or -fcuda-approx-transcendentals are in effect.
|
||||
// -ffast-math or -fgpu-approx-transcendentals are in effect.
|
||||
#pragma push_macro("__FAST_OR_SLOW")
|
||||
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
|
||||
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
|
||||
#define __FAST_OR_SLOW(fast, slow) fast
|
||||
#else
|
||||
#define __FAST_OR_SLOW(fast, slow) slow
|
||||
|
||||
4
lib/include/__clang_cuda_runtime_wrapper.h
vendored
4
lib/include/__clang_cuda_runtime_wrapper.h
vendored
@ -196,12 +196,12 @@ inline __host__ double __signbitd(double x) {
|
||||
|
||||
// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we
|
||||
// get the slow-but-accurate or fast-but-inaccurate versions of functions like
|
||||
// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals.
|
||||
// sin and exp. This is controlled in clang by -fgpu-approx-transcendentals.
|
||||
//
|
||||
// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.
|
||||
// slow divides), so we need to scope our define carefully here.
|
||||
#pragma push_macro("__USE_FAST_MATH__")
|
||||
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
|
||||
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
|
||||
#define __USE_FAST_MATH__ 1
|
||||
#endif
|
||||
|
||||
|
||||
363
lib/include/__clang_hip_math.h
vendored
363
lib/include/__clang_hip_math.h
vendored
@ -14,9 +14,6 @@
|
||||
#endif
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#if defined(__cplusplus)
|
||||
#include <algorithm>
|
||||
#endif
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
@ -32,6 +29,17 @@
|
||||
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
// Device library provides fast low precision and slow full-precision
|
||||
// implementations for some functions. Which one gets selected depends on
|
||||
// __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if
|
||||
// -ffast-math or -fgpu-approx-transcendentals are in effect.
|
||||
#pragma push_macro("__FAST_OR_SLOW")
|
||||
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
|
||||
#define __FAST_OR_SLOW(fast, slow) fast
|
||||
#else
|
||||
#define __FAST_OR_SLOW(fast, slow) slow
|
||||
#endif
|
||||
|
||||
// A few functions return bool type starting only in C++11.
|
||||
#pragma push_macro("__RETURN_TYPE")
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
@ -139,21 +147,180 @@ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
|
||||
}
|
||||
|
||||
// BEGIN FLOAT
|
||||
|
||||
// BEGIN INTRINSICS
|
||||
|
||||
__DEVICE__
|
||||
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __exp10f(float __x) {
|
||||
const float __log2_10 = 0x1.a934f0p+1f;
|
||||
return __builtin_amdgcn_exp2f(__log2_10 * __x);
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
float __expf(float __x) {
|
||||
const float __log2_e = 0x1.715476p+0;
|
||||
return __builtin_amdgcn_exp2f(__log2_e * __x);
|
||||
}
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fadd_rn(float __x, float __y) { return __x + __y; }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fdiv_rn(float __x, float __y) { return __x / __y; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __fdividef(float __x, float __y) { return __x / __y; }
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fmaf_rd(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtn_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_rn(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rte_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_ru(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtp_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_rz(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtz_f32(__x, __y, __z);
|
||||
}
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fmaf_rn(float __x, float __y, float __z) {
|
||||
return __builtin_fmaf(__x, __y, __z);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fmul_rn(float __x, float __y) { return __x * __y; }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __frcp_rn(float __x) { return 1.0f / __x; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fsub_rn(float __x, float __y) { return __x - __y; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __log10f(float __x) { return __builtin_log10f(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __log2f(float __x) { return __builtin_amdgcn_logf(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __logf(float __x) { return __builtin_logf(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
|
||||
|
||||
__DEVICE__
|
||||
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
|
||||
|
||||
__DEVICE__
|
||||
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
|
||||
*__sinptr = __ocml_native_sin_f32(__x);
|
||||
*__cosptr = __ocml_native_cos_f32(__x);
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __tanf(float __x) {
|
||||
return __sinf(__x) * __builtin_amdgcn_rcpf(__cosf(__x));
|
||||
}
|
||||
// END INTRINSICS
|
||||
|
||||
#if defined(__cplusplus)
|
||||
__DEVICE__
|
||||
int abs(int __x) {
|
||||
int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
|
||||
return (__x ^ __sgn) - __sgn;
|
||||
return __builtin_abs(__x);
|
||||
}
|
||||
__DEVICE__
|
||||
long labs(long __x) {
|
||||
long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
|
||||
return (__x ^ __sgn) - __sgn;
|
||||
return __builtin_labs(__x);
|
||||
}
|
||||
__DEVICE__
|
||||
long long llabs(long long __x) {
|
||||
long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
|
||||
return (__x ^ __sgn) - __sgn;
|
||||
return __builtin_llabs(__x);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -188,7 +355,7 @@ __DEVICE__
|
||||
float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); }
|
||||
|
||||
__DEVICE__
|
||||
float cosf(float __x) { return __ocml_cos_f32(__x); }
|
||||
float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float coshf(float __x) { return __ocml_cosh_f32(__x); }
|
||||
@ -321,13 +488,13 @@ __DEVICE__
|
||||
float log1pf(float __x) { return __ocml_log1p_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float log2f(float __x) { return __builtin_log2f(__x); }
|
||||
float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float logbf(float __x) { return __ocml_logb_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float logf(float __x) { return __builtin_logf(__x); }
|
||||
float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); }
|
||||
|
||||
__DEVICE__
|
||||
long int lrintf(float __x) { return __builtin_rintf(__x); }
|
||||
@ -401,7 +568,7 @@ float normf(int __dim,
|
||||
++__a;
|
||||
}
|
||||
|
||||
return __ocml_sqrt_f32(__r);
|
||||
return __builtin_sqrtf(__r);
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
@ -483,9 +650,13 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
#ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__
|
||||
__sincosf(__x, __sinptr, __cosptr);
|
||||
#else
|
||||
*__sinptr =
|
||||
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
|
||||
*__cosptr = __tmp;
|
||||
#endif
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
@ -500,7 +671,7 @@ void sincospif(float __x, float *__sinptr, float *__cosptr) {
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
float sinf(float __x) { return __ocml_sin_f32(__x); }
|
||||
float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float sinhf(float __x) { return __ocml_sinh_f32(__x); }
|
||||
@ -509,7 +680,7 @@ __DEVICE__
|
||||
float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
|
||||
float sqrtf(float __x) { return __builtin_sqrtf(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float tanf(float __x) { return __ocml_tan_f32(__x); }
|
||||
@ -551,158 +722,7 @@ float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
|
||||
return __x1;
|
||||
}
|
||||
|
||||
// BEGIN INTRINSICS
|
||||
|
||||
__DEVICE__
|
||||
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __expf(float __x) { return __ocml_native_exp_f32(__x); }
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fadd_rn(float __x, float __y) { return __x + __y; }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fdiv_rn(float __x, float __y) { return __x / __y; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __fdividef(float __x, float __y) { return __x / __y; }
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fmaf_rd(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtn_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_rn(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rte_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_ru(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtp_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_rz(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtz_f32(__x, __y, __z);
|
||||
}
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fmaf_rn(float __x, float __y, float __z) {
|
||||
return __builtin_fmaf(__x, __y, __z);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fmul_rn(float __x, float __y) { return __x * __y; }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __frcp_rn(float __x) { return 1.0f / __x; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fsub_rn(float __x, float __y) { return __x - __y; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __logf(float __x) { return __ocml_native_log_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
|
||||
|
||||
__DEVICE__
|
||||
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
|
||||
|
||||
__DEVICE__
|
||||
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
|
||||
*__sinptr = __ocml_native_sin_f32(__x);
|
||||
*__cosptr = __ocml_native_cos_f32(__x);
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __tanf(float __x) { return __ocml_tan_f32(__x); }
|
||||
// END INTRINSICS
|
||||
// END FLOAT
|
||||
|
||||
// BEGIN DOUBLE
|
||||
@ -941,7 +961,7 @@ double norm(int __dim,
|
||||
++__a;
|
||||
}
|
||||
|
||||
return __ocml_sqrt_f64(__r);
|
||||
return __builtin_sqrt(__r);
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
@ -1064,7 +1084,7 @@ __DEVICE__
|
||||
double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
|
||||
|
||||
__DEVICE__
|
||||
double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
|
||||
double sqrt(double __x) { return __builtin_sqrt(__x); }
|
||||
|
||||
__DEVICE__
|
||||
double tan(double __x) { return __ocml_tan_f64(__x); }
|
||||
@ -1198,7 +1218,7 @@ __DEVICE__
|
||||
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
|
||||
#else
|
||||
__DEVICE__
|
||||
double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
|
||||
double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
@ -1288,16 +1308,17 @@ double min(double __x, double __y) { return __builtin_fmin(__x, __y); }
|
||||
|
||||
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
|
||||
__host__ inline static int min(int __arg1, int __arg2) {
|
||||
return std::min(__arg1, __arg2);
|
||||
return __arg1 < __arg2 ? __arg1 : __arg2;
|
||||
}
|
||||
|
||||
__host__ inline static int max(int __arg1, int __arg2) {
|
||||
return std::max(__arg1, __arg2);
|
||||
return __arg1 > __arg2 ? __arg1 : __arg2;
|
||||
}
|
||||
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
|
||||
#endif
|
||||
|
||||
#pragma pop_macro("__DEVICE__")
|
||||
#pragma pop_macro("__RETURN_TYPE")
|
||||
#pragma pop_macro("__FAST_OR_SLOW")
|
||||
|
||||
#endif // __CLANG_HIP_MATH_H__
|
||||
|
||||
114
lib/include/__clang_hip_runtime_wrapper.h
vendored
114
lib/include/__clang_hip_runtime_wrapper.h
vendored
@ -46,6 +46,67 @@ extern "C" {
|
||||
}
|
||||
#endif //__cplusplus
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#if __has_include("hip/hip_version.h")
|
||||
#include "hip/hip_version.h"
|
||||
#endif // __has_include("hip/hip_version.h")
|
||||
#endif // __HIPCC_RTC__
|
||||
|
||||
typedef __SIZE_TYPE__ __hip_size_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif //__cplusplus
|
||||
|
||||
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
|
||||
__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
|
||||
__device__ void __ockl_dm_dealloc(unsigned long long __addr);
|
||||
#if __has_feature(address_sanitizer)
|
||||
__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
|
||||
unsigned long long __pc);
|
||||
__device__ void __asan_free_impl(unsigned long long __addr,
|
||||
unsigned long long __pc);
|
||||
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
return (void *)__asan_malloc_impl(__size, __pc);
|
||||
}
|
||||
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
__asan_free_impl((unsigned long long)__ptr, __pc);
|
||||
}
|
||||
#else // __has_feature(address_sanitizer)
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return (void *) __ockl_dm_alloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__ockl_dm_dealloc((unsigned long long)__ptr);
|
||||
}
|
||||
#endif // __has_feature(address_sanitizer)
|
||||
#else // HIP version check
|
||||
#if __HIP_ENABLE_DEVICE_MALLOC__
|
||||
__device__ void *__hip_malloc(__hip_size_t __size);
|
||||
__device__ void *__hip_free(void *__ptr);
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return __hip_malloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__hip_free(__ptr);
|
||||
}
|
||||
#else // __HIP_ENABLE_DEVICE_MALLOC__
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
__builtin_trap();
|
||||
return (void *)0;
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__builtin_trap();
|
||||
}
|
||||
#endif // __HIP_ENABLE_DEVICE_MALLOC__
|
||||
#endif // HIP version check
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif //__cplusplus
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
@ -71,59 +132,6 @@ typedef __SIZE_TYPE__ size_t;
|
||||
#define INT_MAX __INTMAX_MAX__
|
||||
#endif // __HIPCC_RTC__
|
||||
|
||||
typedef __SIZE_TYPE__ __hip_size_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif //__cplusplus
|
||||
|
||||
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
|
||||
extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
|
||||
extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
|
||||
#if __has_feature(address_sanitizer)
|
||||
extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc);
|
||||
extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc);
|
||||
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
return (void *)__asan_malloc_impl(__size, __pc);
|
||||
}
|
||||
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
__asan_free_impl((unsigned long long)__ptr, __pc);
|
||||
}
|
||||
#else
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return (void *) __ockl_dm_alloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__ockl_dm_dealloc((unsigned long long)__ptr);
|
||||
}
|
||||
#endif // __has_feature(address_sanitizer)
|
||||
#else // HIP version check
|
||||
#if __HIP_ENABLE_DEVICE_MALLOC__
|
||||
__device__ void *__hip_malloc(__hip_size_t __size);
|
||||
__device__ void *__hip_free(void *__ptr);
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return __hip_malloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__hip_free(__ptr);
|
||||
}
|
||||
#else
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
__builtin_trap();
|
||||
return (void *)0;
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__builtin_trap();
|
||||
}
|
||||
#endif
|
||||
#endif // HIP version check
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif //__cplusplus
|
||||
|
||||
#include <__clang_hip_libdevice_declares.h>
|
||||
#include <__clang_hip_math.h>
|
||||
#include <__clang_hip_stdlib.h>
|
||||
|
||||
13
lib/include/__stdarg___gnuc_va_list.h
vendored
Normal file
13
lib/include/__stdarg___gnuc_va_list.h
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
/*===---- __stdarg___gnuc_va_list.h - Definition of __gnuc_va_list ---------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __GNUC_VA_LIST
|
||||
#define __GNUC_VA_LIST
|
||||
typedef __builtin_va_list __gnuc_va_list;
|
||||
#endif
|
||||
12
lib/include/__stdarg___va_copy.h
vendored
Normal file
12
lib/include/__stdarg___va_copy.h
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
/*===---- __stdarg___va_copy.h - Definition of __va_copy -------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __va_copy
|
||||
#define __va_copy(d, s) __builtin_va_copy(d, s)
|
||||
#endif
|
||||
22
lib/include/__stdarg_va_arg.h
vendored
Normal file
22
lib/include/__stdarg_va_arg.h
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
/*===---- __stdarg_va_arg.h - Definitions of va_start, va_arg, va_end-------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef va_arg
|
||||
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
/* C23 does not require the second parameter for va_start. */
|
||||
#define va_start(ap, ...) __builtin_va_start(ap, 0)
|
||||
#else
|
||||
/* Versions before C23 do require the second parameter. */
|
||||
#define va_start(ap, param) __builtin_va_start(ap, param)
|
||||
#endif
|
||||
#define va_end(ap) __builtin_va_end(ap)
|
||||
#define va_arg(ap, type) __builtin_va_arg(ap, type)
|
||||
|
||||
#endif
|
||||
12
lib/include/__stdarg_va_copy.h
vendored
Normal file
12
lib/include/__stdarg_va_copy.h
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
/*===---- __stdarg_va_copy.h - Definition of va_copy------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef va_copy
|
||||
#define va_copy(dest, src) __builtin_va_copy(dest, src)
|
||||
#endif
|
||||
13
lib/include/__stdarg_va_list.h
vendored
Normal file
13
lib/include/__stdarg_va_list.h
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
/*===---- __stdarg_va_list.h - Definition of va_list -----------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef _VA_LIST
|
||||
#define _VA_LIST
|
||||
typedef __builtin_va_list va_list;
|
||||
#endif
|
||||
2
lib/include/__stddef_max_align_t.h
vendored
2
lib/include/__stddef_max_align_t.h
vendored
@ -1,4 +1,4 @@
|
||||
/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===
|
||||
/*===---- __stddef_max_align_t.h - Definition of max_align_t ---------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
|
||||
29
lib/include/__stddef_null.h
vendored
Normal file
29
lib/include/__stddef_null.h
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
/*===---- __stddef_null.h - Definition of NULL -----------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#if !defined(NULL) || !__building_module(_Builtin_stddef)
|
||||
|
||||
/* linux/stddef.h will define NULL to 0. glibc (and other) headers then define
|
||||
* __need_NULL and rely on stddef.h to redefine NULL to the correct value again.
|
||||
* Modules don't support redefining macros like that, but support that pattern
|
||||
* in the non-modules case.
|
||||
*/
|
||||
#undef NULL
|
||||
|
||||
#ifdef __cplusplus
|
||||
#if !defined(__MINGW32__) && !defined(_MSC_VER)
|
||||
#define NULL __null
|
||||
#else
|
||||
#define NULL 0
|
||||
#endif
|
||||
#else
|
||||
#define NULL ((void*)0)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
29
lib/include/__stddef_nullptr_t.h
vendored
Normal file
29
lib/include/__stddef_nullptr_t.h
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
/*===---- __stddef_nullptr_t.h - Definition of nullptr_t -------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(_NULLPTR_T) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define _NULLPTR_T
|
||||
|
||||
#ifdef __cplusplus
|
||||
#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
|
||||
namespace std {
|
||||
typedef decltype(nullptr) nullptr_t;
|
||||
}
|
||||
using ::std::nullptr_t;
|
||||
#endif
|
||||
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
typedef typeof(nullptr) nullptr_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
17
lib/include/__stddef_offsetof.h
vendored
Normal file
17
lib/include/__stddef_offsetof.h
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
/*===---- __stddef_offsetof.h - Definition of offsetof ---------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(offsetof) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define offsetof(t, d) __builtin_offsetof(t, d)
|
||||
#endif
|
||||
20
lib/include/__stddef_ptrdiff_t.h
vendored
Normal file
20
lib/include/__stddef_ptrdiff_t.h
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
/*===---- __stddef_ptrdiff_t.h - Definition of ptrdiff_t -------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(_PTRDIFF_T) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define _PTRDIFF_T
|
||||
|
||||
typedef __PTRDIFF_TYPE__ ptrdiff_t;
|
||||
|
||||
#endif
|
||||
20
lib/include/__stddef_rsize_t.h
vendored
Normal file
20
lib/include/__stddef_rsize_t.h
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
/*===---- __stddef_rsize_t.h - Definition of rsize_t -----------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(_RSIZE_T) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define _RSIZE_T
|
||||
|
||||
typedef __SIZE_TYPE__ rsize_t;
|
||||
|
||||
#endif
|
||||
20
lib/include/__stddef_size_t.h
vendored
Normal file
20
lib/include/__stddef_size_t.h
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
/*===---- __stddef_size_t.h - Definition of size_t -------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(_SIZE_T) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define _SIZE_T
|
||||
|
||||
typedef __SIZE_TYPE__ size_t;
|
||||
|
||||
#endif
|
||||
21
lib/include/__stddef_unreachable.h
vendored
Normal file
21
lib/include/__stddef_unreachable.h
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
/*===---- __stddef_unreachable.h - Definition of unreachable ---------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(unreachable) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define unreachable() __builtin_unreachable()
|
||||
#endif
|
||||
|
||||
#endif
|
||||
28
lib/include/__stddef_wchar_t.h
vendored
Normal file
28
lib/include/__stddef_wchar_t.h
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
/*===---- __stddef_wchar_t.h - Definition of wchar_t -----------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED)
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(_WCHAR_T) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define _WCHAR_T
|
||||
|
||||
#ifdef _MSC_EXTENSIONS
|
||||
#define _WCHAR_T_DEFINED
|
||||
#endif
|
||||
|
||||
typedef __WCHAR_TYPE__ wchar_t;
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
15
lib/include/__stddef_wint_t.h
vendored
Normal file
15
lib/include/__stddef_wint_t.h
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
/*===---- __stddef_wint_t.h - Definition of wint_t -------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef _WINT_T
|
||||
#define _WINT_T
|
||||
|
||||
typedef __WINT_TYPE__ wint_t;
|
||||
|
||||
#endif
|
||||
160
lib/include/adcintrin.h
vendored
Normal file
160
lib/include/adcintrin.h
vendored
Normal file
@ -0,0 +1,160 @@
|
||||
/*===---- adcintrin.h - ADC intrinsics -------------------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __ADCINTRIN_H
|
||||
#define __ADCINTRIN_H
|
||||
|
||||
#if !defined(__i386__) && !defined(__x86_64__)
|
||||
#error "This header is only meant to be used on x86 and x64 architecture"
|
||||
#endif
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
/* Use C++ inline semantics in C++, GNU inline for C mode. */
|
||||
#if defined(__cplusplus)
|
||||
#define __INLINE __inline
|
||||
#else
|
||||
#define __INLINE static __inline
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
|
||||
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
|
||||
/// at \a __p, and returns the 8-bit carry-out (carry flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store32(__p, __x + __y + temp)
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ADC instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// A 32-bit unsigned addend.
|
||||
/// \param __y
|
||||
/// A 32-bit unsigned addend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
|
||||
unsigned int __x,
|
||||
unsigned int __y,
|
||||
unsigned int *__p) {
|
||||
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
|
||||
/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
|
||||
/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
|
||||
/// and returns the 8-bit carry-out (carry or overflow flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store32(__p, __x - (__y + temp))
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c SBB instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// The 32-bit unsigned minuend.
|
||||
/// \param __y
|
||||
/// The 32-bit unsigned subtrahend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the difference.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
|
||||
unsigned int __x,
|
||||
unsigned int __y,
|
||||
unsigned int *__p) {
|
||||
return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
|
||||
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
|
||||
/// at \a __p, and returns the 8-bit carry-out (carry flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store64(__p, __x + __y + temp)
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ADC instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// A 64-bit unsigned addend.
|
||||
/// \param __y
|
||||
/// A 64-bit unsigned addend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS
|
||||
_addcarry_u64(unsigned char __cf, unsigned long long __x,
|
||||
unsigned long long __y, unsigned long long *__p) {
|
||||
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
|
||||
/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
|
||||
/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
|
||||
/// and returns the 8-bit carry-out (carry or overflow flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store64(__p, __x - (__y + temp))
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ADC instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// The 64-bit unsigned minuend.
|
||||
/// \param __y
|
||||
/// The 64-bit unsigned subtrahend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the difference.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS
|
||||
_subborrow_u64(unsigned char __cf, unsigned long long __x,
|
||||
unsigned long long __y, unsigned long long *__p) {
|
||||
return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef __INLINE
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* __ADCINTRIN_H */
|
||||
139
lib/include/adxintrin.h
vendored
139
lib/include/adxintrin.h
vendored
@ -15,7 +15,8 @@
|
||||
#define __ADXINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
|
||||
|
||||
/* Use C++ inline semantics in C++, GNU inline for C mode. */
|
||||
#if defined(__cplusplus)
|
||||
@ -53,9 +54,9 @@ extern "C" {
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
|
||||
_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarryx_u32(unsigned char __cf,
|
||||
unsigned int __x,
|
||||
unsigned int __y,
|
||||
unsigned int *__p) {
|
||||
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
|
||||
}
|
||||
@ -84,144 +85,18 @@ __INLINE unsigned char
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS
|
||||
_addcarryx_u64(unsigned char __cf, unsigned long long __x,
|
||||
unsigned long long __y, unsigned long long *__p) {
|
||||
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Intrinsics that are also available if __ADX__ is undefined. */
|
||||
|
||||
/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
|
||||
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
|
||||
/// at \a __p, and returns the 8-bit carry-out (carry flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store32(__p, __x + __y + temp)
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ADC instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// A 32-bit unsigned addend.
|
||||
/// \param __y
|
||||
/// A 32-bit unsigned addend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
|
||||
unsigned int __x,
|
||||
unsigned int __y,
|
||||
unsigned int *__p) {
|
||||
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
|
||||
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
|
||||
/// at \a __p, and returns the 8-bit carry-out (carry flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store64(__p, __x + __y + temp)
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ADC instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// A 64-bit unsigned addend.
|
||||
/// \param __y
|
||||
/// A 64-bit unsigned addend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS
|
||||
_addcarry_u64(unsigned char __cf, unsigned long long __x,
|
||||
unsigned long long __y, unsigned long long *__p) {
|
||||
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
|
||||
/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
|
||||
/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
|
||||
/// and returns the 8-bit carry-out (carry or overflow flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store32(__p, __x - (__y + temp))
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c SBB instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// The 32-bit unsigned minuend.
|
||||
/// \param __y
|
||||
/// The 32-bit unsigned subtrahend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the difference.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
|
||||
unsigned int __x,
|
||||
unsigned int __y,
|
||||
unsigned int *__p) {
|
||||
return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
|
||||
/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
|
||||
/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
|
||||
/// and returns the 8-bit carry-out (carry or overflow flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store64(__p, __x - (__y + temp))
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c SBB instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// The 64-bit unsigned minuend.
|
||||
/// \param __y
|
||||
/// The 64-bit unsigned subtrahend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the difference.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS
|
||||
_subborrow_u64(unsigned char __cf, unsigned long long __x,
|
||||
unsigned long long __y, unsigned long long *__p) {
|
||||
return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef __INLINE
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* __ADXINTRIN_H */
|
||||
|
||||
43
lib/include/altivec.h
vendored
43
lib/include/altivec.h
vendored
@ -14647,67 +14647,86 @@ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,
|
||||
|
||||
static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a,
|
||||
int __b) {
|
||||
vector signed char __res = (vector signed char)(0);
|
||||
__res[__b & 0x7] = __a;
|
||||
const vector signed char __zero = (vector signed char)0;
|
||||
vector signed char __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1);
|
||||
__res[__b & 0xf] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned char __ATTRS_o_ai
|
||||
vec_promote(unsigned char __a, int __b) {
|
||||
vector unsigned char __res = (vector unsigned char)(0);
|
||||
__res[__b & 0x7] = __a;
|
||||
const vector unsigned char __zero = (vector unsigned char)(0);
|
||||
vector unsigned char __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1);
|
||||
__res[__b & 0xf] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) {
|
||||
vector short __res = (vector short)(0);
|
||||
const vector short __zero = (vector short)(0);
|
||||
vector short __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
|
||||
__res[__b & 0x7] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned short __ATTRS_o_ai
|
||||
vec_promote(unsigned short __a, int __b) {
|
||||
vector unsigned short __res = (vector unsigned short)(0);
|
||||
const vector unsigned short __zero = (vector unsigned short)(0);
|
||||
vector unsigned short __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
|
||||
__res[__b & 0x7] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) {
|
||||
vector int __res = (vector int)(0);
|
||||
const vector int __zero = (vector int)(0);
|
||||
vector int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
|
||||
__res[__b & 0x3] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a,
|
||||
int __b) {
|
||||
vector unsigned int __res = (vector unsigned int)(0);
|
||||
const vector unsigned int __zero = (vector unsigned int)(0);
|
||||
vector unsigned int __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
|
||||
__res[__b & 0x3] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
|
||||
vector float __res = (vector float)(0);
|
||||
const vector float __zero = (vector float)(0);
|
||||
vector float __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
|
||||
__res[__b & 0x3] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
#ifdef __VSX__
|
||||
static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) {
|
||||
vector double __res = (vector double)(0);
|
||||
const vector double __zero = (vector double)(0);
|
||||
vector double __res = __builtin_shufflevector(__zero, __zero, -1, -1);
|
||||
__res[__b & 0x1] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector signed long long __ATTRS_o_ai
|
||||
vec_promote(signed long long __a, int __b) {
|
||||
vector signed long long __res = (vector signed long long)(0);
|
||||
const vector signed long long __zero = (vector signed long long)(0);
|
||||
vector signed long long __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1);
|
||||
__res[__b & 0x1] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned long long __ATTRS_o_ai
|
||||
vec_promote(unsigned long long __a, int __b) {
|
||||
vector unsigned long long __res = (vector unsigned long long)(0);
|
||||
const vector unsigned long long __zero = (vector unsigned long long)(0);
|
||||
vector unsigned long long __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1);
|
||||
__res[__b & 0x1] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
8
lib/include/ammintrin.h
vendored
8
lib/include/ammintrin.h
vendored
@ -155,9 +155,9 @@ _mm_insert_si64(__m128i __x, __m128i __y)
|
||||
/// \param __a
|
||||
/// The 64-bit double-precision floating-point register value to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_stream_sd(double *__p, __m128d __a)
|
||||
_mm_stream_sd(void *__p, __m128d __a)
|
||||
{
|
||||
__builtin_ia32_movntsd(__p, (__v2df)__a);
|
||||
__builtin_ia32_movntsd((double *)__p, (__v2df)__a);
|
||||
}
|
||||
|
||||
/// Stores a 32-bit single-precision floating-point value in a 32-bit
|
||||
@ -173,9 +173,9 @@ _mm_stream_sd(double *__p, __m128d __a)
|
||||
/// \param __a
|
||||
/// The 32-bit single-precision floating-point register value to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_stream_ss(float *__p, __m128 __a)
|
||||
_mm_stream_ss(void *__p, __m128 __a)
|
||||
{
|
||||
__builtin_ia32_movntss(__p, (__v4sf)__a);
|
||||
__builtin_ia32_movntss((float *)__p, (__v4sf)__a);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
142
lib/include/arm_acle.h
vendored
142
lib/include/arm_acle.h
vendored
@ -4,6 +4,13 @@
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
* The Arm C Language Extensions specifications can be found in the following
|
||||
* link: https://github.com/ARM-software/acle/releases
|
||||
*
|
||||
* The ACLE section numbers are subject to change. When consulting the
|
||||
* specifications, it is recommended to search using section titles if
|
||||
* the section numbers look outdated.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
@ -20,8 +27,8 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
|
||||
/* 8.3 Memory barriers */
|
||||
/* 7 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
|
||||
/* 7.3 Memory barriers */
|
||||
#if !__has_builtin(__dmb)
|
||||
#define __dmb(i) __builtin_arm_dmb(i)
|
||||
#endif
|
||||
@ -32,7 +39,7 @@ extern "C" {
|
||||
#define __isb(i) __builtin_arm_isb(i)
|
||||
#endif
|
||||
|
||||
/* 8.4 Hints */
|
||||
/* 7.4 Hints */
|
||||
|
||||
#if !__has_builtin(__wfi)
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
|
||||
@ -68,7 +75,7 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v
|
||||
#define __dbg(t) __builtin_arm_dbg(t)
|
||||
#endif
|
||||
|
||||
/* 8.5 Swap */
|
||||
/* 7.5 Swap */
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__swp(uint32_t __x, volatile uint32_t *__p) {
|
||||
uint32_t v;
|
||||
@ -78,8 +85,8 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
|
||||
return v;
|
||||
}
|
||||
|
||||
/* 8.6 Memory prefetch intrinsics */
|
||||
/* 8.6.1 Data prefetch */
|
||||
/* 7.6 Memory prefetch intrinsics */
|
||||
/* 7.6.1 Data prefetch */
|
||||
#define __pld(addr) __pldx(0, 0, 0, addr)
|
||||
|
||||
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
|
||||
@ -90,7 +97,7 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
|
||||
__builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
|
||||
#endif
|
||||
|
||||
/* 8.6.2 Instruction prefetch */
|
||||
/* 7.6.2 Instruction prefetch */
|
||||
#define __pli(addr) __plix(0, 0, addr)
|
||||
|
||||
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
|
||||
@ -101,15 +108,15 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
|
||||
__builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
|
||||
#endif
|
||||
|
||||
/* 8.7 NOP */
|
||||
/* 7.7 NOP */
|
||||
#if !defined(_MSC_VER) || !defined(__aarch64__)
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
|
||||
__builtin_arm_nop();
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9 DATA-PROCESSING INTRINSICS */
|
||||
/* 9.2 Miscellaneous data-processing intrinsics */
|
||||
/* 8 DATA-PROCESSING INTRINSICS */
|
||||
/* 8.2 Miscellaneous data-processing intrinsics */
|
||||
/* ROR */
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__ror(uint32_t __x, uint32_t __y) {
|
||||
@ -248,9 +255,7 @@ __rbitl(unsigned long __t) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* 9.3 16-bit multiplications
|
||||
*/
|
||||
/* 8.3 16-bit multiplications */
|
||||
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
|
||||
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
|
||||
__smulbb(int32_t __a, int32_t __b) {
|
||||
@ -279,18 +284,18 @@ __smulwt(int32_t __a, int32_t __b) {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 9.4 Saturating intrinsics
|
||||
* 8.4 Saturating intrinsics
|
||||
*
|
||||
* FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag
|
||||
* intrinsics are implemented and the flag is enabled.
|
||||
*/
|
||||
/* 9.4.1 Width-specified saturation intrinsics */
|
||||
/* 8.4.1 Width-specified saturation intrinsics */
|
||||
#if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT
|
||||
#define __ssat(x, y) __builtin_arm_ssat(x, y)
|
||||
#define __usat(x, y) __builtin_arm_usat(x, y)
|
||||
#endif
|
||||
|
||||
/* 9.4.2 Saturating addition and subtraction intrinsics */
|
||||
/* 8.4.2 Saturating addition and subtraction intrinsics */
|
||||
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
|
||||
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__qadd(int32_t __t, int32_t __v) {
|
||||
@ -308,7 +313,7 @@ __qdbl(int32_t __t) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.4.3 Accumulating multiplications */
|
||||
/* 8.4.3 Accumulating multiplications */
|
||||
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
|
||||
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
|
||||
@ -337,13 +342,13 @@ __smlawt(int32_t __a, int32_t __b, int32_t __c) {
|
||||
#endif
|
||||
|
||||
|
||||
/* 9.5.4 Parallel 16-bit saturation */
|
||||
/* 8.5.4 Parallel 16-bit saturation */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
|
||||
#define __usat16(x, y) __builtin_arm_usat16(x, y)
|
||||
#endif
|
||||
|
||||
/* 9.5.5 Packing and unpacking */
|
||||
/* 8.5.5 Packing and unpacking */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
typedef int32_t int8x4_t;
|
||||
typedef int32_t int16x2_t;
|
||||
@ -368,7 +373,7 @@ __uxtb16(int8x4_t __a) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.5.6 Parallel selection */
|
||||
/* 8.5.6 Parallel selection */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
|
||||
__sel(uint8x4_t __a, uint8x4_t __b) {
|
||||
@ -376,7 +381,7 @@ __sel(uint8x4_t __a, uint8x4_t __b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.5.7 Parallel 8-bit addition and subtraction */
|
||||
/* 8.5.7 Parallel 8-bit addition and subtraction */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
|
||||
__qadd8(int8x4_t __a, int8x4_t __b) {
|
||||
@ -428,7 +433,7 @@ __usub8(uint8x4_t __a, uint8x4_t __b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.5.8 Sum of 8-bit absolute differences */
|
||||
/* 8.5.8 Sum of 8-bit absolute differences */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__usad8(uint8x4_t __a, uint8x4_t __b) {
|
||||
@ -440,7 +445,7 @@ __usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.5.9 Parallel 16-bit addition and subtraction */
|
||||
/* 8.5.9 Parallel 16-bit addition and subtraction */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
|
||||
__qadd16(int16x2_t __a, int16x2_t __b) {
|
||||
@ -540,7 +545,7 @@ __usub16(uint16x2_t __a, uint16x2_t __b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.5.10 Parallel 16-bit multiplications */
|
||||
/* 8.5.10 Parallel 16-bit multiplications */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
|
||||
@ -592,7 +597,22 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.7 CRC32 intrinsics */
|
||||
/* 8.6 Floating-point data-processing intrinsics */
|
||||
#if (defined(__ARM_FEATURE_DIRECTED_ROUNDING) && \
|
||||
(__ARM_FEATURE_DIRECTED_ROUNDING)) && \
|
||||
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
|
||||
/* Double-precision wrapper: returns __builtin_roundeven(__a) (round to the
 * nearest integral value, ties to even, per the Clang builtin). Only compiled
 * when the enclosing __ARM_FEATURE_DIRECTED_ROUNDING && __ARM_64BIT_STATE
 * guard is satisfied. */
static __inline__ double __attribute__((__always_inline__, __nodebug__))
|
||||
__rintn(double __a) {
|
||||
return __builtin_roundeven(__a);
|
||||
}
|
||||
|
||||
/* Single-precision wrapper: returns __builtin_roundevenf(__a) (round to the
 * nearest integral value, ties to even, per the Clang builtin). Only compiled
 * when the enclosing __ARM_FEATURE_DIRECTED_ROUNDING && __ARM_64BIT_STATE
 * guard is satisfied. */
static __inline__ float __attribute__((__always_inline__, __nodebug__))
|
||||
__rintnf(float __a) {
|
||||
return __builtin_roundevenf(__a);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 8.8 CRC32 intrinsics */
|
||||
#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \
|
||||
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
|
||||
@ -636,6 +656,7 @@ __crc32cd(uint32_t __a, uint64_t __b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 8.6 Floating-point data-processing intrinsics */
|
||||
/* Armv8.3-A Javascript conversion intrinsic */
|
||||
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
|
||||
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
|
||||
@ -687,7 +708,7 @@ __rint64x(double __a) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Armv8.7-A load/store 64-byte intrinsics */
|
||||
/* 8.9 Armv8.7-A load/store 64-byte intrinsics */
|
||||
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
|
||||
typedef struct {
|
||||
uint64_t val[8];
|
||||
@ -713,7 +734,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 10.1 Special register intrinsics */
|
||||
/* 11.1 Special register intrinsics */
|
||||
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
|
||||
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
|
||||
#define __arm_rsr128(sysreg) __builtin_arm_rsr128(sysreg)
|
||||
@ -727,7 +748,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
|
||||
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
|
||||
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))
|
||||
|
||||
/* Memory Tagging Extensions (MTE) Intrinsics */
|
||||
/* 10.3 Memory Tagging Extensions (MTE) Intrinsics */
|
||||
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
|
||||
#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
|
||||
#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
|
||||
@ -736,12 +757,71 @@ __arm_st64bv0(void *__addr, data512_t __value) {
|
||||
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
|
||||
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
|
||||
|
||||
/* Memory Operations Intrinsics */
|
||||
/* 18 Memory Operations Intrinsics */
|
||||
#define __arm_mops_memset_tag(__tagged_address, __value, __size) \
|
||||
__builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
|
||||
#endif
|
||||
|
||||
/* Transactional Memory Extension (TME) Intrinsics */
|
||||
/* 11.3 Coprocessor Intrinsics */
|
||||
#if defined(__ARM_FEATURE_COPROC)
|
||||
|
||||
/* Bit 0 of __ARM_FEATURE_COPROC gates the base coprocessor macros below
 * (cdp, ldc/stc, mcr/mrc, ldcl/stcl). Every macro forwards its arguments
 * unchanged to the __builtin_arm_* builtin of the same name. */
#if (__ARM_FEATURE_COPROC & 0x1)
|
||||
|
||||
/* __arm_cdp for architectures below v8; the 8-M Mainline group further down
 * defines the same macro for those targets. */
#if (__ARM_ARCH < 8)
|
||||
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
|
||||
__builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
|
||||
#endif /* __ARM_ARCH < 8 */
|
||||
|
||||
/* Defined whenever bit 0 is set, with no further architecture check. */
#define __arm_ldc(coproc, CRd, p) __builtin_arm_ldc(coproc, CRd, p)
|
||||
#define __arm_stc(coproc, CRd, p) __builtin_arm_stc(coproc, CRd, p)
|
||||
|
||||
#define __arm_mcr(coproc, opc1, value, CRn, CRm, opc2) \
|
||||
__builtin_arm_mcr(coproc, opc1, value, CRn, CRm, opc2)
|
||||
#define __arm_mrc(coproc, opc1, CRn, CRm, opc2) \
|
||||
__builtin_arm_mrc(coproc, opc1, CRn, CRm, opc2)
|
||||
|
||||
/* Long-form load/store: excluded for __ARM_ARCH == 4 and for v8 and later. */
#if (__ARM_ARCH != 4) && (__ARM_ARCH < 8)
|
||||
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
|
||||
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
|
||||
#endif /* (__ARM_ARCH != 4) && (__ARM_ARCH < 8) */
|
||||
|
||||
/* 8-M / 8.1-M Mainline: supplies the macros that the "< 8" guards above
 * leave undefined. */
#if (__ARM_ARCH_8M_MAIN__) || (__ARM_ARCH_8_1M_MAIN__)
|
||||
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
|
||||
__builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
|
||||
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
|
||||
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
|
||||
#endif /* (__ARM_ARCH_8M_MAIN__) || (__ARM_ARCH_8_1M_MAIN__) */
|
||||
|
||||
#endif /* __ARM_FEATURE_COPROC & 0x1 */
|
||||
|
||||
/* Bit 1 of __ARM_FEATURE_COPROC gates the "2" variants (cdp2, ldc2/stc2,
 * ldc2l/stc2l, mcr2/mrc2). Each macro forwards its arguments unchanged to the
 * __builtin_arm_* builtin of the same name. */
#if (__ARM_FEATURE_COPROC & 0x2)
|
||||
#define __arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) \
|
||||
__builtin_arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2)
|
||||
#define __arm_ldc2(coproc, CRd, p) __builtin_arm_ldc2(coproc, CRd, p)
|
||||
#define __arm_stc2(coproc, CRd, p) __builtin_arm_stc2(coproc, CRd, p)
|
||||
#define __arm_ldc2l(coproc, CRd, p) __builtin_arm_ldc2l(coproc, CRd, p)
|
||||
#define __arm_stc2l(coproc, CRd, p) __builtin_arm_stc2l(coproc, CRd, p)
|
||||
#define __arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) \
|
||||
__builtin_arm_mcr2(coproc, opc1, value, CRn, CRm, opc2)
|
||||
#define __arm_mrc2(coproc, opc1, CRn, CRm, opc2) \
|
||||
__builtin_arm_mrc2(coproc, opc1, CRn, CRm, opc2)
|
||||
#endif /* __ARM_FEATURE_COPROC & 0x2 */
|
||||
|
||||
/* Bit 2 of __ARM_FEATURE_COPROC gates mcrr/mrrc; both macros forward their
 * arguments unchanged to the matching __builtin_arm_* builtin. */
#if (__ARM_FEATURE_COPROC & 0x4)
|
||||
#define __arm_mcrr(coproc, opc1, value, CRm) \
|
||||
__builtin_arm_mcrr(coproc, opc1, value, CRm)
|
||||
#define __arm_mrrc(coproc, opc1, CRm) __builtin_arm_mrrc(coproc, opc1, CRm)
|
||||
#endif /* __ARM_FEATURE_COPROC & 0x4 */
|
||||
|
||||
/* Bit 3 of __ARM_FEATURE_COPROC gates mcrr2/mrrc2; both macros forward their
 * arguments unchanged to the matching __builtin_arm_* builtin. */
#if (__ARM_FEATURE_COPROC & 0x8)
|
||||
#define __arm_mcrr2(coproc, opc1, value, CRm) \
|
||||
__builtin_arm_mcrr2(coproc, opc1, value, CRm)
|
||||
#define __arm_mrrc2(coproc, opc1, CRm) __builtin_arm_mrrc2(coproc, opc1, CRm)
|
||||
#endif /* __ARM_FEATURE_COPROC & 0x8 */
|
||||
|
||||
#endif // __ARM_FEATURE_COPROC
|
||||
|
||||
/* 17 Transactional Memory Extension (TME) Intrinsics */
|
||||
#if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME
|
||||
|
||||
#define _TMFAILURE_REASON 0x00007fffu
|
||||
@ -763,7 +843,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
|
||||
|
||||
#endif /* __ARM_FEATURE_TME */
|
||||
|
||||
/* Armv8.5-A Random number generation intrinsics */
|
||||
/* 8.7 Armv8.5-A Random number generation intrinsics */
|
||||
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
|
||||
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
|
||||
__rndr(uint64_t *__p) {
|
||||
|
||||
412
lib/include/arm_neon.h
vendored
412
lib/include/arm_neon.h
vendored
@ -35,12 +35,7 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include <arm_bf16.h>
|
||||
typedef float float32_t;
|
||||
typedef __fp16 float16_t;
|
||||
#ifdef __aarch64__
|
||||
typedef double float64_t;
|
||||
#endif
|
||||
|
||||
#include <arm_vector_types.h>
|
||||
#ifdef __aarch64__
|
||||
typedef uint8_t poly8_t;
|
||||
typedef uint16_t poly16_t;
|
||||
@ -51,30 +46,6 @@ typedef int8_t poly8_t;
|
||||
typedef int16_t poly16_t;
|
||||
typedef int64_t poly64_t;
|
||||
#endif
|
||||
typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
|
||||
typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
|
||||
typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
|
||||
typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
|
||||
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
|
||||
typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
|
||||
typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
|
||||
typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
|
||||
#ifdef __aarch64__
|
||||
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
|
||||
#endif
|
||||
typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
|
||||
typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
|
||||
typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
|
||||
@ -82,96 +53,6 @@ typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
|
||||
typedef __attribute__((neon_polyvector_type(1))) poly64_t poly64x1_t;
|
||||
typedef __attribute__((neon_polyvector_type(2))) poly64_t poly64x2_t;
|
||||
|
||||
typedef struct int8x8x2_t {
|
||||
int8x8_t val[2];
|
||||
} int8x8x2_t;
|
||||
|
||||
typedef struct int8x16x2_t {
|
||||
int8x16_t val[2];
|
||||
} int8x16x2_t;
|
||||
|
||||
typedef struct int16x4x2_t {
|
||||
int16x4_t val[2];
|
||||
} int16x4x2_t;
|
||||
|
||||
typedef struct int16x8x2_t {
|
||||
int16x8_t val[2];
|
||||
} int16x8x2_t;
|
||||
|
||||
typedef struct int32x2x2_t {
|
||||
int32x2_t val[2];
|
||||
} int32x2x2_t;
|
||||
|
||||
typedef struct int32x4x2_t {
|
||||
int32x4_t val[2];
|
||||
} int32x4x2_t;
|
||||
|
||||
typedef struct int64x1x2_t {
|
||||
int64x1_t val[2];
|
||||
} int64x1x2_t;
|
||||
|
||||
typedef struct int64x2x2_t {
|
||||
int64x2_t val[2];
|
||||
} int64x2x2_t;
|
||||
|
||||
typedef struct uint8x8x2_t {
|
||||
uint8x8_t val[2];
|
||||
} uint8x8x2_t;
|
||||
|
||||
typedef struct uint8x16x2_t {
|
||||
uint8x16_t val[2];
|
||||
} uint8x16x2_t;
|
||||
|
||||
typedef struct uint16x4x2_t {
|
||||
uint16x4_t val[2];
|
||||
} uint16x4x2_t;
|
||||
|
||||
typedef struct uint16x8x2_t {
|
||||
uint16x8_t val[2];
|
||||
} uint16x8x2_t;
|
||||
|
||||
typedef struct uint32x2x2_t {
|
||||
uint32x2_t val[2];
|
||||
} uint32x2x2_t;
|
||||
|
||||
typedef struct uint32x4x2_t {
|
||||
uint32x4_t val[2];
|
||||
} uint32x4x2_t;
|
||||
|
||||
typedef struct uint64x1x2_t {
|
||||
uint64x1_t val[2];
|
||||
} uint64x1x2_t;
|
||||
|
||||
typedef struct uint64x2x2_t {
|
||||
uint64x2_t val[2];
|
||||
} uint64x2x2_t;
|
||||
|
||||
typedef struct float16x4x2_t {
|
||||
float16x4_t val[2];
|
||||
} float16x4x2_t;
|
||||
|
||||
typedef struct float16x8x2_t {
|
||||
float16x8_t val[2];
|
||||
} float16x8x2_t;
|
||||
|
||||
typedef struct float32x2x2_t {
|
||||
float32x2_t val[2];
|
||||
} float32x2x2_t;
|
||||
|
||||
typedef struct float32x4x2_t {
|
||||
float32x4_t val[2];
|
||||
} float32x4x2_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x2_t {
|
||||
float64x1_t val[2];
|
||||
} float64x1x2_t;
|
||||
|
||||
typedef struct float64x2x2_t {
|
||||
float64x2_t val[2];
|
||||
} float64x2x2_t;
|
||||
|
||||
#endif
|
||||
typedef struct poly8x8x2_t {
|
||||
poly8x8_t val[2];
|
||||
} poly8x8x2_t;
|
||||
@ -196,96 +77,6 @@ typedef struct poly64x2x2_t {
|
||||
poly64x2_t val[2];
|
||||
} poly64x2x2_t;
|
||||
|
||||
typedef struct int8x8x3_t {
|
||||
int8x8_t val[3];
|
||||
} int8x8x3_t;
|
||||
|
||||
typedef struct int8x16x3_t {
|
||||
int8x16_t val[3];
|
||||
} int8x16x3_t;
|
||||
|
||||
typedef struct int16x4x3_t {
|
||||
int16x4_t val[3];
|
||||
} int16x4x3_t;
|
||||
|
||||
typedef struct int16x8x3_t {
|
||||
int16x8_t val[3];
|
||||
} int16x8x3_t;
|
||||
|
||||
typedef struct int32x2x3_t {
|
||||
int32x2_t val[3];
|
||||
} int32x2x3_t;
|
||||
|
||||
typedef struct int32x4x3_t {
|
||||
int32x4_t val[3];
|
||||
} int32x4x3_t;
|
||||
|
||||
typedef struct int64x1x3_t {
|
||||
int64x1_t val[3];
|
||||
} int64x1x3_t;
|
||||
|
||||
typedef struct int64x2x3_t {
|
||||
int64x2_t val[3];
|
||||
} int64x2x3_t;
|
||||
|
||||
typedef struct uint8x8x3_t {
|
||||
uint8x8_t val[3];
|
||||
} uint8x8x3_t;
|
||||
|
||||
typedef struct uint8x16x3_t {
|
||||
uint8x16_t val[3];
|
||||
} uint8x16x3_t;
|
||||
|
||||
typedef struct uint16x4x3_t {
|
||||
uint16x4_t val[3];
|
||||
} uint16x4x3_t;
|
||||
|
||||
typedef struct uint16x8x3_t {
|
||||
uint16x8_t val[3];
|
||||
} uint16x8x3_t;
|
||||
|
||||
typedef struct uint32x2x3_t {
|
||||
uint32x2_t val[3];
|
||||
} uint32x2x3_t;
|
||||
|
||||
typedef struct uint32x4x3_t {
|
||||
uint32x4_t val[3];
|
||||
} uint32x4x3_t;
|
||||
|
||||
typedef struct uint64x1x3_t {
|
||||
uint64x1_t val[3];
|
||||
} uint64x1x3_t;
|
||||
|
||||
typedef struct uint64x2x3_t {
|
||||
uint64x2_t val[3];
|
||||
} uint64x2x3_t;
|
||||
|
||||
typedef struct float16x4x3_t {
|
||||
float16x4_t val[3];
|
||||
} float16x4x3_t;
|
||||
|
||||
typedef struct float16x8x3_t {
|
||||
float16x8_t val[3];
|
||||
} float16x8x3_t;
|
||||
|
||||
typedef struct float32x2x3_t {
|
||||
float32x2_t val[3];
|
||||
} float32x2x3_t;
|
||||
|
||||
typedef struct float32x4x3_t {
|
||||
float32x4_t val[3];
|
||||
} float32x4x3_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x3_t {
|
||||
float64x1_t val[3];
|
||||
} float64x1x3_t;
|
||||
|
||||
typedef struct float64x2x3_t {
|
||||
float64x2_t val[3];
|
||||
} float64x2x3_t;
|
||||
|
||||
#endif
|
||||
typedef struct poly8x8x3_t {
|
||||
poly8x8_t val[3];
|
||||
} poly8x8x3_t;
|
||||
@ -310,96 +101,6 @@ typedef struct poly64x2x3_t {
|
||||
poly64x2_t val[3];
|
||||
} poly64x2x3_t;
|
||||
|
||||
typedef struct int8x8x4_t {
|
||||
int8x8_t val[4];
|
||||
} int8x8x4_t;
|
||||
|
||||
typedef struct int8x16x4_t {
|
||||
int8x16_t val[4];
|
||||
} int8x16x4_t;
|
||||
|
||||
typedef struct int16x4x4_t {
|
||||
int16x4_t val[4];
|
||||
} int16x4x4_t;
|
||||
|
||||
typedef struct int16x8x4_t {
|
||||
int16x8_t val[4];
|
||||
} int16x8x4_t;
|
||||
|
||||
typedef struct int32x2x4_t {
|
||||
int32x2_t val[4];
|
||||
} int32x2x4_t;
|
||||
|
||||
typedef struct int32x4x4_t {
|
||||
int32x4_t val[4];
|
||||
} int32x4x4_t;
|
||||
|
||||
typedef struct int64x1x4_t {
|
||||
int64x1_t val[4];
|
||||
} int64x1x4_t;
|
||||
|
||||
typedef struct int64x2x4_t {
|
||||
int64x2_t val[4];
|
||||
} int64x2x4_t;
|
||||
|
||||
typedef struct uint8x8x4_t {
|
||||
uint8x8_t val[4];
|
||||
} uint8x8x4_t;
|
||||
|
||||
typedef struct uint8x16x4_t {
|
||||
uint8x16_t val[4];
|
||||
} uint8x16x4_t;
|
||||
|
||||
typedef struct uint16x4x4_t {
|
||||
uint16x4_t val[4];
|
||||
} uint16x4x4_t;
|
||||
|
||||
typedef struct uint16x8x4_t {
|
||||
uint16x8_t val[4];
|
||||
} uint16x8x4_t;
|
||||
|
||||
typedef struct uint32x2x4_t {
|
||||
uint32x2_t val[4];
|
||||
} uint32x2x4_t;
|
||||
|
||||
typedef struct uint32x4x4_t {
|
||||
uint32x4_t val[4];
|
||||
} uint32x4x4_t;
|
||||
|
||||
typedef struct uint64x1x4_t {
|
||||
uint64x1_t val[4];
|
||||
} uint64x1x4_t;
|
||||
|
||||
typedef struct uint64x2x4_t {
|
||||
uint64x2_t val[4];
|
||||
} uint64x2x4_t;
|
||||
|
||||
typedef struct float16x4x4_t {
|
||||
float16x4_t val[4];
|
||||
} float16x4x4_t;
|
||||
|
||||
typedef struct float16x8x4_t {
|
||||
float16x8_t val[4];
|
||||
} float16x8x4_t;
|
||||
|
||||
typedef struct float32x2x4_t {
|
||||
float32x2_t val[4];
|
||||
} float32x2x4_t;
|
||||
|
||||
typedef struct float32x4x4_t {
|
||||
float32x4_t val[4];
|
||||
} float32x4x4_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x4_t {
|
||||
float64x1_t val[4];
|
||||
} float64x1x4_t;
|
||||
|
||||
typedef struct float64x2x4_t {
|
||||
float64x2_t val[4];
|
||||
} float64x2x4_t;
|
||||
|
||||
#endif
|
||||
typedef struct poly8x8x4_t {
|
||||
poly8x8_t val[4];
|
||||
} poly8x8x4_t;
|
||||
@ -424,33 +125,6 @@ typedef struct poly64x2x4_t {
|
||||
poly64x2_t val[4];
|
||||
} poly64x2x4_t;
|
||||
|
||||
typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
|
||||
|
||||
typedef struct bfloat16x4x2_t {
|
||||
bfloat16x4_t val[2];
|
||||
} bfloat16x4x2_t;
|
||||
|
||||
typedef struct bfloat16x8x2_t {
|
||||
bfloat16x8_t val[2];
|
||||
} bfloat16x8x2_t;
|
||||
|
||||
typedef struct bfloat16x4x3_t {
|
||||
bfloat16x4_t val[3];
|
||||
} bfloat16x4x3_t;
|
||||
|
||||
typedef struct bfloat16x8x3_t {
|
||||
bfloat16x8_t val[3];
|
||||
} bfloat16x8x3_t;
|
||||
|
||||
typedef struct bfloat16x4x4_t {
|
||||
bfloat16x4_t val[4];
|
||||
} bfloat16x4x4_t;
|
||||
|
||||
typedef struct bfloat16x8x4_t {
|
||||
bfloat16x8_t val[4];
|
||||
} bfloat16x8x4_t;
|
||||
|
||||
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
@ -66600,6 +66274,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32x_f32(float32x2_t __p0)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* vrnd32xq_f64: two-lane double wrapper around __builtin_neon_vrnd32xq_f64,
 * compiled for target("v8.5a"). Two definitions are selected by
 * __LITTLE_ENDIAN__.
 * NOTE(review): the trailing literal 42 is presumably the NEON type code for
 * float64x2_t -- confirm against the builtin definition. */
#ifdef __LITTLE_ENDIAN__
|
||||
/* __LITTLE_ENDIAN__ defined: the operand is passed to the builtin as-is. */
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__p0, 42);
|
||||
return __ret;
|
||||
}
|
||||
#else
|
||||
/* __LITTLE_ENDIAN__ not defined: swap the two lanes (indices 1, 0) before the
 * builtin call and swap the result back afterwards. */
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__rev0, 42);
|
||||
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
|
||||
return __ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* vrnd32x_f64: one-lane double wrapper around __builtin_neon_vrnd32x_f64,
 * compiled for target("v8.5a"). A single definition is used; it is not
 * wrapped in an __LITTLE_ENDIAN__ conditional and performs no lane shuffle.
 * NOTE(review): the trailing literal 10 is presumably the NEON type code for
 * float64x1_t -- confirm against the builtin definition. */
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32x_f64(float64x1_t __p0) {
|
||||
float64x1_t __ret;
|
||||
__ret = (float64x1_t) __builtin_neon_vrnd32x_f64((int8x8_t)__p0, 10);
|
||||
return __ret;
|
||||
}
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd32zq_f32(float32x4_t __p0) {
|
||||
float32x4_t __ret;
|
||||
@ -66632,6 +66327,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32z_f32(float32x2_t __p0)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* vrnd32zq_f64: two-lane double wrapper around __builtin_neon_vrnd32zq_f64,
 * compiled for target("v8.5a"). Two definitions are selected by
 * __LITTLE_ENDIAN__.
 * NOTE(review): the trailing literal 42 is presumably the NEON type code for
 * float64x2_t -- confirm against the builtin definition. */
#ifdef __LITTLE_ENDIAN__
|
||||
/* __LITTLE_ENDIAN__ defined: the operand is passed to the builtin as-is. */
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__p0, 42);
|
||||
return __ret;
|
||||
}
|
||||
#else
|
||||
/* __LITTLE_ENDIAN__ not defined: swap the two lanes (indices 1, 0) before the
 * builtin call and swap the result back afterwards. */
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__rev0, 42);
|
||||
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
|
||||
return __ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* vrnd32z_f64: one-lane double wrapper around __builtin_neon_vrnd32z_f64,
 * compiled for target("v8.5a"). A single definition is used; it is not
 * wrapped in an __LITTLE_ENDIAN__ conditional and performs no lane shuffle.
 * NOTE(review): the trailing literal 10 is presumably the NEON type code for
 * float64x1_t -- confirm against the builtin definition. */
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32z_f64(float64x1_t __p0) {
|
||||
float64x1_t __ret;
|
||||
__ret = (float64x1_t) __builtin_neon_vrnd32z_f64((int8x8_t)__p0, 10);
|
||||
return __ret;
|
||||
}
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd64xq_f32(float32x4_t __p0) {
|
||||
float32x4_t __ret;
|
||||
@ -66664,6 +66380,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64x_f32(float32x2_t __p0)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* vrnd64xq_f64: two-lane double wrapper around __builtin_neon_vrnd64xq_f64,
 * compiled for target("v8.5a"). Two definitions are selected by
 * __LITTLE_ENDIAN__.
 * NOTE(review): the trailing literal 42 is presumably the NEON type code for
 * float64x2_t -- confirm against the builtin definition. */
#ifdef __LITTLE_ENDIAN__
|
||||
/* __LITTLE_ENDIAN__ defined: the operand is passed to the builtin as-is. */
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__p0, 42);
|
||||
return __ret;
|
||||
}
|
||||
#else
|
||||
/* __LITTLE_ENDIAN__ not defined: swap the two lanes (indices 1, 0) before the
 * builtin call and swap the result back afterwards. */
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__rev0, 42);
|
||||
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
|
||||
return __ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* vrnd64x_f64: one-lane double wrapper around __builtin_neon_vrnd64x_f64,
 * compiled for target("v8.5a"). A single definition is used; it is not
 * wrapped in an __LITTLE_ENDIAN__ conditional and performs no lane shuffle.
 * NOTE(review): the trailing literal 10 is presumably the NEON type code for
 * float64x1_t -- confirm against the builtin definition. */
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64x_f64(float64x1_t __p0) {
|
||||
float64x1_t __ret;
|
||||
__ret = (float64x1_t) __builtin_neon_vrnd64x_f64((int8x8_t)__p0, 10);
|
||||
return __ret;
|
||||
}
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd64zq_f32(float32x4_t __p0) {
|
||||
float32x4_t __ret;
|
||||
@ -66696,6 +66433,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64z_f32(float32x2_t __p0)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* vrnd64zq_f64: two-lane double wrapper around __builtin_neon_vrnd64zq_f64,
 * compiled for target("v8.5a"). Two definitions are selected by
 * __LITTLE_ENDIAN__.
 * NOTE(review): the trailing literal 42 is presumably the NEON type code for
 * float64x2_t -- confirm against the builtin definition. */
#ifdef __LITTLE_ENDIAN__
|
||||
/* __LITTLE_ENDIAN__ defined: the operand is passed to the builtin as-is. */
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__p0, 42);
|
||||
return __ret;
|
||||
}
|
||||
#else
|
||||
/* __LITTLE_ENDIAN__ not defined: swap the two lanes (indices 1, 0) before the
 * builtin call and swap the result back afterwards. */
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__rev0, 42);
|
||||
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
|
||||
return __ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* vrnd64z_f64: one-lane double wrapper around __builtin_neon_vrnd64z_f64,
 * compiled for target("v8.5a"). A single definition is used; it is not
 * wrapped in an __LITTLE_ENDIAN__ conditional and performs no lane shuffle.
 * NOTE(review): the trailing literal 10 is presumably the NEON type code for
 * float64x1_t -- confirm against the builtin definition. */
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64z_f64(float64x1_t __p0) {
|
||||
float64x1_t __ret;
|
||||
__ret = (float64x1_t) __builtin_neon_vrnd64z_f64((int8x8_t)__p0, 10);
|
||||
return __ret;
|
||||
}
|
||||
#endif
|
||||
#if defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
|
||||
2412
lib/include/arm_sme.h
vendored
Normal file
2412
lib/include/arm_sme.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
642
lib/include/arm_sme_draft_spec_subject_to_change.h
vendored
642
lib/include/arm_sme_draft_spec_subject_to_change.h
vendored
@ -1,642 +0,0 @@
|
||||
/*===---- arm_sme_draft_spec_subject_to_change.h - ARM SME intrinsics ------===
|
||||
*
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __ARM_SME_H
|
||||
#define __ARM_SME_H
|
||||
|
||||
#if !defined(__LITTLE_ENDIAN__)
|
||||
#error "Big endian is currently not supported for arm_sme_draft_spec_subject_to_change.h"
|
||||
#endif
|
||||
#include <arm_sve.h>
|
||||
|
||||
/* Function attributes */
|
||||
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
#define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__))
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsb), arm_streaming_compatible, arm_preserves_za))
|
||||
uint64_t svcntsb(void);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsd), arm_streaming_compatible, arm_preserves_za))
|
||||
uint64_t svcntsd(void);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsh), arm_streaming_compatible, arm_preserves_za))
|
||||
uint64_t svcntsh(void);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsw), arm_streaming_compatible, arm_preserves_za))
|
||||
uint64_t svcntsw(void);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za128), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za16), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za32), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za64), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za8), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za128), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za16), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za32), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za64), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za8), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za128), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za16), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za32), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za64), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za8), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za128), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za16), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za32), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za64), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za8), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_hor_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_hor_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_hor_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_hor_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_hor_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_hor_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_hor_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_hor_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_hor_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_hor_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_hor_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_hor_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_hor_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_hor_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_hor_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_hor_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_hor_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_hor_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_hor_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_hor_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_hor_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_hor_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_hor_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_hor_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_ver_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_ver_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_ver_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_ver_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_ver_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_ver_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_ver_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_ver_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_ver_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_ver_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_ver_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_ver_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_ver_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_ver_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_ver_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_ver_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_ver_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_ver_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_ver_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_ver_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_ver_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_ver_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_ver_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_ver_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za128), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za16), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za32), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za64), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za8), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za128), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za16), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za32), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za64), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za8), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svsumopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svsumops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svusmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svusmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za), arm_streaming_compatible, arm_shared_za))
|
||||
void svzero_mask_za(uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za), arm_streaming_compatible, arm_shared_za))
|
||||
void svzero_za();
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
|
||||
/* Overloads of svmops_za32_m, one per operand vector type (f16, bf16, f32,
 * s8, u8). Signatures mirror the svmopa_za32_m overloads; each is aliased to
 * __builtin_sme_svmops_za32_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
|
||||
/* Overloads of svread_hor_za128_m, one per vector element type. Each overload
 * returns the same vector type as its first parameter and is aliased to the
 * type-suffixed builtin __builtin_sme_svread_hor_za128_<type>_m. All carry
 * arm_streaming, arm_shared_za and arm_preserves_za. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_hor_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_hor_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_hor_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_hor_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_hor_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_hor_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_hor_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_hor_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_hor_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_hor_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_hor_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_hor_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
/* Overloads of svread_hor_za16_m for the four 16-bit element vector types
 * (u16, bf16, f16, s16); each returns the type of its first parameter and is
 * aliased to __builtin_sme_svread_hor_za16_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_hor_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_hor_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_hor_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_hor_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
/* Overloads of svread_hor_za32_m for the 32-bit element vector types
 * (u32, f32, s32); aliased to __builtin_sme_svread_hor_za32_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_hor_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_hor_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_hor_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
/* Overloads of svread_hor_za64_m for the 64-bit element vector types
 * (u64, f64, s64); aliased to __builtin_sme_svread_hor_za64_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_hor_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_hor_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_hor_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
/* Overloads of svread_hor_za8_m for the 8-bit element vector types (u8, s8);
 * aliased to __builtin_sme_svread_hor_za8_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_hor_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_hor_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
/* Overloads of svread_ver_za128_m, one per vector element type. Signatures
 * match the svread_hor_za128_m overloads; each is aliased to
 * __builtin_sme_svread_ver_za128_<type>_m and returns the type of its first
 * parameter. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_ver_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_ver_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_ver_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_ver_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_ver_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_ver_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_ver_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_ver_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_ver_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_ver_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_ver_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_ver_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
/* Overloads of svread_ver_za16_m for the 16-bit element vector types
 * (u16, bf16, f16, s16); aliased to __builtin_sme_svread_ver_za16_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_ver_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_ver_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_ver_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_ver_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
/* Overloads of svread_ver_za32_m for the 32-bit element vector types
 * (u32, f32, s32); aliased to __builtin_sme_svread_ver_za32_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_ver_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_ver_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_ver_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
/* Overloads of svread_ver_za64_m for the 64-bit element vector types
 * (u64, f64, s64); aliased to __builtin_sme_svread_ver_za64_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_ver_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_ver_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_ver_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
/* Overloads of svread_ver_za8_m for the 8-bit element vector types (u8, s8);
 * aliased to __builtin_sme_svread_ver_za8_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_ver_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_ver_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
/* Mixed-signedness za32 declarations. The svsum* forms take
 * (svint8_t, svuint8_t) as their vector operands and the svusm* forms take
 * (svuint8_t, svint8_t); the builtin suffix (_s8 / _u8) follows the first
 * vector operand's type. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svsumopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svsumops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svusmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svusmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
|
||||
/* Overloads of svwrite_hor_za128_m, one per vector element type. All return
 * void and differ only in the type of the final vector parameter; each is
 * aliased to __builtin_sme_svwrite_hor_za128_<type>_m. Unlike the svread_*
 * declarations, these do not carry arm_preserves_za. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
/* Overloads of svwrite_hor_za16_m for the 16-bit element vector types
 * (u16, bf16, f16, s16); aliased to __builtin_sme_svwrite_hor_za16_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
/* Overloads of svwrite_hor_za32_m for the 32-bit element vector types
 * (u32, f32, s32); aliased to __builtin_sme_svwrite_hor_za32_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
/* Overloads of svwrite_hor_za64_m for the 64-bit element vector types
 * (u64, f64, s64); aliased to __builtin_sme_svwrite_hor_za64_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
/* Overloads of svwrite_hor_za8_m for the 8-bit element vector types (u8, s8);
 * aliased to __builtin_sme_svwrite_hor_za8_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
/* Overloads of svwrite_ver_za128_m, one per vector element type. Signatures
 * match the svwrite_hor_za128_m overloads; each is aliased to
 * __builtin_sme_svwrite_ver_za128_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
/* Overloads of svwrite_ver_za16_m for the 16-bit element vector types
 * (u16, bf16, f16, s16); aliased to __builtin_sme_svwrite_ver_za16_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
/* Overloads of svwrite_ver_za32_m for the 32-bit element vector types
 * (u32, f32, s32); aliased to __builtin_sme_svwrite_ver_za32_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
/* Overloads of svwrite_ver_za64_m for the 64-bit element vector types
 * (u64, f64, s64); aliased to __builtin_sme_svwrite_ver_za64_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
/* Overloads of svwrite_ver_za8_m for the 8-bit element vector types (u8, s8);
 * aliased to __builtin_sme_svwrite_ver_za8_<type>_m. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
/* Type-suffixed (non-overloaded, __ai) za64 declarations for svfloat64_t
 * operands. They alias the same builtins as the overloaded svmopa_za64_m /
 * svmops_za64_m f64 declarations elsewhere in this header. */
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
|
||||
/* Overloaded (__aio) spellings svmopa_za64_m / svmops_za64_m for svfloat64_t
 * operands; aliased to __builtin_sme_svmopa_za64_f64_m and
 * __builtin_sme_svmops_za64_f64_m respectively. */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
|
||||
/* Type-suffixed (__ai) za64 integer declarations: svaddha / svaddva take a
 * single 64-bit-element vector, while svmopa / svmops / svsumopa / svsumops /
 * svusmopa / svusmops take two 16-bit-element vectors. Each function name
 * matches the builtin it aliases minus the __builtin_sme_ prefix. */
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svsumopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svsumops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svusmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svusmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
|
||||
/* Overloaded (__aio) spellings of the za64 integer declarations: the same
 * builtins as the type-suffixed __ai forms, but with the element-type suffix
 * dropped from the function name so the overload is selected by the vector
 * argument type(s). */
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svsumopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svsumops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svusmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svusmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
|
||||
/* svldr_vnum_za / svstr_vnum_za: unlike the declarations above, these are
 * marked arm_streaming_compatible rather than arm_streaming. The load form
 * takes a `void const *`, the store form a mutable `void *`; only the store
 * form additionally carries arm_preserves_za. */
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_vnum_za), arm_streaming_compatible, arm_shared_za))
|
||||
void svldr_vnum_za(uint32_t, uint64_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_vnum_za), arm_streaming_compatible, arm_shared_za, arm_preserves_za))
|
||||
void svstr_vnum_za(uint32_t, uint64_t, void *);
|
||||
/* Header epilogue: close the C-linkage block for C++ translation units (the
 * matching opening is presumably earlier in the header, outside this view). */
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
/* Drop the helper macro so it does not leak into including code.
 * NOTE(review): only __ai is undefined here; __aio is also used by the
 * declarations above but is not #undef'd in the visible footer -- confirm
 * whether that is intentional. */
#undef __ai
|
||||
|
||||
#endif /* __ARM_SME_H */
|
||||
7829
lib/include/arm_sve.h
vendored
7829
lib/include/arm_sve.h
vendored
File diff suppressed because it is too large
Load Diff
345
lib/include/arm_vector_types.h
vendored
Normal file
345
lib/include/arm_vector_types.h
vendored
Normal file
@ -0,0 +1,345 @@
|
||||
/*===---- arm_vector_types - ARM vector type ------===
|
||||
*
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/* Refuse direct inclusion: this file is only valid when reached through a
 * header that has defined __ARM_NEON_H or __ARM_SVE_H. */
#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)
|
||||
#error "This file should not be used standalone. Please include arm_neon.h or arm_sve.h instead"
|
||||
|
||||
#endif
|
||||
/* Include guard. The guard macro is spelled __ARM_NEON_TYPES_H even though
 * the banner above names the file arm_vector_types. */
#ifndef __ARM_NEON_TYPES_H
|
||||
#define __ARM_NEON_TYPES_H
|
||||
/* Scalar floating-point element types. float64_t is only declared when
 * __aarch64__ is defined. */
typedef float float32_t;
|
||||
typedef __fp16 float16_t;
|
||||
#ifdef __aarch64__
|
||||
typedef double float64_t;
|
||||
#endif
|
||||
|
||||
/* Vector types, named <element>x<N>_t. Each is built from a scalar element
 * type with the neon_vector_type(N) attribute, where N matches the number
 * after the 'x' in the typedef name. The float64 vectors are only declared
 * when __aarch64__ is defined. */
typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
|
||||
typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
|
||||
typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
|
||||
typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
|
||||
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
|
||||
typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
|
||||
typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
|
||||
typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
|
||||
#ifdef __aarch64__
|
||||
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
|
||||
#endif
|
||||
|
||||
/* Two-vector aggregates, named <vector>x2_t. Each struct wraps a single
 * member `val`, an array of two vectors of the corresponding vector type.
 * The float64 variants are only declared when __aarch64__ is defined. */
typedef struct int8x8x2_t {
|
||||
int8x8_t val[2];
|
||||
} int8x8x2_t;
|
||||
|
||||
typedef struct int8x16x2_t {
|
||||
int8x16_t val[2];
|
||||
} int8x16x2_t;
|
||||
|
||||
typedef struct int16x4x2_t {
|
||||
int16x4_t val[2];
|
||||
} int16x4x2_t;
|
||||
|
||||
typedef struct int16x8x2_t {
|
||||
int16x8_t val[2];
|
||||
} int16x8x2_t;
|
||||
|
||||
typedef struct int32x2x2_t {
|
||||
int32x2_t val[2];
|
||||
} int32x2x2_t;
|
||||
|
||||
typedef struct int32x4x2_t {
|
||||
int32x4_t val[2];
|
||||
} int32x4x2_t;
|
||||
|
||||
typedef struct int64x1x2_t {
|
||||
int64x1_t val[2];
|
||||
} int64x1x2_t;
|
||||
|
||||
typedef struct int64x2x2_t {
|
||||
int64x2_t val[2];
|
||||
} int64x2x2_t;
|
||||
|
||||
typedef struct uint8x8x2_t {
|
||||
uint8x8_t val[2];
|
||||
} uint8x8x2_t;
|
||||
|
||||
typedef struct uint8x16x2_t {
|
||||
uint8x16_t val[2];
|
||||
} uint8x16x2_t;
|
||||
|
||||
typedef struct uint16x4x2_t {
|
||||
uint16x4_t val[2];
|
||||
} uint16x4x2_t;
|
||||
|
||||
typedef struct uint16x8x2_t {
|
||||
uint16x8_t val[2];
|
||||
} uint16x8x2_t;
|
||||
|
||||
typedef struct uint32x2x2_t {
|
||||
uint32x2_t val[2];
|
||||
} uint32x2x2_t;
|
||||
|
||||
typedef struct uint32x4x2_t {
|
||||
uint32x4_t val[2];
|
||||
} uint32x4x2_t;
|
||||
|
||||
typedef struct uint64x1x2_t {
|
||||
uint64x1_t val[2];
|
||||
} uint64x1x2_t;
|
||||
|
||||
typedef struct uint64x2x2_t {
|
||||
uint64x2_t val[2];
|
||||
} uint64x2x2_t;
|
||||
|
||||
typedef struct float16x4x2_t {
|
||||
float16x4_t val[2];
|
||||
} float16x4x2_t;
|
||||
|
||||
typedef struct float16x8x2_t {
|
||||
float16x8_t val[2];
|
||||
} float16x8x2_t;
|
||||
|
||||
typedef struct float32x2x2_t {
|
||||
float32x2_t val[2];
|
||||
} float32x2x2_t;
|
||||
|
||||
typedef struct float32x4x2_t {
|
||||
float32x4_t val[2];
|
||||
} float32x4x2_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x2_t {
|
||||
float64x1_t val[2];
|
||||
} float64x1x2_t;
|
||||
|
||||
typedef struct float64x2x2_t {
|
||||
float64x2_t val[2];
|
||||
} float64x2x2_t;
|
||||
|
||||
#endif
|
||||
typedef struct int8x8x3_t {
|
||||
int8x8_t val[3];
|
||||
} int8x8x3_t;
|
||||
|
||||
typedef struct int8x16x3_t {
|
||||
int8x16_t val[3];
|
||||
} int8x16x3_t;
|
||||
|
||||
typedef struct int16x4x3_t {
|
||||
int16x4_t val[3];
|
||||
} int16x4x3_t;
|
||||
|
||||
typedef struct int16x8x3_t {
|
||||
int16x8_t val[3];
|
||||
} int16x8x3_t;
|
||||
|
||||
typedef struct int32x2x3_t {
|
||||
int32x2_t val[3];
|
||||
} int32x2x3_t;
|
||||
|
||||
typedef struct int32x4x3_t {
|
||||
int32x4_t val[3];
|
||||
} int32x4x3_t;
|
||||
|
||||
typedef struct int64x1x3_t {
|
||||
int64x1_t val[3];
|
||||
} int64x1x3_t;
|
||||
|
||||
typedef struct int64x2x3_t {
|
||||
int64x2_t val[3];
|
||||
} int64x2x3_t;
|
||||
|
||||
typedef struct uint8x8x3_t {
|
||||
uint8x8_t val[3];
|
||||
} uint8x8x3_t;
|
||||
|
||||
typedef struct uint8x16x3_t {
|
||||
uint8x16_t val[3];
|
||||
} uint8x16x3_t;
|
||||
|
||||
typedef struct uint16x4x3_t {
|
||||
uint16x4_t val[3];
|
||||
} uint16x4x3_t;
|
||||
|
||||
typedef struct uint16x8x3_t {
|
||||
uint16x8_t val[3];
|
||||
} uint16x8x3_t;
|
||||
|
||||
typedef struct uint32x2x3_t {
|
||||
uint32x2_t val[3];
|
||||
} uint32x2x3_t;
|
||||
|
||||
typedef struct uint32x4x3_t {
|
||||
uint32x4_t val[3];
|
||||
} uint32x4x3_t;
|
||||
|
||||
typedef struct uint64x1x3_t {
|
||||
uint64x1_t val[3];
|
||||
} uint64x1x3_t;
|
||||
|
||||
typedef struct uint64x2x3_t {
|
||||
uint64x2_t val[3];
|
||||
} uint64x2x3_t;
|
||||
|
||||
typedef struct float16x4x3_t {
|
||||
float16x4_t val[3];
|
||||
} float16x4x3_t;
|
||||
|
||||
typedef struct float16x8x3_t {
|
||||
float16x8_t val[3];
|
||||
} float16x8x3_t;
|
||||
|
||||
typedef struct float32x2x3_t {
|
||||
float32x2_t val[3];
|
||||
} float32x2x3_t;
|
||||
|
||||
typedef struct float32x4x3_t {
|
||||
float32x4_t val[3];
|
||||
} float32x4x3_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x3_t {
|
||||
float64x1_t val[3];
|
||||
} float64x1x3_t;
|
||||
|
||||
typedef struct float64x2x3_t {
|
||||
float64x2_t val[3];
|
||||
} float64x2x3_t;
|
||||
|
||||
#endif
|
||||
typedef struct int8x8x4_t {
|
||||
int8x8_t val[4];
|
||||
} int8x8x4_t;
|
||||
|
||||
typedef struct int8x16x4_t {
|
||||
int8x16_t val[4];
|
||||
} int8x16x4_t;
|
||||
|
||||
typedef struct int16x4x4_t {
|
||||
int16x4_t val[4];
|
||||
} int16x4x4_t;
|
||||
|
||||
typedef struct int16x8x4_t {
|
||||
int16x8_t val[4];
|
||||
} int16x8x4_t;
|
||||
|
||||
typedef struct int32x2x4_t {
|
||||
int32x2_t val[4];
|
||||
} int32x2x4_t;
|
||||
|
||||
typedef struct int32x4x4_t {
|
||||
int32x4_t val[4];
|
||||
} int32x4x4_t;
|
||||
|
||||
typedef struct int64x1x4_t {
|
||||
int64x1_t val[4];
|
||||
} int64x1x4_t;
|
||||
|
||||
typedef struct int64x2x4_t {
|
||||
int64x2_t val[4];
|
||||
} int64x2x4_t;
|
||||
|
||||
typedef struct uint8x8x4_t {
|
||||
uint8x8_t val[4];
|
||||
} uint8x8x4_t;
|
||||
|
||||
typedef struct uint8x16x4_t {
|
||||
uint8x16_t val[4];
|
||||
} uint8x16x4_t;
|
||||
|
||||
typedef struct uint16x4x4_t {
|
||||
uint16x4_t val[4];
|
||||
} uint16x4x4_t;
|
||||
|
||||
typedef struct uint16x8x4_t {
|
||||
uint16x8_t val[4];
|
||||
} uint16x8x4_t;
|
||||
|
||||
typedef struct uint32x2x4_t {
|
||||
uint32x2_t val[4];
|
||||
} uint32x2x4_t;
|
||||
|
||||
typedef struct uint32x4x4_t {
|
||||
uint32x4_t val[4];
|
||||
} uint32x4x4_t;
|
||||
|
||||
typedef struct uint64x1x4_t {
|
||||
uint64x1_t val[4];
|
||||
} uint64x1x4_t;
|
||||
|
||||
typedef struct uint64x2x4_t {
|
||||
uint64x2_t val[4];
|
||||
} uint64x2x4_t;
|
||||
|
||||
typedef struct float16x4x4_t {
|
||||
float16x4_t val[4];
|
||||
} float16x4x4_t;
|
||||
|
||||
typedef struct float16x8x4_t {
|
||||
float16x8_t val[4];
|
||||
} float16x8x4_t;
|
||||
|
||||
typedef struct float32x2x4_t {
|
||||
float32x2_t val[4];
|
||||
} float32x2x4_t;
|
||||
|
||||
typedef struct float32x4x4_t {
|
||||
float32x4_t val[4];
|
||||
} float32x4x4_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x4_t {
|
||||
float64x1_t val[4];
|
||||
} float64x1x4_t;
|
||||
|
||||
typedef struct float64x2x4_t {
|
||||
float64x2_t val[4];
|
||||
} float64x2x4_t;
|
||||
|
||||
#endif
|
||||
typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
|
||||
|
||||
typedef struct bfloat16x4x2_t {
|
||||
bfloat16x4_t val[2];
|
||||
} bfloat16x4x2_t;
|
||||
|
||||
typedef struct bfloat16x8x2_t {
|
||||
bfloat16x8_t val[2];
|
||||
} bfloat16x8x2_t;
|
||||
|
||||
typedef struct bfloat16x4x3_t {
|
||||
bfloat16x4_t val[3];
|
||||
} bfloat16x4x3_t;
|
||||
|
||||
typedef struct bfloat16x8x3_t {
|
||||
bfloat16x8_t val[3];
|
||||
} bfloat16x8x3_t;
|
||||
|
||||
typedef struct bfloat16x4x4_t {
|
||||
bfloat16x4_t val[4];
|
||||
} bfloat16x4x4_t;
|
||||
|
||||
typedef struct bfloat16x8x4_t {
|
||||
bfloat16x8_t val[4];
|
||||
} bfloat16x8x4_t;
|
||||
|
||||
#endif // __ARM_NEON_TYPES_H
|
||||
27
lib/include/avx2intrin.h
vendored
27
lib/include/avx2intrin.h
vendored
@ -15,8 +15,12 @@
|
||||
#define __AVX2INTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx2,no-evex512"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx2,no-evex512"), __min_vector_width__(128)))
|
||||
|
||||
/* SSE4 Multiple Packed Sums of Absolute Difference. */
|
||||
/// Computes sixteen sum of absolute difference (SAD) operations on sets of
|
||||
@ -1307,6 +1311,23 @@ _mm256_min_epu32(__m256i __a, __m256i __b)
|
||||
return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b);
|
||||
}
|
||||
|
||||
/// Creates a 32-bit integer mask from the most significant bit of each byte
|
||||
/// in the 256-bit integer vector in \a __a and returns the result.
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// FOR i := 0 TO 31
|
||||
/// j := i*8
|
||||
/// result[i] := __a[j+7]
|
||||
/// ENDFOR
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c VPMOVMSKB instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A 256-bit integer vector containing the source bytes.
|
||||
/// \returns The 32-bit integer mask.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS256
|
||||
_mm256_movemask_epi8(__m256i __a)
|
||||
{
|
||||
@ -2962,7 +2983,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b)
|
||||
/// A pointer to the 32-byte aligned memory containing the vector to load.
|
||||
/// \returns A 256-bit integer vector loaded from memory.
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_stream_load_si256(__m256i const *__V)
|
||||
_mm256_stream_load_si256(const void *__V)
|
||||
{
|
||||
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
|
||||
return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);
|
||||
|
||||
5
lib/include/avx512bf16intrin.h
vendored
5
lib/include/avx512bf16intrin.h
vendored
@ -20,10 +20,11 @@ typedef __bf16 __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
|
||||
typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead")));
|
||||
|
||||
#define __DEFAULT_FN_ATTRS512 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16,evex512"), \
|
||||
__min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bf16,no-evex512")))
|
||||
|
||||
/// Convert One BF16 Data to One Single Float Data.
|
||||
///
|
||||
|
||||
5
lib/include/avx512bitalgintrin.h
vendored
5
lib/include/avx512bitalgintrin.h
vendored
@ -15,7 +15,10 @@
|
||||
#define __AVX512BITALGINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bitalg,evex512"), \
|
||||
__min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_popcnt_epi16(__m512i __A)
|
||||
|
||||
66
lib/include/avx512bwintrin.h
vendored
66
lib/include/avx512bwintrin.h
vendored
@ -18,8 +18,12 @@ typedef unsigned int __mmask32;
|
||||
typedef unsigned long long __mmask64;
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
|
||||
#define __DEFAULT_FN_ATTRS512 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bw,evex512"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bw,no-evex512")))
|
||||
|
||||
static __inline __mmask32 __DEFAULT_FN_ATTRS
|
||||
_knot_mask32(__mmask32 __M)
|
||||
@ -27,9 +31,7 @@ _knot_mask32(__mmask32 __M)
|
||||
return __builtin_ia32_knotsi(__M);
|
||||
}
|
||||
|
||||
static __inline __mmask64 __DEFAULT_FN_ATTRS
|
||||
_knot_mask64(__mmask64 __M)
|
||||
{
|
||||
static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) {
|
||||
return __builtin_ia32_knotdi(__M);
|
||||
}
|
||||
|
||||
@ -39,9 +41,8 @@ _kand_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kand_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -51,9 +52,8 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kandn_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -63,9 +63,8 @@ _kor_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kor_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -75,9 +74,8 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kxnor_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -87,9 +85,8 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kxor_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -112,14 +109,12 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
|
||||
return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
|
||||
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
|
||||
}
|
||||
|
||||
@ -148,14 +143,12 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
|
||||
return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
|
||||
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
|
||||
}
|
||||
|
||||
@ -171,9 +164,8 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kadd_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -214,8 +206,7 @@ _load_mask32(__mmask32 *__A) {
|
||||
return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_load_mask64(__mmask64 *__A) {
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) {
|
||||
return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
|
||||
}
|
||||
|
||||
@ -224,8 +215,8 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) {
|
||||
*(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_store_mask64(__mmask64 *__A, __mmask64 __B) {
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
|
||||
__mmask64 __B) {
|
||||
*(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -1714,9 +1705,8 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
|
||||
(__v64qi) _mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
|
||||
(__mmask64) __B);
|
||||
}
|
||||
|
||||
4
lib/include/avx512cdintrin.h
vendored
4
lib/include/avx512cdintrin.h
vendored
@ -15,7 +15,9 @@
|
||||
#define __AVX512CDINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512cd,evex512"), __min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_conflict_epi64 (__m512i __A)
|
||||
|
||||
6
lib/include/avx512dqintrin.h
vendored
6
lib/include/avx512dqintrin.h
vendored
@ -15,8 +15,10 @@
|
||||
#define __AVX512DQINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512dq,no-evex512")))
|
||||
|
||||
static __inline __mmask8 __DEFAULT_FN_ATTRS
|
||||
_knot_mask8(__mmask8 __M)
|
||||
|
||||
10
lib/include/avx512fintrin.h
vendored
10
lib/include/avx512fintrin.h
vendored
@ -167,9 +167,13 @@ typedef enum
|
||||
} _MM_MANTISSA_SIGN_ENUM;
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512f,no-evex512"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512f,no-evex512")))
|
||||
|
||||
/* Create vectors with repeated elements */
|
||||
|
||||
|
||||
10
lib/include/avx512fp16intrin.h
vendored
10
lib/include/avx512fp16intrin.h
vendored
@ -22,13 +22,15 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS512 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
|
||||
__min_vector_width__(512)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512fp16,evex512"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512fp16,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512fp16,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
|
||||
static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {
|
||||
|
||||
4
lib/include/avx512ifmaintrin.h
vendored
4
lib/include/avx512ifmaintrin.h
vendored
@ -15,7 +15,9 @@
|
||||
#define __IFMAINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512ifma,evex512"), __min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
|
||||
|
||||
10
lib/include/avx512ifmavlintrin.h
vendored
10
lib/include/avx512ifmavlintrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __IFMAVLINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512ifma,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512ifma,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
#define _mm_madd52hi_epu64(X, Y, Z) \
|
||||
((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \
|
||||
|
||||
5
lib/include/avx512pfintrin.h
vendored
5
lib/include/avx512pfintrin.h
vendored
@ -14,9 +14,6 @@
|
||||
#ifndef __AVX512PFINTRIN_H
|
||||
#define __AVX512PFINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf")))
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
|
||||
(void const *)(addr), (int)(scale), \
|
||||
@ -92,6 +89,4 @@
|
||||
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(void *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
|
||||
2
lib/include/avx512vbmi2intrin.h
vendored
2
lib/include/avx512vbmi2intrin.h
vendored
@ -15,7 +15,7 @@
|
||||
#define __AVX512VBMI2INTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2,evex512"), __min_vector_width__(512)))
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
|
||||
5
lib/include/avx512vbmiintrin.h
vendored
5
lib/include/avx512vbmiintrin.h
vendored
@ -15,8 +15,9 @@
|
||||
#define __VBMIINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512)))
|
||||
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vbmi,evex512"), __min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B)
|
||||
|
||||
11
lib/include/avx512vbmivlintrin.h
vendored
11
lib/include/avx512vbmivlintrin.h
vendored
@ -15,9 +15,14 @@
|
||||
#define __VBMIVLINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256)))
|
||||
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vbmi,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vbmi,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
|
||||
|
||||
6
lib/include/avx512vlbf16intrin.h
vendored
6
lib/include/avx512vlbf16intrin.h
vendored
@ -17,10 +17,12 @@
|
||||
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl, avx512bf16"), __min_vector_width__(128)))
|
||||
__target__("avx512vl,avx512bf16,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl, avx512bf16"), __min_vector_width__(256)))
|
||||
__target__("avx512vl,avx512bf16,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
/// Convert Two Packed Single Data to One Packed BF16 Data.
|
||||
///
|
||||
|
||||
10
lib/include/avx512vlbitalgintrin.h
vendored
10
lib/include/avx512vlbitalgintrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __AVX512VLBITALGINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512bitalg,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512bitalg,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_popcnt_epi16(__m256i __A)
|
||||
|
||||
10
lib/include/avx512vlbwintrin.h
vendored
10
lib/include/avx512vlbwintrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __AVX512VLBWINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512bw,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512bw,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
/* Integer compare */
|
||||
|
||||
|
||||
11
lib/include/avx512vlcdintrin.h
vendored
11
lib/include/avx512vlcdintrin.h
vendored
@ -14,9 +14,14 @@
|
||||
#define __AVX512VLCDINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256)))
|
||||
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512cd,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512cd,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_broadcastmb_epi64 (__mmask8 __A)
|
||||
|
||||
10
lib/include/avx512vldqintrin.h
vendored
10
lib/include/avx512vldqintrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __AVX512VLDQINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512dq,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512dq,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
|
||||
|
||||
4
lib/include/avx512vlfp16intrin.h
vendored
4
lib/include/avx512vlfp16intrin.h
vendored
@ -19,11 +19,11 @@
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512fp16, avx512vl"), \
|
||||
__target__("avx512fp16,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512fp16, avx512vl"), \
|
||||
__target__("avx512fp16,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
|
||||
static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {
|
||||
|
||||
10
lib/include/avx512vlintrin.h
vendored
10
lib/include/avx512vlintrin.h
vendored
@ -14,8 +14,14 @@
|
||||
#ifndef __AVX512VLINTRIN_H
|
||||
#define __AVX512VLINTRIN_H
|
||||
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
typedef short __v2hi __attribute__((__vector_size__(4)));
|
||||
typedef char __v4qi __attribute__((__vector_size__(4)));
|
||||
|
||||
10
lib/include/avx512vlvbmi2intrin.h
vendored
10
lib/include/avx512vlvbmi2intrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __AVX512VLVBMI2INTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vbmi2,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vbmi2,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
|
||||
10
lib/include/avx512vlvnniintrin.h
vendored
10
lib/include/avx512vlvnniintrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __AVX512VLVNNIINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vnni,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vnni,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
|
||||
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
|
||||
|
||||
6
lib/include/avx512vlvp2intersectintrin.h
vendored
6
lib/include/avx512vlvp2intersectintrin.h
vendored
@ -29,11 +29,13 @@
|
||||
#define _AVX512VLVP2INTERSECT_H
|
||||
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vp2intersect,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vp2intersect,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
/// Store, in an even/odd pair of mask registers, the indicators of the
|
||||
/// locations of value matches between dwords in operands __a and __b.
|
||||
|
||||
5
lib/include/avx512vnniintrin.h
vendored
5
lib/include/avx512vnniintrin.h
vendored
@ -15,8 +15,9 @@
|
||||
#define __AVX512VNNIINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512)))
|
||||
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vnni,evex512"), __min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
|
||||
3
lib/include/avx512vp2intersectintrin.h
vendored
3
lib/include/avx512vp2intersectintrin.h
vendored
@ -29,7 +29,8 @@
|
||||
#define _AVX512VP2INTERSECT_H
|
||||
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vp2intersect"), \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vp2intersect,evex512"), \
|
||||
__min_vector_width__(512)))
|
||||
|
||||
/// Store, in an even/odd pair of mask registers, the indicators of the
|
||||
|
||||
4
lib/include/avx512vpopcntdqintrin.h
vendored
4
lib/include/avx512vpopcntdqintrin.h
vendored
@ -17,7 +17,9 @@
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vpopcntdq,evex512"), \
|
||||
__min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);
|
||||
|
||||
8
lib/include/avx512vpopcntdqvlintrin.h
vendored
8
lib/include/avx512vpopcntdqvlintrin.h
vendored
@ -17,9 +17,13 @@
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vpopcntdq,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vpopcntdq,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_popcnt_epi64(__m128i __A) {
|
||||
|
||||
14
lib/include/avxintrin.h
vendored
14
lib/include/avxintrin.h
vendored
@ -50,8 +50,12 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
|
||||
#endif
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
|
||||
/* Arithmetic */
|
||||
/// Adds two 256-bit vectors of [4 x double].
|
||||
@ -3563,7 +3567,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
|
||||
/// \param __b
|
||||
/// A 256-bit integer vector containing the values to be moved.
|
||||
static __inline void __DEFAULT_FN_ATTRS
|
||||
_mm256_stream_si256(__m256i *__a, __m256i __b)
|
||||
_mm256_stream_si256(void *__a, __m256i __b)
|
||||
{
|
||||
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
|
||||
__builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
|
||||
@ -3583,7 +3587,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b)
|
||||
/// \param __b
|
||||
/// A 256-bit vector of [4 x double] containing the values to be moved.
|
||||
static __inline void __DEFAULT_FN_ATTRS
|
||||
_mm256_stream_pd(double *__a, __m256d __b)
|
||||
_mm256_stream_pd(void *__a, __m256d __b)
|
||||
{
|
||||
typedef __v4df __v4df_aligned __attribute__((aligned(32)));
|
||||
__builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
|
||||
@ -3604,7 +3608,7 @@ _mm256_stream_pd(double *__a, __m256d __b)
|
||||
/// \param __a
|
||||
/// A 256-bit vector of [8 x float] containing the values to be moved.
|
||||
static __inline void __DEFAULT_FN_ATTRS
|
||||
_mm256_stream_ps(float *__p, __m256 __a)
|
||||
_mm256_stream_ps(void *__p, __m256 __a)
|
||||
{
|
||||
typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));
|
||||
__builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);
|
||||
|
||||
305
lib/include/bmiintrin.h
vendored
305
lib/include/bmiintrin.h
vendored
@ -19,18 +19,17 @@
|
||||
to use it as a potentially faster version of BSF. */
|
||||
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
#define _tzcnt_u16(a) (__tzcnt_u16((a)))
|
||||
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 16-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 16-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see _tzcnt_u16
|
||||
static __inline__ unsigned short __RELAXED_FN_ATTRS
|
||||
__tzcnt_u16(unsigned short __X)
|
||||
{
|
||||
@ -41,13 +40,30 @@ __tzcnt_u16(unsigned short __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
|
||||
/// \code
|
||||
/// unsigned short _tzcnt_u16(unsigned short __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 16-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 16-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see __tzcnt_u16
|
||||
#define _tzcnt_u16 __tzcnt_u16
|
||||
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 32-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see _mm_tzcnt_32
|
||||
/// \see { _mm_tzcnt_32 _tzcnt_u32 }
|
||||
static __inline__ unsigned int __RELAXED_FN_ATTRS
|
||||
__tzcnt_u32(unsigned int __X)
|
||||
{
|
||||
@ -58,20 +74,35 @@ __tzcnt_u32(unsigned int __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An 32-bit integer containing the number of trailing zero bits in
|
||||
/// \returns A 32-bit integer containing the number of trailing zero bits in
|
||||
/// the operand.
|
||||
/// \see __tzcnt_u32
|
||||
/// \see { __tzcnt_u32 _tzcnt_u32 }
|
||||
static __inline__ int __RELAXED_FN_ATTRS
|
||||
_mm_tzcnt_32(unsigned int __X)
|
||||
{
|
||||
return (int)__builtin_ia32_tzcnt_u32(__X);
|
||||
}
|
||||
|
||||
#define _tzcnt_u32(a) (__tzcnt_u32((a)))
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned int _tzcnt_u32(unsigned int __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 32-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see { _mm_tzcnt_32 __tzcnt_u32 }
|
||||
#define _tzcnt_u32 __tzcnt_u32
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
@ -79,13 +110,13 @@ _mm_tzcnt_32(unsigned int __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 64-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see _mm_tzcnt_64
|
||||
/// \see { _mm_tzcnt_64 _tzcnt_u64 }
|
||||
static __inline__ unsigned long long __RELAXED_FN_ATTRS
|
||||
__tzcnt_u64(unsigned long long __X)
|
||||
{
|
||||
@ -96,20 +127,35 @@ __tzcnt_u64(unsigned long long __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns A 64-bit integer containing the number of trailing zero bits in
|
||||
/// the operand.
|
||||
/// \see __tzcnt_u64
|
||||
/// \see { __tzcnt_u64 _tzcnt_u64 }
|
||||
static __inline__ long long __RELAXED_FN_ATTRS
|
||||
_mm_tzcnt_64(unsigned long long __X)
|
||||
{
|
||||
return (long long)__builtin_ia32_tzcnt_u64(__X);
|
||||
}
|
||||
|
||||
#define _tzcnt_u64(a) (__tzcnt_u64((a)))
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned long long _tzcnt_u64(unsigned long long __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 64-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see { _mm_tzcnt_64 __tzcnt_u64 }
|
||||
#define _tzcnt_u64 __tzcnt_u64
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
@ -121,21 +167,12 @@ _mm_tzcnt_64(unsigned long long __X)
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
|
||||
|
||||
#define _andn_u32(a, b) (__andn_u32((a), (b)))
|
||||
|
||||
/* _bextr_u32 != __bextr_u32 */
|
||||
#define _blsi_u32(a) (__blsi_u32((a)))
|
||||
|
||||
#define _blsmsk_u32(a) (__blsmsk_u32((a)))
|
||||
|
||||
#define _blsr_u32(a) (__blsr_u32((a)))
|
||||
|
||||
/// Performs a bitwise AND of the second operand with the one's
|
||||
/// complement of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> ANDN </c> instruction.
|
||||
/// This intrinsic corresponds to the \c ANDN instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer containing one of the operands.
|
||||
@ -143,19 +180,40 @@ _mm_tzcnt_64(unsigned long long __X)
|
||||
/// An unsigned integer containing one of the operands.
|
||||
/// \returns An unsigned integer containing the bitwise AND of the second
|
||||
/// operand with the one's complement of the first operand.
|
||||
/// \see _andn_u32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__andn_u32(unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return ~__X & __Y;
|
||||
}
|
||||
|
||||
/// Performs a bitwise AND of the second operand with the one's
|
||||
/// complement of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned int _andn_u32(unsigned int __X, unsigned int __Y);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ANDN instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer containing one of the operands.
|
||||
/// \param __Y
|
||||
/// An unsigned integer containing one of the operands.
|
||||
/// \returns An unsigned integer containing the bitwise AND of the second
|
||||
/// operand with the one's complement of the first operand.
|
||||
/// \see __andn_u32
|
||||
#define _andn_u32 __andn_u32
|
||||
|
||||
/* AMD-specified, double-leading-underscore version of BEXTR */
|
||||
/// Extracts the specified bits from the first operand and returns them
|
||||
/// in the least significant bits of the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer whose bits are to be extracted.
|
||||
@ -178,7 +236,7 @@ __bextr_u32(unsigned int __X, unsigned int __Y)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer whose bits are to be extracted.
|
||||
@ -203,7 +261,7 @@ _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer whose bits are to be extracted.
|
||||
@ -224,33 +282,89 @@ _bextr2_u32(unsigned int __X, unsigned int __Y) {
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSI </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BLSI instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer whose bits are to be cleared.
|
||||
/// \returns An unsigned integer containing the result of clearing the bits from
|
||||
/// the source operand.
|
||||
/// \see _blsi_u32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__blsi_u32(unsigned int __X)
|
||||
{
|
||||
return __X & -__X;
|
||||
}
|
||||
|
||||
/// Clears all bits in the source except for the least significant bit
|
||||
/// containing a value of 1 and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned int _blsi_u32(unsigned int __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSI instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer whose bits are to be cleared.
|
||||
/// \returns An unsigned integer containing the result of clearing the bits from
|
||||
/// the source operand.
|
||||
/// \see __blsi_u32
|
||||
#define _blsi_u32 __blsi_u32
|
||||
|
||||
/// Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// including the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSMSK instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer used to create the mask.
|
||||
/// \returns An unsigned integer containing the newly created mask.
|
||||
/// \see _blsmsk_u32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__blsmsk_u32(unsigned int __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
}
|
||||
|
||||
/// Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// including the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
|
||||
/// \code
|
||||
/// unsigned int _blsmsk_u32(unsigned int __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSMSK instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer used to create the mask.
|
||||
/// \returns An unsigned integer containing the newly created mask.
|
||||
/// \see __blsmsk_u32
|
||||
#define _blsmsk_u32 __blsmsk_u32
|
||||
|
||||
/// Clears the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer containing the operand to be cleared.
|
||||
/// \returns An unsigned integer containing the result of clearing the source
|
||||
/// operand.
|
||||
/// \see _blsr_u32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__blsmsk_u32(unsigned int __X)
|
||||
__blsr_u32(unsigned int __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
|
||||
/// Clears the least significant bit that is set to 1 in the source
|
||||
@ -258,35 +372,27 @@ __blsmsk_u32(unsigned int __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSR </c> instruction.
|
||||
/// \code
|
||||
/// unsigned int _blsr_u32(unsigned int __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer containing the operand to be cleared.
|
||||
/// \returns An unsigned integer containing the result of clearing the source
|
||||
/// operand.
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__blsr_u32(unsigned int __X)
|
||||
{
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
/// \see __blsr_u32
|
||||
#define _blsr_u32 __blsr_u32
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
#define _andn_u64(a, b) (__andn_u64((a), (b)))
|
||||
|
||||
/* _bextr_u64 != __bextr_u64 */
|
||||
#define _blsi_u64(a) (__blsi_u64((a)))
|
||||
|
||||
#define _blsmsk_u64(a) (__blsmsk_u64((a)))
|
||||
|
||||
#define _blsr_u64(a) (__blsr_u64((a)))
|
||||
|
||||
/// Performs a bitwise AND of the second operand with the one's
|
||||
/// complement of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> ANDN </c> instruction.
|
||||
/// This intrinsic corresponds to the \c ANDN instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer containing one of the operands.
|
||||
@ -294,19 +400,41 @@ __blsr_u32(unsigned int __X)
|
||||
/// An unsigned 64-bit integer containing one of the operands.
|
||||
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
|
||||
/// operand with the one's complement of the first operand.
|
||||
/// \see _andn_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__andn_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return ~__X & __Y;
|
||||
}
|
||||
|
||||
/// Performs a bitwise AND of the second operand with the one's
|
||||
/// complement of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned long long _andn_u64(unsigned long long __X,
|
||||
/// unsigned long long __Y);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ANDN instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer containing one of the operands.
|
||||
/// \param __Y
|
||||
/// An unsigned 64-bit integer containing one of the operands.
|
||||
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
|
||||
/// operand with the one's complement of the first operand.
|
||||
/// \see __andn_u64
|
||||
#define _andn_u64 __andn_u64
|
||||
|
||||
/* AMD-specified, double-leading-underscore version of BEXTR */
|
||||
/// Extracts the specified bits from the first operand and returns them
|
||||
/// in the least significant bits of the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose bits are to be extracted.
|
||||
@ -329,7 +457,7 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose bits are to be extracted.
|
||||
@ -354,7 +482,7 @@ _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose bits are to be extracted.
|
||||
@ -375,33 +503,89 @@ _bextr2_u64(unsigned long long __X, unsigned long long __Y) {
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSI </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BLSI instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose bits are to be cleared.
|
||||
/// \returns An unsigned 64-bit integer containing the result of clearing the
|
||||
/// bits from the source operand.
|
||||
/// \see _blsi_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__blsi_u64(unsigned long long __X)
|
||||
{
|
||||
return __X & -__X;
|
||||
}
|
||||
|
||||
/// Clears all bits in the source except for the least significant bit
|
||||
/// containing a value of 1 and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned long long _blsi_u64(unsigned long long __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSI instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose bits are to be cleared.
|
||||
/// \returns An unsigned 64-bit integer containing the result of clearing the
|
||||
/// bits from the source operand.
|
||||
/// \see __blsi_u64
|
||||
#define _blsi_u64 __blsi_u64
|
||||
|
||||
/// Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// including the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSMSK instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer used to create the mask.
|
||||
/// \returns An unsigned 64-bit integer containing the newly created mask.
|
||||
/// \see _blsmsk_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__blsmsk_u64(unsigned long long __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
}
|
||||
|
||||
/// Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// including the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
|
||||
/// \code
|
||||
/// unsigned long long _blsmsk_u64(unsigned long long __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSMSK instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer used to create the mask.
|
||||
/// \returns An unsigned 64-bit integer containing the newly created mask.
|
||||
/// \see __blsmsk_u64
|
||||
#define _blsmsk_u64 __blsmsk_u64
|
||||
|
||||
/// Clears the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer containing the operand to be cleared.
|
||||
/// \returns An unsigned 64-bit integer containing the result of clearing the
|
||||
/// source operand.
|
||||
/// \see _blsr_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__blsmsk_u64(unsigned long long __X)
|
||||
__blsr_u64(unsigned long long __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
|
||||
/// Clears the least significant bit that is set to 1 in the source
|
||||
@ -409,17 +593,18 @@ __blsmsk_u64(unsigned long long __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSR </c> instruction.
|
||||
/// \code
|
||||
/// unsigned long long _blsr_u64(unsigned long long __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer containing the operand to be cleared.
|
||||
/// \returns An unsigned 64-bit integer containing the result of clearing the
|
||||
/// source operand.
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__blsr_u64(unsigned long long __X)
|
||||
{
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
/// \see __blsr_u64
|
||||
#define _blsr_u64 __blsr_u64
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
|
||||
9
lib/include/cuda_wrappers/bits/basic_string.h
vendored
Normal file
9
lib/include/cuda_wrappers/bits/basic_string.h
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
// CUDA headers define __noinline__ which interferes with libstdc++'s use of
|
||||
// `__attribute((__noinline__))`. In order to avoid compilation error,
|
||||
// temporarily unset __noinline__ when we include affected libstdc++ header.
|
||||
|
||||
#pragma push_macro("__noinline__")
|
||||
#undef __noinline__
|
||||
#include_next "bits/basic_string.h"
|
||||
|
||||
#pragma pop_macro("__noinline__")
|
||||
9
lib/include/cuda_wrappers/bits/basic_string.tcc
vendored
Normal file
9
lib/include/cuda_wrappers/bits/basic_string.tcc
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
// CUDA headers define __noinline__ which interferes with libstdc++'s use of
|
||||
// `__attribute((__noinline__))`. In order to avoid compilation error,
|
||||
// temporarily unset __noinline__ when we include affected libstdc++ header.
|
||||
|
||||
#pragma push_macro("__noinline__")
|
||||
#undef __noinline__
|
||||
#include_next "bits/basic_string.tcc"
|
||||
|
||||
#pragma pop_macro("__noinline__")
|
||||
20
lib/include/emmintrin.h
vendored
20
lib/include/emmintrin.h
vendored
@ -50,11 +50,11 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
|
||||
__min_vector_width__(128)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("sse2,no-evex512"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS_MMX \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), \
|
||||
__min_vector_width__(64)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("mmx,sse2,no-evex512"), __min_vector_width__(64)))
|
||||
|
||||
/// Adds lower double-precision values in both operands and returns the
|
||||
/// sum in the lower 64 bits of the result. The upper 64 bits of the result
|
||||
@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p,
|
||||
/// A pointer to the 128-bit aligned memory location used to store the value.
|
||||
/// \param __a
|
||||
/// A vector of [2 x double] containing the 64-bit values to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p,
|
||||
__m128d __a) {
|
||||
__builtin_nontemporal_store((__v2df)__a, (__v2df *)__p);
|
||||
}
|
||||
@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
|
||||
/// A pointer to the 128-bit aligned memory location used to store the value.
|
||||
/// \param __a
|
||||
/// A 128-bit integer vector containing the values to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p,
|
||||
__m128i __a) {
|
||||
__builtin_nontemporal_store((__v2di)__a, (__v2di *)__p);
|
||||
}
|
||||
@ -3983,8 +3983,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
|
||||
/// A 32-bit integer containing the value to be stored.
|
||||
static __inline__ void
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
|
||||
_mm_stream_si32(int *__p, int __a) {
|
||||
__builtin_ia32_movnti(__p, __a);
|
||||
_mm_stream_si32(void *__p, int __a) {
|
||||
__builtin_ia32_movnti((int *)__p, __a);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
@ -4003,8 +4003,8 @@ static __inline__ void
|
||||
/// A 64-bit integer containing the value to be stored.
|
||||
static __inline__ void
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
|
||||
_mm_stream_si64(long long *__p, long long __a) {
|
||||
__builtin_ia32_movnti64(__p, __a);
|
||||
_mm_stream_si64(void *__p, long long __a) {
|
||||
__builtin_ia32_movnti64((long long *)__p, __a);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
29
lib/include/gfniintrin.h
vendored
29
lib/include/gfniintrin.h
vendored
@ -15,19 +15,36 @@
|
||||
#define __GFNIINTRIN_H
|
||||
|
||||
/* Default attributes for simple form (no masking). */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("gfni,no-evex512"), __min_vector_width__(128)))
|
||||
|
||||
/* Default attributes for YMM unmasked form. */
|
||||
#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS_Y \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx,gfni,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
/* Default attributes for ZMM unmasked forms. */
|
||||
#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS_Z \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512f,evex512,gfni"), \
|
||||
__min_vector_width__(512)))
|
||||
/* Default attributes for ZMM masked forms. */
|
||||
#define __DEFAULT_FN_ATTRS_Z_MASK __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS_Z_MASK \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bw,evex512,gfni"), \
|
||||
__min_vector_width__(512)))
|
||||
|
||||
/* Default attributes for VLX masked forms. */
|
||||
#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS_VL128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bw,avx512vl,gfni,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS_VL256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bw,avx512vl,gfni,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
|
||||
|
||||
818
lib/include/ia32intrin.h
vendored
818
lib/include/ia32intrin.h
vendored
File diff suppressed because it is too large
Load Diff
69
lib/include/immintrin.h
vendored
69
lib/include/immintrin.h
vendored
@ -291,11 +291,13 @@
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__RDPID__)
|
||||
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
|
||||
/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
|
||||
///
|
||||
/// \returns The 32-bit contents of the MSR.
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
|
||||
_rdpid_u32(void) {
|
||||
return __builtin_ia32_rdpid();
|
||||
@ -488,6 +490,15 @@ _writegsbase_u64(unsigned long long __V)
|
||||
* field inside of it.
|
||||
*/
|
||||
|
||||
/// Load a 16-bit value from memory and swap its bytes.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the 16-bit value to load.
|
||||
/// \returns The byte-swapped value.
|
||||
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_loadbe_i16(void const * __P) {
|
||||
struct __loadu_i16 {
|
||||
@ -496,6 +507,16 @@ _loadbe_i16(void const * __P) {
|
||||
return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
|
||||
}
|
||||
|
||||
/// Swap the bytes of a 16-bit value and store it to memory.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the memory for storing the swapped value.
|
||||
/// \param __D
|
||||
/// The 16-bit value to be byte-swapped.
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_storebe_i16(void * __P, short __D) {
|
||||
struct __storeu_i16 {
|
||||
@ -504,6 +525,15 @@ _storebe_i16(void * __P, short __D) {
|
||||
((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
|
||||
}
|
||||
|
||||
/// Load a 32-bit value from memory and swap its bytes.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the 32-bit value to load.
|
||||
/// \returns The byte-swapped value.
|
||||
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_loadbe_i32(void const * __P) {
|
||||
struct __loadu_i32 {
|
||||
@ -512,6 +542,16 @@ _loadbe_i32(void const * __P) {
|
||||
return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
|
||||
}
|
||||
|
||||
/// Swap the bytes of a 32-bit value and store it to memory.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the memory for storing the swapped value.
|
||||
/// \param __D
|
||||
/// The 32-bit value to be byte-swapped.
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_storebe_i32(void * __P, int __D) {
|
||||
struct __storeu_i32 {
|
||||
@ -521,6 +561,15 @@ _storebe_i32(void * __P, int __D) {
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Load a 64-bit value from memory and swap its bytes.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the 64-bit value to load.
|
||||
/// \returns The byte-swapped value.
|
||||
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_loadbe_i64(void const * __P) {
|
||||
struct __loadu_i64 {
|
||||
@ -529,6 +578,16 @@ _loadbe_i64(void const * __P) {
|
||||
return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
|
||||
}
|
||||
|
||||
/// Swap the bytes of a 64-bit value and store it to memory.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the memory for storing the swapped value.
|
||||
/// \param __D
|
||||
/// The 64-bit value to be byte-swapped.
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_storebe_i64(void * __P, long long __D) {
|
||||
struct __storeu_i64 {
|
||||
@ -578,9 +637,13 @@ _storebe_i64(void * __P, long long __D) {
|
||||
#include <cetintrin.h>
|
||||
#endif
|
||||
|
||||
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
|
||||
* whereas others are also available at all times. */
|
||||
/* Intrinsics inside adcintrin.h are available at all times. */
|
||||
#include <adcintrin.h>
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__ADX__)
|
||||
#include <adxintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__RDSEED__)
|
||||
|
||||
16
lib/include/intrin.h
vendored
16
lib/include/intrin.h
vendored
@ -572,6 +572,22 @@ unsigned char __readx18byte(unsigned long offset);
|
||||
unsigned short __readx18word(unsigned long offset);
|
||||
unsigned long __readx18dword(unsigned long offset);
|
||||
unsigned __int64 __readx18qword(unsigned long offset);
|
||||
|
||||
double _CopyDoubleFromInt64(__int64);
|
||||
float _CopyFloatFromInt32(__int32);
|
||||
__int32 _CopyInt32FromFloat(float);
|
||||
__int64 _CopyInt64FromDouble(double);
|
||||
|
||||
unsigned int _CountLeadingOnes(unsigned long);
|
||||
unsigned int _CountLeadingOnes64(unsigned __int64);
|
||||
unsigned int _CountLeadingSigns(long);
|
||||
unsigned int _CountLeadingSigns64(__int64);
|
||||
unsigned int _CountLeadingZeros(unsigned long);
|
||||
unsigned int _CountLeadingZeros64(unsigned _int64);
|
||||
unsigned int _CountOneBits(unsigned long);
|
||||
unsigned int _CountOneBits64(unsigned __int64);
|
||||
|
||||
void __cdecl __prefetch(void *);
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|
||||
14
lib/include/larchintrin.h
vendored
14
lib/include/larchintrin.h
vendored
@ -156,7 +156,7 @@ extern __inline unsigned char
|
||||
return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1);
|
||||
}
|
||||
|
||||
extern __inline unsigned char
|
||||
extern __inline unsigned short
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__iocsrrd_h(unsigned int _1) {
|
||||
return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1);
|
||||
@ -228,6 +228,18 @@ extern __inline void
|
||||
((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2)))
|
||||
#endif
|
||||
|
||||
#define __frecipe_s(/*float*/ _1) \
|
||||
(float)__builtin_loongarch_frecipe_s((float)_1)
|
||||
|
||||
#define __frecipe_d(/*double*/ _1) \
|
||||
(double)__builtin_loongarch_frecipe_d((double)_1)
|
||||
|
||||
#define __frsqrte_s(/*float*/ _1) \
|
||||
(float)__builtin_loongarch_frsqrte_s((float)_1)
|
||||
|
||||
#define __frsqrte_d(/*double*/ _1) \
|
||||
(double)__builtin_loongarch_frsqrte_d((double)_1)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
3884
lib/include/lasxintrin.h
vendored
Normal file
3884
lib/include/lasxintrin.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6
lib/include/limits.h
vendored
6
lib/include/limits.h
vendored
@ -66,10 +66,8 @@
|
||||
|
||||
#define CHAR_BIT __CHAR_BIT__
|
||||
|
||||
/* C2x 5.2.4.2.1 */
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
/* C23 5.2.4.2.1 */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
#define BOOL_WIDTH __BOOL_WIDTH__
|
||||
#define CHAR_WIDTH CHAR_BIT
|
||||
#define SCHAR_WIDTH CHAR_BIT
|
||||
|
||||
34
lib/include/llvm_libc_wrappers/assert.h
vendored
Normal file
34
lib/include/llvm_libc_wrappers/assert.h
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
//===-- Wrapper for C standard assert.h declarations on the GPU ------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
|
||||
#define __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
|
||||
|
||||
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
|
||||
#error "This file is for GPU offloading compilation only"
|
||||
#endif
|
||||
|
||||
#include_next <assert.h>
|
||||
|
||||
#if __has_include(<llvm-libc-decls/assert.h>)
|
||||
|
||||
#if defined(__HIP__) || defined(__CUDA__)
|
||||
#define __LIBC_ATTRS __attribute__((device))
|
||||
#endif
|
||||
|
||||
#pragma omp begin declare target
|
||||
|
||||
#include <llvm-libc-decls/assert.h>
|
||||
|
||||
#pragma omp end declare target
|
||||
|
||||
#undef __LIBC_ATTRS
|
||||
|
||||
#endif
|
||||
|
||||
#endif // __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
|
||||
17
lib/include/llvm_libc_wrappers/ctype.h
vendored
17
lib/include/llvm_libc_wrappers/ctype.h
vendored
@ -13,8 +13,19 @@
|
||||
#error "This file is for GPU offloading compilation only"
|
||||
#endif
|
||||
|
||||
// The GNU headers like to define 'toupper' and 'tolower' redundantly. This is
|
||||
// necessary to prevent them from doing that and remapping our implementation.
|
||||
#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
|
||||
#pragma push_macro("__USE_EXTERN_INLINES")
|
||||
#undef __USE_EXTERN_INLINES
|
||||
#endif
|
||||
|
||||
#include_next <ctype.h>
|
||||
|
||||
#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
|
||||
#pragma pop_macro("__USE_EXTERN_INLINES")
|
||||
#endif
|
||||
|
||||
#if __has_include(<llvm-libc-decls/ctype.h>)
|
||||
|
||||
#if defined(__HIP__) || defined(__CUDA__)
|
||||
@ -26,6 +37,7 @@
|
||||
|
||||
#pragma push_macro("isalnum")
|
||||
#pragma push_macro("isalpha")
|
||||
#pragma push_macro("isascii")
|
||||
#pragma push_macro("isblank")
|
||||
#pragma push_macro("iscntrl")
|
||||
#pragma push_macro("isdigit")
|
||||
@ -36,11 +48,13 @@
|
||||
#pragma push_macro("isspace")
|
||||
#pragma push_macro("isupper")
|
||||
#pragma push_macro("isxdigit")
|
||||
#pragma push_macro("toascii")
|
||||
#pragma push_macro("tolower")
|
||||
#pragma push_macro("toupper")
|
||||
|
||||
#undef isalnum
|
||||
#undef isalpha
|
||||
#undef isascii
|
||||
#undef iscntrl
|
||||
#undef isdigit
|
||||
#undef islower
|
||||
@ -51,6 +65,7 @@
|
||||
#undef isupper
|
||||
#undef isblank
|
||||
#undef isxdigit
|
||||
#undef toascii
|
||||
#undef tolower
|
||||
#undef toupper
|
||||
|
||||
@ -64,6 +79,7 @@
|
||||
#if !defined(__NVPTX__) && !defined(__AMDGPU__)
|
||||
#pragma pop_macro("isalnum")
|
||||
#pragma pop_macro("isalpha")
|
||||
#pragma pop_macro("isascii")
|
||||
#pragma pop_macro("isblank")
|
||||
#pragma pop_macro("iscntrl")
|
||||
#pragma pop_macro("isdigit")
|
||||
@ -74,6 +90,7 @@
|
||||
#pragma pop_macro("isspace")
|
||||
#pragma pop_macro("isupper")
|
||||
#pragma pop_macro("isxdigit")
|
||||
#pragma pop_macro("toascii")
|
||||
#pragma pop_macro("tolower")
|
||||
#pragma pop_macro("toupper")
|
||||
#endif
|
||||
|
||||
52
lib/include/llvm_libc_wrappers/stdio.h
vendored
52
lib/include/llvm_libc_wrappers/stdio.h
vendored
@ -6,21 +6,58 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
|
||||
#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
|
||||
|
||||
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
|
||||
#error "This file is for GPU offloading compilation only"
|
||||
#endif
|
||||
|
||||
#include_next <stdio.h>
|
||||
|
||||
// In some old versions of glibc, other standard headers sometimes define
|
||||
// special macros (e.g., __need_FILE) before including stdio.h to cause stdio.h
|
||||
// to produce special definitions. Future includes of stdio.h when those
|
||||
// special macros are undefined are expected to produce the normal definitions
|
||||
// from stdio.h.
|
||||
//
|
||||
// We do not apply our include guard (__CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__)
|
||||
// unconditionally to the above include_next. Otherwise, after an occurrence of
|
||||
// the first glibc stdio.h use case described above, the include_next would be
|
||||
// skipped for remaining includes of stdio.h, leaving required symbols
|
||||
// undefined.
|
||||
//
|
||||
// We make the following assumptions to handle all use cases:
|
||||
//
|
||||
// 1. If the above include_next produces special glibc definitions, then (a) it
|
||||
// does not produce the normal definitions that we must intercept below, (b)
|
||||
// the current file was included from a glibc header that already defined
|
||||
// __GLIBC__ (usually by including glibc's <features.h>), and (c) the above
|
||||
// include_next does not define _STDIO_H. In that case, we skip the rest of
|
||||
// the current file and don't guard against future includes.
|
||||
// 2. If the above include_next produces the normal stdio.h definitions, then
|
||||
// either (a) __GLIBC__ is not defined because C headers are from some other
|
||||
// libc implementation or (b) the above include_next defines _STDIO_H to
|
||||
// prevent the above include_next from having any effect in the future.
|
||||
#if !defined(__GLIBC__) || defined(_STDIO_H)
|
||||
|
||||
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
|
||||
#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
|
||||
|
||||
#if __has_include(<llvm-libc-decls/stdio.h>)
|
||||
|
||||
#if defined(__HIP__) || defined(__CUDA__)
|
||||
#define __LIBC_ATTRS __attribute__((device))
|
||||
#endif
|
||||
|
||||
// Some headers provide these as macros. Temporarily undefine them so they do
|
||||
// not conflict with any definitions for the GPU.
|
||||
|
||||
#pragma push_macro("stdout")
|
||||
#pragma push_macro("stdin")
|
||||
#pragma push_macro("stderr")
|
||||
|
||||
#undef stdout
|
||||
#undef stderr
|
||||
#undef stdin
|
||||
|
||||
#pragma omp begin declare target
|
||||
|
||||
#include <llvm-libc-decls/stdio.h>
|
||||
@ -29,6 +66,15 @@
|
||||
|
||||
#undef __LIBC_ATTRS
|
||||
|
||||
// Restore the original macros when compiling on the host.
|
||||
#if !defined(__NVPTX__) && !defined(__AMDGPU__)
|
||||
#pragma pop_macro("stdout")
|
||||
#pragma pop_macro("stderr")
|
||||
#pragma pop_macro("stdin")
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif // __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
|
||||
|
||||
#endif
|
||||
|
||||
5
lib/include/llvm_libc_wrappers/stdlib.h
vendored
5
lib/include/llvm_libc_wrappers/stdlib.h
vendored
@ -23,8 +23,11 @@
|
||||
|
||||
#pragma omp begin declare target
|
||||
|
||||
// The LLVM C library uses this type so we forward declare it.
|
||||
// The LLVM C library uses these named types so we forward declare them.
|
||||
typedef void (*__atexithandler_t)(void);
|
||||
typedef int (*__bsearchcompare_t)(const void *, const void *);
|
||||
typedef int (*__qsortcompare_t)(const void *, const void *);
|
||||
typedef int (*__qsortrcompare_t)(const void *, const void *, void *);
|
||||
|
||||
// Enforce ABI compatibility with the structs used by the LLVM C library.
|
||||
_Static_assert(__builtin_offsetof(div_t, quot) == 0, "ABI mismatch!");
|
||||
|
||||
65
lib/include/llvm_libc_wrappers/string.h
vendored
65
lib/include/llvm_libc_wrappers/string.h
vendored
@ -13,9 +13,6 @@
|
||||
#error "This file is for GPU offloading compilation only"
|
||||
#endif
|
||||
|
||||
// FIXME: The GNU headers provide C++ standard compliant headers when in C++
|
||||
// mode and the LLVM libc does not. We cannot enable memchr, strchr, strchrnul,
|
||||
// strpbrk, strrchr, strstr, or strcasestr until this is addressed.
|
||||
#include_next <string.h>
|
||||
|
||||
#if __has_include(<llvm-libc-decls/string.h>)
|
||||
@ -26,8 +23,70 @@
|
||||
|
||||
#pragma omp begin declare target
|
||||
|
||||
// The GNU headers provide C++ standard compliant headers when in C++ mode and
|
||||
// the LLVM libc does not. We need to manually provide the definitions using the
|
||||
// same prototypes.
|
||||
#if defined(__cplusplus) && defined(__GLIBC__) && \
|
||||
defined(__CORRECT_ISO_CPP_STRING_H_PROTO)
|
||||
|
||||
#ifndef __LIBC_ATTRS
|
||||
#define __LIBC_ATTRS
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
void *memccpy(void *__restrict, const void *__restrict, int,
|
||||
size_t) __LIBC_ATTRS;
|
||||
int memcmp(const void *, const void *, size_t) __LIBC_ATTRS;
|
||||
void *memcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
|
||||
void *memmem(const void *, size_t, const void *, size_t) __LIBC_ATTRS;
|
||||
void *memmove(void *, const void *, size_t) __LIBC_ATTRS;
|
||||
void *mempcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
|
||||
void *memset(void *, int, size_t) __LIBC_ATTRS;
|
||||
char *stpcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
|
||||
char *stpncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
|
||||
char *strcat(char *__restrict, const char *__restrict) __LIBC_ATTRS;
|
||||
int strcmp(const char *, const char *) __LIBC_ATTRS;
|
||||
int strcoll(const char *, const char *) __LIBC_ATTRS;
|
||||
char *strcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
|
||||
size_t strcspn(const char *, const char *) __LIBC_ATTRS;
|
||||
char *strdup(const char *) __LIBC_ATTRS;
|
||||
size_t strlen(const char *) __LIBC_ATTRS;
|
||||
char *strncat(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
|
||||
int strncmp(const char *, const char *, size_t) __LIBC_ATTRS;
|
||||
char *strncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
|
||||
char *strndup(const char *, size_t) __LIBC_ATTRS;
|
||||
size_t strnlen(const char *, size_t) __LIBC_ATTRS;
|
||||
size_t strspn(const char *, const char *) __LIBC_ATTRS;
|
||||
char *strtok(char *__restrict, const char *__restrict) __LIBC_ATTRS;
|
||||
char *strtok_r(char *__restrict, const char *__restrict,
|
||||
char **__restrict) __LIBC_ATTRS;
|
||||
size_t strxfrm(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
|
||||
}
|
||||
|
||||
extern "C++" {
|
||||
char *strstr(char *, const char *) noexcept __LIBC_ATTRS;
|
||||
const char *strstr(const char *, const char *) noexcept __LIBC_ATTRS;
|
||||
char *strpbrk(char *, const char *) noexcept __LIBC_ATTRS;
|
||||
const char *strpbrk(const char *, const char *) noexcept __LIBC_ATTRS;
|
||||
char *strrchr(char *, int) noexcept __LIBC_ATTRS;
|
||||
const char *strrchr(const char *, int) noexcept __LIBC_ATTRS;
|
||||
char *strchr(char *, int) noexcept __LIBC_ATTRS;
|
||||
const char *strchr(const char *, int) noexcept __LIBC_ATTRS;
|
||||
char *strchrnul(char *, int) noexcept __LIBC_ATTRS;
|
||||
const char *strchrnul(const char *, int) noexcept __LIBC_ATTRS;
|
||||
char *strcasestr(char *, const char *) noexcept __LIBC_ATTRS;
|
||||
const char *strcasestr(const char *, const char *) noexcept __LIBC_ATTRS;
|
||||
void *memrchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
|
||||
const void *memrchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
|
||||
void *memchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
|
||||
const void *memchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
|
||||
}
|
||||
|
||||
#else
|
||||
#include <llvm-libc-decls/string.h>
|
||||
|
||||
#endif
|
||||
|
||||
#pragma omp end declare target
|
||||
|
||||
#undef __LIBC_ATTRS
|
||||
|
||||
34
lib/include/llvm_libc_wrappers/time.h
vendored
Normal file
34
lib/include/llvm_libc_wrappers/time.h
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
//===-- Wrapper for C standard time.h declarations on the GPU -------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
|
||||
#define __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
|
||||
|
||||
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
|
||||
#error "This file is for GPU offloading compilation only"
|
||||
#endif
|
||||
|
||||
#include_next <time.h>
|
||||
|
||||
#if __has_include(<llvm-libc-decls/time.h>)
|
||||
|
||||
#if defined(__HIP__) || defined(__CUDA__)
|
||||
#define __LIBC_ATTRS __attribute__((device))
|
||||
#endif
|
||||
|
||||
#pragma omp begin declare target
|
||||
|
||||
_Static_assert(sizeof(clock_t) == sizeof(long), "ABI mismatch!");
|
||||
|
||||
#include <llvm-libc-decls/time.h>
|
||||
|
||||
#pragma omp end declare target
|
||||
|
||||
#endif
|
||||
|
||||
#endif // __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
|
||||
3750
lib/include/lsxintrin.h
vendored
Normal file
3750
lib/include/lsxintrin.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
10
lib/include/mmintrin.h
vendored
10
lib/include/mmintrin.h
vendored
@ -22,7 +22,9 @@ typedef short __v4hi __attribute__((__vector_size__(8)));
|
||||
typedef char __v8qi __attribute__((__vector_size__(8)));
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \
|
||||
__min_vector_width__(64)))
|
||||
|
||||
/// Clears the MMX state by setting the state of the x87 stack registers
|
||||
/// to empty.
|
||||
@ -31,9 +33,9 @@ typedef char __v8qi __attribute__((__vector_size__(8)));
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> EMMS </c> instruction.
|
||||
///
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
|
||||
_mm_empty(void)
|
||||
{
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__,
|
||||
__target__("mmx,no-evex512")))
|
||||
_mm_empty(void) {
|
||||
__builtin_ia32_emms();
|
||||
}
|
||||
|
||||
|
||||
157
lib/include/module.modulemap
vendored
157
lib/include/module.modulemap
vendored
@ -153,10 +153,163 @@ module _Builtin_intrinsics [system] [extern_c] {
|
||||
}
|
||||
}
|
||||
|
||||
module _Builtin_stddef_max_align_t [system] [extern_c] {
|
||||
header "__stddef_max_align_t.h"
|
||||
// Start -fbuiltin-headers-in-system-modules affected modules
|
||||
|
||||
// The following modules all ignore their headers when
|
||||
// -fbuiltin-headers-in-system-modules is passed, and many of
|
||||
// those headers join system modules when present.
|
||||
|
||||
// e.g. if -fbuiltin-headers-in-system-modules is passed, then
|
||||
// float.h will not be in the _Builtin_float module (that module
|
||||
// will be empty). If there is a system module that declares
|
||||
// `header "float.h"`, then the builtin float.h will join
|
||||
// that module. The system float.h (if present) will be treated
|
||||
// as a textual header in the system module.
|
||||
module _Builtin_float [system] {
|
||||
header "float.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_inttypes [system] {
|
||||
header "inttypes.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_iso646 [system] {
|
||||
header "iso646.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_limits [system] {
|
||||
header "limits.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stdalign [system] {
|
||||
header "stdalign.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stdarg [system] {
|
||||
textual header "stdarg.h"
|
||||
|
||||
explicit module __gnuc_va_list {
|
||||
header "__stdarg___gnuc_va_list.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module __va_copy {
|
||||
header "__stdarg___va_copy.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module va_arg {
|
||||
header "__stdarg_va_arg.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module va_copy {
|
||||
header "__stdarg_va_copy.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module va_list {
|
||||
header "__stdarg_va_list.h"
|
||||
export *
|
||||
}
|
||||
}
|
||||
|
||||
module _Builtin_stdatomic [system] {
|
||||
header "stdatomic.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stdbool [system] {
|
||||
header "stdbool.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stddef [system] {
|
||||
textual header "stddef.h"
|
||||
|
||||
// __stddef_max_align_t.h is always in this module, even if
|
||||
// -fbuiltin-headers-in-system-modules is passed.
|
||||
explicit module max_align_t {
|
||||
header "__stddef_max_align_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module null {
|
||||
header "__stddef_null.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module nullptr_t {
|
||||
header "__stddef_nullptr_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module offsetof {
|
||||
header "__stddef_offsetof.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module ptrdiff_t {
|
||||
header "__stddef_ptrdiff_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module rsize_t {
|
||||
header "__stddef_rsize_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module size_t {
|
||||
header "__stddef_size_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module unreachable {
|
||||
header "__stddef_unreachable.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module wchar_t {
|
||||
header "__stddef_wchar_t.h"
|
||||
export *
|
||||
}
|
||||
}
|
||||
|
||||
// wint_t is provided by <wchar.h> and not <stddef.h>. It's here
|
||||
// for compatibility, but must be explicitly requested. Therefore
|
||||
// __stddef_wint_t.h is not part of _Builtin_stddef. It is always in
|
||||
// this module even if -fbuiltin-headers-in-system-modules is passed.
|
||||
module _Builtin_stddef_wint_t [system] {
|
||||
header "__stddef_wint_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stdint [system] {
|
||||
header "stdint.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stdnoreturn [system] {
|
||||
header "stdnoreturn.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_tgmath [system] {
|
||||
header "tgmath.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_unwind [system] {
|
||||
header "unwind.h"
|
||||
export *
|
||||
}
|
||||
// End -fbuiltin-headers-in-system-modules affected modules
|
||||
|
||||
module opencl_c {
|
||||
requires opencl
|
||||
header "opencl-c.h"
|
||||
|
||||
5
lib/include/opencl-c-base.h
vendored
5
lib/include/opencl-c-base.h
vendored
@ -45,6 +45,7 @@
|
||||
#define __opencl_c_ext_fp32_local_atomic_add 1
|
||||
#define __opencl_c_ext_fp32_global_atomic_min_max 1
|
||||
#define __opencl_c_ext_fp32_local_atomic_min_max 1
|
||||
#define __opencl_c_ext_image_raw10_raw12 1
|
||||
|
||||
#endif // defined(__SPIR__) || defined(__SPIRV__)
|
||||
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
@ -477,6 +478,10 @@ typedef enum memory_order
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_3_0
|
||||
#define CLK_UNORM_INT_101010_2 0x10E0
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_3_0
|
||||
#ifdef __opencl_c_ext_image_raw10_raw12
|
||||
#define CLK_UNSIGNED_INT_RAW10_EXT 0x10E3
|
||||
#define CLK_UNSIGNED_INT_RAW12_EXT 0x10E4
|
||||
#endif // __opencl_c_ext_image_raw10_raw12
|
||||
|
||||
// Channel order, numbering must be aligned with cl_channel_order in cl.h
|
||||
//
|
||||
|
||||
2
lib/include/openmp_wrappers/cmath
vendored
2
lib/include/openmp_wrappers/cmath
vendored
@ -1,4 +1,4 @@
|
||||
/*===-- __clang_openmp_device_functions.h - OpenMP math declares ------ c++ -===
|
||||
/*===-- __clang_openmp_device_functions.h - OpenMP math declares -*- c++ -*-===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
|
||||
3
lib/include/pmmintrin.h
vendored
3
lib/include/pmmintrin.h
vendored
@ -18,7 +18,8 @@
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("sse3,no-evex512"), __min_vector_width__(128)))
|
||||
|
||||
/// Loads data from an unaligned memory location to elements in a 128-bit
|
||||
/// vector.
|
||||
|
||||
26
lib/include/ppc_wrappers/nmmintrin.h
vendored
Normal file
26
lib/include/ppc_wrappers/nmmintrin.h
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
/*===---- nmmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef NO_WARN_X86_INTRINSICS
|
||||
/* This header is distributed to simplify porting x86_64 code that
|
||||
makes explicit use of Intel intrinsics to powerpc64le.
|
||||
It is the user's responsibility to determine if the results are
|
||||
acceptable and make additional changes as necessary.
|
||||
Note that much code that uses Intel intrinsics can be rewritten in
|
||||
standard C or GNU C extensions, which are more portable and better
|
||||
optimized across multiple targets. */
|
||||
#endif
|
||||
|
||||
#ifndef NMMINTRIN_H_
|
||||
#define NMMINTRIN_H_
|
||||
|
||||
/* We just include SSE4.1 header file. */
|
||||
#include <smmintrin.h>
|
||||
|
||||
#endif /* NMMINTRIN_H_ */
|
||||
50
lib/include/ppc_wrappers/smmintrin.h
vendored
50
lib/include/ppc_wrappers/smmintrin.h
vendored
@ -14,7 +14,7 @@
|
||||
|
||||
#ifndef NO_WARN_X86_INTRINSICS
|
||||
/* This header is distributed to simplify porting x86_64 code that
|
||||
makes explicit use of Intel intrinsics to powerp64/powerpc64le.
|
||||
makes explicit use of Intel intrinsics to powerpc64/powerpc64le.
|
||||
|
||||
It is the user's responsibility to determine if the results are
|
||||
acceptable and make additional changes as necessary.
|
||||
@ -68,10 +68,10 @@ extern __inline __m128d
|
||||
__asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
|
||||
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_mffs();
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
|
||||
__fpscr_save.__fpscr &= ~0xf8;
|
||||
__builtin_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
#endif
|
||||
/* Insert an artificial "read/write" reference to the variable
|
||||
read below, to ensure the compiler does not schedule
|
||||
@ -83,10 +83,15 @@ extern __inline __m128d
|
||||
|
||||
switch (__rounding) {
|
||||
case _MM_FROUND_TO_NEAREST_INT:
|
||||
__fpscr_save.__fr = __builtin_mffsl();
|
||||
#ifdef _ARCH_PWR9
|
||||
__fpscr_save.__fr = __builtin_ppc_mffsl();
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__fpscr_save.__fpscr &= 0x70007f0ffL;
|
||||
#endif
|
||||
__attribute__((fallthrough));
|
||||
case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
|
||||
__builtin_set_fpscr_rn(0b00);
|
||||
__builtin_ppc_set_fpscr_rn(0b00);
|
||||
/* Insert an artificial "read/write" reference to the variable
|
||||
read below, to ensure the compiler does not schedule
|
||||
a read/use of the variable before the FPSCR is modified, above.
|
||||
@ -102,7 +107,7 @@ extern __inline __m128d
|
||||
This can be removed if and when GCC PR102783 is fixed.
|
||||
*/
|
||||
__asm__("" : : "wa"(__r));
|
||||
__builtin_set_fpscr_rn(__fpscr_save.__fpscr);
|
||||
__builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
|
||||
break;
|
||||
case _MM_FROUND_TO_NEG_INF:
|
||||
case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
|
||||
@ -128,9 +133,14 @@ extern __inline __m128d
|
||||
*/
|
||||
__asm__("" : : "wa"(__r));
|
||||
/* Restore enabled exceptions. */
|
||||
__fpscr_save.__fr = __builtin_mffsl();
|
||||
#ifdef _ARCH_PWR9
|
||||
__fpscr_save.__fr = __builtin_ppc_mffsl();
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__fpscr_save.__fpscr &= 0x70007f0ffL;
|
||||
#endif
|
||||
__fpscr_save.__fpscr |= __enables_save.__fpscr;
|
||||
__builtin_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
}
|
||||
return (__m128d)__r;
|
||||
}
|
||||
@ -159,10 +169,10 @@ extern __inline __m128
|
||||
__asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
|
||||
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_mffs();
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
|
||||
__fpscr_save.__fpscr &= ~0xf8;
|
||||
__builtin_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
#endif
|
||||
/* Insert an artificial "read/write" reference to the variable
|
||||
read below, to ensure the compiler does not schedule
|
||||
@ -174,10 +184,15 @@ extern __inline __m128
|
||||
|
||||
switch (__rounding) {
|
||||
case _MM_FROUND_TO_NEAREST_INT:
|
||||
__fpscr_save.__fr = __builtin_mffsl();
|
||||
#ifdef _ARCH_PWR9
|
||||
__fpscr_save.__fr = __builtin_ppc_mffsl();
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__fpscr_save.__fpscr &= 0x70007f0ffL;
|
||||
#endif
|
||||
__attribute__((fallthrough));
|
||||
case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
|
||||
__builtin_set_fpscr_rn(0b00);
|
||||
__builtin_ppc_set_fpscr_rn(0b00);
|
||||
/* Insert an artificial "read/write" reference to the variable
|
||||
read below, to ensure the compiler does not schedule
|
||||
a read/use of the variable before the FPSCR is modified, above.
|
||||
@ -193,7 +208,7 @@ extern __inline __m128
|
||||
This can be removed if and when GCC PR102783 is fixed.
|
||||
*/
|
||||
__asm__("" : : "wa"(__r));
|
||||
__builtin_set_fpscr_rn(__fpscr_save.__fpscr);
|
||||
__builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
|
||||
break;
|
||||
case _MM_FROUND_TO_NEG_INF:
|
||||
case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
|
||||
@ -219,9 +234,14 @@ extern __inline __m128
|
||||
*/
|
||||
__asm__("" : : "wa"(__r));
|
||||
/* Restore enabled exceptions. */
|
||||
__fpscr_save.__fr = __builtin_mffsl();
|
||||
#ifdef _ARCH_PWR9
|
||||
__fpscr_save.__fr = __builtin_ppc_mffsl();
|
||||
#else
|
||||
__fpscr_save.__fr = __builtin_ppc_mffs();
|
||||
__fpscr_save.__fpscr &= 0x70007f0ffL;
|
||||
#endif
|
||||
__fpscr_save.__fpscr |= __enables_save.__fpscr;
|
||||
__builtin_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
|
||||
}
|
||||
return (__m128)__r;
|
||||
}
|
||||
|
||||
195
lib/include/riscv_bitmanip.h
vendored
Normal file
195
lib/include/riscv_bitmanip.h
vendored
Normal file
@ -0,0 +1,195 @@
|
||||
/*===---- riscv_bitmanip.h - RISC-V Zb* intrinsics --------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __RISCV_BITMANIP_H
|
||||
#define __RISCV_BITMANIP_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__riscv_zbb)
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_orc_b_32(uint32_t __x) {
|
||||
return __builtin_riscv_orc_b_32(__x);
|
||||
}
|
||||
|
||||
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_clz_32(uint32_t __x) {
|
||||
return __builtin_riscv_clz_32(__x);
|
||||
}
|
||||
|
||||
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_ctz_32(uint32_t __x) {
|
||||
return __builtin_riscv_ctz_32(__x);
|
||||
}
|
||||
|
||||
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_cpop_32(uint32_t __x) {
|
||||
return __builtin_popcount(__x);
|
||||
}
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_orc_b_64(uint64_t __x) {
|
||||
return __builtin_riscv_orc_b_64(__x);
|
||||
}
|
||||
|
||||
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_clz_64(uint64_t __x) {
|
||||
return __builtin_riscv_clz_64(__x);
|
||||
}
|
||||
|
||||
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_ctz_64(uint64_t __x) {
|
||||
return __builtin_riscv_ctz_64(__x);
|
||||
}
|
||||
|
||||
static __inline__ unsigned __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_cpop_64(uint64_t __x) {
|
||||
return __builtin_popcountll(__x);
|
||||
}
|
||||
#endif
|
||||
#endif // defined(__riscv_zbb)
|
||||
|
||||
#if defined(__riscv_zbb) || defined(__riscv_zbkb)
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_rev8_32(uint32_t __x) {
|
||||
return __builtin_bswap32(__x);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_rol_32(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_rotateleft32(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_ror_32(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_rotateright32(__x, __y);
|
||||
}
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_rev8_64(uint64_t __x) {
|
||||
return __builtin_bswap64(__x);
|
||||
}
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_rol_64(uint64_t __x, uint32_t __y) {
|
||||
return __builtin_rotateleft64(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_ror_64(uint64_t __x, uint32_t __y) {
|
||||
return __builtin_rotateright64(__x, __y);
|
||||
}
|
||||
#endif
|
||||
#endif // defined(__riscv_zbb) || defined(__riscv_zbkb)
|
||||
|
||||
#if defined(__riscv_zbkb)
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_brev8_32(uint32_t __x) {
|
||||
return __builtin_riscv_brev8_32(__x);
|
||||
}
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_brev8_64(uint64_t __x) {
|
||||
return __builtin_riscv_brev8_64(__x);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if __riscv_xlen == 32
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_unzip_32(uint32_t __x) {
|
||||
return __builtin_riscv_unzip_32(__x);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_zip_32(uint32_t __x) {
|
||||
return __builtin_riscv_zip_32(__x);
|
||||
}
|
||||
#endif
|
||||
#endif // defined(__riscv_zbkb)
|
||||
|
||||
#if defined(__riscv_zbc)
|
||||
#if __riscv_xlen == 32
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_clmulr_32(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_riscv_clmulr_32(__x, __y);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_clmulr_64(uint64_t __x, uint64_t __y) {
|
||||
return __builtin_riscv_clmulr_64(__x, __y);
|
||||
}
|
||||
#endif
|
||||
#endif // defined(__riscv_zbc)
|
||||
|
||||
#if defined(__riscv_zbkc) || defined(__riscv_zbc)
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_clmul_32(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_riscv_clmul_32(__x, __y);
|
||||
}
|
||||
|
||||
#if __riscv_xlen == 32
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_clmulh_32(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_riscv_clmulh_32(__x, __y);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_clmul_64(uint64_t __x, uint64_t __y) {
|
||||
return __builtin_riscv_clmul_64(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_clmulh_64(uint64_t __x, uint64_t __y) {
|
||||
return __builtin_riscv_clmulh_64(__x, __y);
|
||||
}
|
||||
#endif
|
||||
#endif // defined(__riscv_zbkc) || defined(__riscv_zbc)
|
||||
|
||||
#if defined(__riscv_zbkx)
|
||||
#if __riscv_xlen == 32
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_xperm4_32(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_riscv_xperm4_32(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_xperm8_32(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_riscv_xperm8_32(__x, __y);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_xperm4_64(uint64_t __x, uint64_t __y) {
|
||||
return __builtin_riscv_xperm4_64(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_xperm8_64(uint64_t __x, uint64_t __y) {
|
||||
return __builtin_riscv_xperm8_64(__x, __y);
|
||||
}
|
||||
#endif
|
||||
#endif // defined(__riscv_zbkx)
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
170
lib/include/riscv_crypto.h
vendored
Normal file
170
lib/include/riscv_crypto.h
vendored
Normal file
@ -0,0 +1,170 @@
|
||||
/*===---- riscv_crypto.h - RISC-V Zk* intrinsics ---------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __RISCV_CRYPTO_H
|
||||
#define __RISCV_CRYPTO_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__riscv_zknd)
|
||||
#if __riscv_xlen == 32
|
||||
#define __riscv_aes32dsi(x, y, bs) __builtin_riscv_aes32dsi(x, y, bs)
|
||||
#define __riscv_aes32dsmi(x, y, bs) __builtin_riscv_aes32dsmi(x, y, bs)
|
||||
#endif
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_aes64ds(uint64_t __x, uint64_t __y) {
|
||||
return __builtin_riscv_aes64ds(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_aes64dsm(uint64_t __x, uint64_t __y) {
|
||||
return __builtin_riscv_aes64dsm(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_aes64im(uint64_t __x) {
|
||||
return __builtin_riscv_aes64im(__x);
|
||||
}
|
||||
#endif
|
||||
#endif // defined(__riscv_zknd)
|
||||
|
||||
#if defined(__riscv_zkne)
|
||||
#if __riscv_xlen == 32
|
||||
#define __riscv_aes32esi(x, y, bs) __builtin_riscv_aes32esi(x, y, bs)
|
||||
#define __riscv_aes32esmi(x, y, bs) __builtin_riscv_aes32esmi(x, y, bs)
|
||||
#endif
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_aes64es(uint64_t __x, uint64_t __y) {
|
||||
return __builtin_riscv_aes64es(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_aes64esm(uint64_t __x, uint64_t __y) {
|
||||
return __builtin_riscv_aes64esm(__x, __y);
|
||||
}
|
||||
#endif
|
||||
#endif // defined(__riscv_zkne)
|
||||
|
||||
#if defined(__riscv_zknd) || defined(__riscv_zkne)
|
||||
#if __riscv_xlen == 64
|
||||
#define __riscv_aes64ks1i(x, rnum) __builtin_riscv_aes64ks1i(x, rnum)
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_aes64ks2(uint64_t __x, uint64_t __y) {
|
||||
return __builtin_riscv_aes64ks2(__x, __y);
|
||||
}
|
||||
#endif
|
||||
#endif // defined(__riscv_zknd) || defined(__riscv_zkne)
|
||||
|
||||
#if defined(__riscv_zknh)
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha256sig0(uint32_t __x) {
|
||||
return __builtin_riscv_sha256sig0(__x);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha256sig1(uint32_t __x) {
|
||||
return __builtin_riscv_sha256sig1(__x);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha256sum0(uint32_t __x) {
|
||||
return __builtin_riscv_sha256sum0(__x);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha256sum1(uint32_t __x) {
|
||||
return __builtin_riscv_sha256sum1(__x);
|
||||
}
|
||||
|
||||
#if __riscv_xlen == 32
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha512sig0h(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_riscv_sha512sig0h(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha512sig0l(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_riscv_sha512sig0l(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha512sig1h(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_riscv_sha512sig1h(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha512sig1l(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_riscv_sha512sig1l(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha512sum0r(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_riscv_sha512sum0r(__x, __y);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha512sum1r(uint32_t __x, uint32_t __y) {
|
||||
return __builtin_riscv_sha512sum1r(__x, __y);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha512sig0(uint64_t __x) {
|
||||
return __builtin_riscv_sha512sig0(__x);
|
||||
}
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha512sig1(uint64_t __x) {
|
||||
return __builtin_riscv_sha512sig1(__x);
|
||||
}
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha512sum0(uint64_t __x) {
|
||||
return __builtin_riscv_sha512sum0(__x);
|
||||
}
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sha512sum1(uint64_t __x) {
|
||||
return __builtin_riscv_sha512sum1(__x);
|
||||
}
|
||||
#endif
|
||||
#endif // defined(__riscv_zknh)
|
||||
|
||||
#if defined(__riscv_zksh)
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sm3p0(uint32_t __x) {
|
||||
return __builtin_riscv_sm3p0(__x);
|
||||
}
|
||||
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__riscv_sm3p1(uint32_t __x) {
|
||||
return __builtin_riscv_sm3p1(__x);
|
||||
}
|
||||
#endif // defined(__riscv_zksh)
|
||||
|
||||
#if defined(__riscv_zksed)
|
||||
/* Forwards to __builtin_riscv_sm4ed. Expands to a plain expression with no
 * trailing ';' so it can be used inside larger expressions, matching the
 * __riscv_aes32* macros in this header. */
#define __riscv_sm4ed(x, y, bs) __builtin_riscv_sm4ed(x, y, bs)
|
||||
/* Forwards to __builtin_riscv_sm4ks. Expands to a plain expression with no
 * trailing ';' so it can be used inside larger expressions, matching the
 * __riscv_aes32* macros in this header. */
#define __riscv_sm4ks(x, y, bs) __builtin_riscv_sm4ks(x, y, bs)
|
||||
#endif // defined(__riscv_zksed)
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
6
lib/include/riscv_ntlh.h
vendored
6
lib/include/riscv_ntlh.h
vendored
@ -21,8 +21,6 @@ enum {
|
||||
__RISCV_NTLH_ALL
|
||||
};
|
||||
|
||||
#define __riscv_ntl_load(PTR, DOMAIN) __builtin_riscv_ntl_load((PTR), (DOMAIN))
|
||||
#define __riscv_ntl_store(PTR, VAL, DOMAIN) \
|
||||
__builtin_riscv_ntl_store((PTR), (VAL), (DOMAIN))
|
||||
|
||||
#define __riscv_ntl_load __builtin_riscv_ntl_load
|
||||
#define __riscv_ntl_store __builtin_riscv_ntl_store
|
||||
#endif
|
||||
31
lib/include/riscv_vector.h
vendored
31
lib/include/riscv_vector.h
vendored
@ -392,6 +392,37 @@ typedef __rvv_float64m2x4_t vfloat64m2x4_t;
|
||||
typedef __rvv_float64m4_t vfloat64m4_t;
|
||||
typedef __rvv_float64m4x2_t vfloat64m4x2_t;
|
||||
typedef __rvv_float64m8_t vfloat64m8_t;
|
||||
typedef __rvv_bfloat16mf4_t vbfloat16mf4_t;
|
||||
typedef __rvv_bfloat16mf4x2_t vbfloat16mf4x2_t;
|
||||
typedef __rvv_bfloat16mf4x3_t vbfloat16mf4x3_t;
|
||||
typedef __rvv_bfloat16mf4x4_t vbfloat16mf4x4_t;
|
||||
typedef __rvv_bfloat16mf4x5_t vbfloat16mf4x5_t;
|
||||
typedef __rvv_bfloat16mf4x6_t vbfloat16mf4x6_t;
|
||||
typedef __rvv_bfloat16mf4x7_t vbfloat16mf4x7_t;
|
||||
typedef __rvv_bfloat16mf4x8_t vbfloat16mf4x8_t;
|
||||
typedef __rvv_bfloat16mf2_t vbfloat16mf2_t;
|
||||
typedef __rvv_bfloat16mf2x2_t vbfloat16mf2x2_t;
|
||||
typedef __rvv_bfloat16mf2x3_t vbfloat16mf2x3_t;
|
||||
typedef __rvv_bfloat16mf2x4_t vbfloat16mf2x4_t;
|
||||
typedef __rvv_bfloat16mf2x5_t vbfloat16mf2x5_t;
|
||||
typedef __rvv_bfloat16mf2x6_t vbfloat16mf2x6_t;
|
||||
typedef __rvv_bfloat16mf2x7_t vbfloat16mf2x7_t;
|
||||
typedef __rvv_bfloat16mf2x8_t vbfloat16mf2x8_t;
|
||||
typedef __rvv_bfloat16m1_t vbfloat16m1_t;
|
||||
typedef __rvv_bfloat16m1x2_t vbfloat16m1x2_t;
|
||||
typedef __rvv_bfloat16m1x3_t vbfloat16m1x3_t;
|
||||
typedef __rvv_bfloat16m1x4_t vbfloat16m1x4_t;
|
||||
typedef __rvv_bfloat16m1x5_t vbfloat16m1x5_t;
|
||||
typedef __rvv_bfloat16m1x6_t vbfloat16m1x6_t;
|
||||
typedef __rvv_bfloat16m1x7_t vbfloat16m1x7_t;
|
||||
typedef __rvv_bfloat16m1x8_t vbfloat16m1x8_t;
|
||||
typedef __rvv_bfloat16m2_t vbfloat16m2_t;
|
||||
typedef __rvv_bfloat16m2x2_t vbfloat16m2x2_t;
|
||||
typedef __rvv_bfloat16m2x3_t vbfloat16m2x3_t;
|
||||
typedef __rvv_bfloat16m2x4_t vbfloat16m2x4_t;
|
||||
typedef __rvv_bfloat16m4_t vbfloat16m4_t;
|
||||
typedef __rvv_bfloat16m4x2_t vbfloat16m4x2_t;
|
||||
typedef __rvv_bfloat16m8_t vbfloat16m8_t;
|
||||
#define __riscv_v_intrinsic_overloading 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
6
lib/include/smmintrin.h
vendored
6
lib/include/smmintrin.h
vendored
@ -18,8 +18,8 @@
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), \
|
||||
__min_vector_width__(128)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("sse4.1,no-evex512"), __min_vector_width__(128)))
|
||||
|
||||
/* SSE4 Rounding macros. */
|
||||
#define _MM_FROUND_TO_NEAREST_INT 0x00
|
||||
@ -645,7 +645,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
|
||||
/// \returns A 128-bit integer vector containing the data stored at the
|
||||
/// specified memory location.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_stream_load_si128(__m128i const *__V) {
|
||||
_mm_stream_load_si128(const void *__V) {
|
||||
return (__m128i)__builtin_nontemporal_load((const __v2di *)__V);
|
||||
}
|
||||
|
||||
|
||||
4
lib/include/stdalign.h
vendored
4
lib/include/stdalign.h
vendored
@ -10,10 +10,8 @@
|
||||
#ifndef __STDALIGN_H
|
||||
#define __STDALIGN_H
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__cplusplus) || \
|
||||
(defined(__STDC_VERSION__) && __STDC_VERSION__ < 202000L)
|
||||
(defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L)
|
||||
#ifndef __cplusplus
|
||||
#define alignas _Alignas
|
||||
#define alignof _Alignof
|
||||
|
||||
90
lib/include/stdarg.h
vendored
90
lib/include/stdarg.h
vendored
@ -7,45 +7,73 @@
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __STDARG_H
|
||||
/*
|
||||
* This header is designed to be included multiple times. If any of the __need_
|
||||
* macros are defined, then only that subset of interfaces is provided. This
|
||||
* can be useful for POSIX headers that need to not expose all of stdarg.h, but
|
||||
* need to use some of its interfaces. Otherwise this header provides all of
|
||||
* the expected interfaces.
|
||||
*
|
||||
* When clang modules are enabled, this header is a textual header. It ignores
|
||||
* its header guard so that multiple submodules can export its interfaces.
|
||||
* Take module SM with submodules A and B, whose headers both include stdarg.h.
|
||||
* When SM.A builds, __STDARG_H will be defined. When SM.B builds, the
|
||||
* definition from SM.A will leak when building without local submodule
|
||||
* visibility. stdarg.h wouldn't include any of its implementation headers, and
|
||||
* SM.B wouldn't import any of the stdarg modules, and SM.B's `export *`
|
||||
* wouldn't export any stdarg interfaces as expected. However, since stdarg.h
|
||||
* ignores its header guard when building with modules, it all works as
|
||||
* expected.
|
||||
*
|
||||
* When clang modules are not enabled, the header guards can function in the
|
||||
* normal simple fashion.
|
||||
*/
|
||||
#if !defined(__STDARG_H) || __has_feature(modules) || \
|
||||
defined(__need___va_list) || defined(__need_va_list) || \
|
||||
defined(__need_va_arg) || defined(__need___va_copy) || \
|
||||
defined(__need_va_copy)
|
||||
|
||||
#ifndef __GNUC_VA_LIST
|
||||
#define __GNUC_VA_LIST
|
||||
typedef __builtin_va_list __gnuc_va_list;
|
||||
#endif
|
||||
|
||||
#ifdef __need___va_list
|
||||
#undef __need___va_list
|
||||
#else
|
||||
#if !defined(__need___va_list) && !defined(__need_va_list) && \
|
||||
!defined(__need_va_arg) && !defined(__need___va_copy) && \
|
||||
!defined(__need_va_copy)
|
||||
#define __STDARG_H
|
||||
#ifndef _VA_LIST
|
||||
typedef __builtin_va_list va_list;
|
||||
#define _VA_LIST
|
||||
#endif
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
/* C2x does not require the second parameter for va_start. */
|
||||
#define va_start(ap, ...) __builtin_va_start(ap, 0)
|
||||
#else
|
||||
/* Versions before C2x do require the second parameter. */
|
||||
#define va_start(ap, param) __builtin_va_start(ap, param)
|
||||
#endif
|
||||
#define va_end(ap) __builtin_va_end(ap)
|
||||
#define va_arg(ap, type) __builtin_va_arg(ap, type)
|
||||
|
||||
#define __need___va_list
|
||||
#define __need_va_list
|
||||
#define __need_va_arg
|
||||
#define __need___va_copy
|
||||
/* GCC always defines __va_copy, but does not define va_copy unless in c99 mode
|
||||
* or -ansi is not specified, since it was not part of C90.
|
||||
*/
|
||||
#define __va_copy(d,s) __builtin_va_copy(d,s)
|
||||
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
||||
(defined(__cplusplus) && __cplusplus >= 201103L) || \
|
||||
!defined(__STRICT_ANSI__)
|
||||
#define va_copy(dest, src) __builtin_va_copy(dest, src)
|
||||
#define __need_va_copy
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* __STDARG_H */
|
||||
#ifdef __need___va_list
|
||||
#include <__stdarg___gnuc_va_list.h>
|
||||
#undef __need___va_list
|
||||
#endif /* defined(__need___va_list) */
|
||||
|
||||
#endif /* not __STDARG_H */
|
||||
#ifdef __need_va_list
|
||||
#include <__stdarg_va_list.h>
|
||||
#undef __need_va_list
|
||||
#endif /* defined(__need_va_list) */
|
||||
|
||||
#ifdef __need_va_arg
|
||||
#include <__stdarg_va_arg.h>
|
||||
#undef __need_va_arg
|
||||
#endif /* defined(__need_va_arg) */
|
||||
|
||||
#ifdef __need___va_copy
|
||||
#include <__stdarg___va_copy.h>
|
||||
#undef __need___va_copy
|
||||
#endif /* defined(__need___va_copy) */
|
||||
|
||||
#ifdef __need_va_copy
|
||||
#include <__stdarg_va_copy.h>
|
||||
#undef __need_va_copy
|
||||
#endif /* defined(__need_va_copy) */
|
||||
|
||||
#endif
|
||||
|
||||
8
lib/include/stdatomic.h
vendored
8
lib/include/stdatomic.h
vendored
@ -45,16 +45,14 @@ extern "C" {
|
||||
#define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE
|
||||
|
||||
/* 7.17.2 Initialization */
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202000L) || \
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L) || \
|
||||
defined(__cplusplus)
|
||||
/* ATOMIC_VAR_INIT was removed in C2x, but still remains in C++23. */
|
||||
/* ATOMIC_VAR_INIT was removed in C23, but still remains in C++23. */
|
||||
#define ATOMIC_VAR_INIT(value) (value)
|
||||
#endif
|
||||
|
||||
#if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201710L && \
|
||||
__STDC_VERSION__ < 202000L) || \
|
||||
__STDC_VERSION__ < 202311L) || \
|
||||
(defined(__cplusplus) && __cplusplus >= 202002L)) && \
|
||||
!defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS)
|
||||
/* ATOMIC_VAR_INIT was deprecated in C17 and C++20. */
|
||||
|
||||
42
lib/include/stdckdint.h
vendored
Normal file
42
lib/include/stdckdint.h
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
/*===---- stdckdint.h - Standard header for checking integer----------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __STDCKDINT_H
|
||||
#define __STDCKDINT_H
|
||||
|
||||
/* If we're hosted, fall back to the system's stdckdint.h. FreeBSD, for
|
||||
* example, already has a Clang-compatible stdckdint.h header.
|
||||
*
|
||||
* The `stdckdint.h` header requires C23 or newer.
|
||||
*/
|
||||
#if __STDC_HOSTED__ && __has_include_next(<stdckdint.h>)
|
||||
#include_next <stdckdint.h>
|
||||
#else
|
||||
|
||||
/* C23 7.20.1 Defines several macros for performing checked integer arithmetic*/
|
||||
|
||||
#define __STDC_VERSION_STDCKDINT_H__ 202311L
|
||||
|
||||
// Both A and B shall be any integer type other than "plain" char, bool, a bit-
|
||||
// precise integer type, or an enumerated type, and they need not be the same.
|
||||
|
||||
// R shall be a modifiable lvalue of any integer type other than "plain" char,
|
||||
// bool, a bit-precise integer type, or an enumerated type. It shouldn't be
|
||||
// short type, either. Otherwise, it may be unable to hold the result of
|
||||
// operating on two 'int's.
|
||||
|
||||
// A diagnostic message will be produced if A or B are not suitable integer
|
||||
// types, or if R is not a modifiable lvalue of a suitable integer type or R
|
||||
// is short type.
|
||||
#define ckd_add(R, A, B) __builtin_add_overflow((A), (B), (R))
|
||||
#define ckd_sub(R, A, B) __builtin_sub_overflow((A), (B), (R))
|
||||
#define ckd_mul(R, A, B) __builtin_mul_overflow((A), (B), (R))
|
||||
|
||||
#endif /* __STDC_HOSTED__ */
|
||||
#endif /* __STDCKDINT_H */
|
||||
166
lib/include/stddef.h
vendored
166
lib/include/stddef.h
vendored
@ -7,126 +7,116 @@
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \
|
||||
defined(__need_size_t) || defined(__need_wchar_t) || \
|
||||
defined(__need_NULL) || defined(__need_wint_t)
|
||||
/*
|
||||
* This header is designed to be included multiple times. If any of the __need_
|
||||
* macros are defined, then only that subset of interfaces are provided. This
|
||||
* can be useful for POSIX headers that need to not expose all of stddef.h, but
|
||||
* need to use some of its interfaces. Otherwise this header provides all of
|
||||
* the expected interfaces.
|
||||
*
|
||||
* When clang modules are enabled, this header is a textual header. It ignores
|
||||
* its header guard so that multiple submodules can export its interfaces.
|
||||
* Take module SM with submodules A and B, whose headers both include stddef.h.
|
||||
* When SM.A builds, __STDDEF_H will be defined. When SM.B builds, the
|
||||
* definition from SM.A will leak when building without local submodule
|
||||
* visibility. stddef.h wouldn't include any of its implementation headers, and
|
||||
* SM.B wouldn't import any of the stddef modules, and SM.B's `export *`
|
||||
* wouldn't export any stddef interfaces as expected. However, since stddef.h
|
||||
* ignores its header guard when building with modules, it all works as
|
||||
* expected.
|
||||
*
|
||||
* When clang modules are not enabled, the header guards can function in the
|
||||
* normal simple fashion.
|
||||
*/
|
||||
#if !defined(__STDDEF_H) || __has_feature(modules) || \
|
||||
(defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1) || \
|
||||
defined(__need_ptrdiff_t) || defined(__need_size_t) || \
|
||||
defined(__need_rsize_t) || defined(__need_wchar_t) || \
|
||||
defined(__need_NULL) || defined(__need_nullptr_t) || \
|
||||
defined(__need_unreachable) || defined(__need_max_align_t) || \
|
||||
defined(__need_offsetof) || defined(__need_wint_t)
|
||||
|
||||
#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \
|
||||
!defined(__need_wchar_t) && !defined(__need_NULL) && \
|
||||
!defined(__need_wint_t)
|
||||
/* Always define miscellaneous pieces when modules are available. */
|
||||
#if !__has_feature(modules)
|
||||
!defined(__need_rsize_t) && !defined(__need_wchar_t) && \
|
||||
!defined(__need_NULL) && !defined(__need_nullptr_t) && \
|
||||
!defined(__need_unreachable) && !defined(__need_max_align_t) && \
|
||||
!defined(__need_offsetof) && !defined(__need_wint_t)
|
||||
#define __STDDEF_H
|
||||
#endif
|
||||
#define __need_ptrdiff_t
|
||||
#define __need_size_t
|
||||
/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
|
||||
* enabled. */
|
||||
#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1
|
||||
#define __need_rsize_t
|
||||
#endif
|
||||
#define __need_wchar_t
|
||||
#define __need_NULL
|
||||
#define __need_STDDEF_H_misc
|
||||
/* __need_wint_t is intentionally not defined here. */
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \
|
||||
defined(__cplusplus)
|
||||
#define __need_nullptr_t
|
||||
#endif
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
#define __need_unreachable
|
||||
#endif
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \
|
||||
(defined(__cplusplus) && __cplusplus >= 201103L)
|
||||
#define __need_max_align_t
|
||||
#endif
|
||||
#define __need_offsetof
|
||||
/* wint_t is provided by <wchar.h> and not <stddef.h>. It's here
|
||||
* for compatibility, but must be explicitly requested. Therefore
|
||||
* __need_wint_t is intentionally not defined here. */
|
||||
#endif
|
||||
|
||||
#if defined(__need_ptrdiff_t)
|
||||
#if !defined(_PTRDIFF_T) || __has_feature(modules)
|
||||
/* Always define ptrdiff_t when modules are available. */
|
||||
#if !__has_feature(modules)
|
||||
#define _PTRDIFF_T
|
||||
#endif
|
||||
typedef __PTRDIFF_TYPE__ ptrdiff_t;
|
||||
#endif
|
||||
#include <__stddef_ptrdiff_t.h>
|
||||
#undef __need_ptrdiff_t
|
||||
#endif /* defined(__need_ptrdiff_t) */
|
||||
|
||||
#if defined(__need_size_t)
|
||||
#if !defined(_SIZE_T) || __has_feature(modules)
|
||||
/* Always define size_t when modules are available. */
|
||||
#if !__has_feature(modules)
|
||||
#define _SIZE_T
|
||||
#endif
|
||||
typedef __SIZE_TYPE__ size_t;
|
||||
#endif
|
||||
#include <__stddef_size_t.h>
|
||||
#undef __need_size_t
|
||||
#endif /*defined(__need_size_t) */
|
||||
|
||||
#if defined(__need_STDDEF_H_misc)
|
||||
/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
|
||||
* enabled. */
|
||||
#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \
|
||||
!defined(_RSIZE_T)) || __has_feature(modules)
|
||||
/* Always define rsize_t when modules are available. */
|
||||
#if !__has_feature(modules)
|
||||
#define _RSIZE_T
|
||||
#endif
|
||||
typedef __SIZE_TYPE__ rsize_t;
|
||||
#endif
|
||||
#endif /* defined(__need_STDDEF_H_misc) */
|
||||
#if defined(__need_rsize_t)
|
||||
#include <__stddef_rsize_t.h>
|
||||
#undef __need_rsize_t
|
||||
#endif /* defined(__need_rsize_t) */
|
||||
|
||||
#if defined(__need_wchar_t)
|
||||
#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED)
|
||||
/* Always define wchar_t when modules are available. */
|
||||
#if !defined(_WCHAR_T) || __has_feature(modules)
|
||||
#if !__has_feature(modules)
|
||||
#define _WCHAR_T
|
||||
#if defined(_MSC_EXTENSIONS)
|
||||
#define _WCHAR_T_DEFINED
|
||||
#endif
|
||||
#endif
|
||||
typedef __WCHAR_TYPE__ wchar_t;
|
||||
#endif
|
||||
#endif
|
||||
#include <__stddef_wchar_t.h>
|
||||
#undef __need_wchar_t
|
||||
#endif /* defined(__need_wchar_t) */
|
||||
|
||||
#if defined(__need_NULL)
|
||||
#undef NULL
|
||||
#ifdef __cplusplus
|
||||
# if !defined(__MINGW32__) && !defined(_MSC_VER)
|
||||
# define NULL __null
|
||||
# else
|
||||
# define NULL 0
|
||||
# endif
|
||||
#else
|
||||
# define NULL ((void*)0)
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
|
||||
namespace std { typedef decltype(nullptr) nullptr_t; }
|
||||
using ::std::nullptr_t;
|
||||
#endif
|
||||
#endif
|
||||
#include <__stddef_null.h>
|
||||
#undef __need_NULL
|
||||
#endif /* defined(__need_NULL) */
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
typedef typeof(nullptr) nullptr_t;
|
||||
#endif /* defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L */
|
||||
#if defined(__need_nullptr_t)
|
||||
#include <__stddef_nullptr_t.h>
|
||||
#undef __need_nullptr_t
|
||||
#endif /* defined(__need_nullptr_t) */
|
||||
|
||||
#if defined(__need_STDDEF_H_misc) && defined(__STDC_VERSION__) && \
|
||||
__STDC_VERSION__ >= 202000L
|
||||
#define unreachable() __builtin_unreachable()
|
||||
#endif /* defined(__need_STDDEF_H_misc) && >= C23 */
|
||||
#if defined(__need_unreachable)
|
||||
#include <__stddef_unreachable.h>
|
||||
#undef __need_unreachable
|
||||
#endif /* defined(__need_unreachable) */
|
||||
|
||||
#if defined(__need_STDDEF_H_misc)
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \
|
||||
(defined(__cplusplus) && __cplusplus >= 201103L)
|
||||
#include "__stddef_max_align_t.h"
|
||||
#endif
|
||||
#define offsetof(t, d) __builtin_offsetof(t, d)
|
||||
#undef __need_STDDEF_H_misc
|
||||
#endif /* defined(__need_STDDEF_H_misc) */
|
||||
#if defined(__need_max_align_t)
|
||||
#include <__stddef_max_align_t.h>
|
||||
#undef __need_max_align_t
|
||||
#endif /* defined(__need_max_align_t) */
|
||||
|
||||
#if defined(__need_offsetof)
|
||||
#include <__stddef_offsetof.h>
|
||||
#undef __need_offsetof
|
||||
#endif /* defined(__need_offsetof) */
|
||||
|
||||
/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use
|
||||
__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */
|
||||
#if defined(__need_wint_t)
|
||||
/* Always define wint_t when modules are available. */
|
||||
#if !defined(_WINT_T) || __has_feature(modules)
|
||||
#if !__has_feature(modules)
|
||||
#define _WINT_T
|
||||
#endif
|
||||
typedef __WINT_TYPE__ wint_t;
|
||||
#endif
|
||||
#include <__stddef_wint_t.h>
|
||||
#undef __need_wint_t
|
||||
#endif /* __need_wint_t */
|
||||
|
||||
|
||||
67
lib/include/stdint.h
vendored
67
lib/include/stdint.h
vendored
@ -499,9 +499,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# define INT64_MAX INT64_C( 9223372036854775807)
|
||||
# define INT64_MIN (-INT64_C( 9223372036854775807)-1)
|
||||
# define UINT64_MAX UINT64_C(18446744073709551615)
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
# define UINT64_WIDTH 64
|
||||
# define INT64_WIDTH UINT64_WIDTH
|
||||
|
||||
@ -545,9 +544,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# define INT_FAST64_MAX __INT_LEAST64_MAX
|
||||
# define UINT_FAST64_MAX __UINT_LEAST64_MAX
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
# define UINT_LEAST64_WIDTH __UINT_LEAST64_WIDTH
|
||||
# define INT_LEAST64_WIDTH UINT_LEAST64_WIDTH
|
||||
# define UINT_FAST64_WIDTH __UINT_LEAST64_WIDTH
|
||||
@ -586,9 +583,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# undef __UINT_LEAST8_MAX
|
||||
# define __UINT_LEAST8_MAX UINT56_MAX
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
# define UINT56_WIDTH 56
|
||||
# define INT56_WIDTH UINT56_WIDTH
|
||||
# define UINT_LEAST56_WIDTH UINT56_WIDTH
|
||||
@ -635,9 +630,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# undef __UINT_LEAST8_MAX
|
||||
# define __UINT_LEAST8_MAX UINT48_MAX
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
#define UINT48_WIDTH 48
|
||||
#define INT48_WIDTH UINT48_WIDTH
|
||||
#define UINT_LEAST48_WIDTH UINT48_WIDTH
|
||||
@ -684,9 +677,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# undef __UINT_LEAST8_MAX
|
||||
# define __UINT_LEAST8_MAX UINT40_MAX
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
# define UINT40_WIDTH 40
|
||||
# define INT40_WIDTH UINT40_WIDTH
|
||||
# define UINT_LEAST40_WIDTH UINT40_WIDTH
|
||||
@ -727,9 +718,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# undef __UINT_LEAST8_MAX
|
||||
# define __UINT_LEAST8_MAX UINT32_MAX
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
# define UINT32_WIDTH 32
|
||||
# define INT32_WIDTH UINT32_WIDTH
|
||||
# undef __UINT_LEAST32_WIDTH
|
||||
@ -749,9 +738,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# define INT_FAST32_MAX __INT_LEAST32_MAX
|
||||
# define UINT_FAST32_MAX __UINT_LEAST32_MAX
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
# define UINT_LEAST32_WIDTH __UINT_LEAST32_WIDTH
|
||||
# define INT_LEAST32_WIDTH UINT_LEAST32_WIDTH
|
||||
# define UINT_FAST32_WIDTH __UINT_LEAST32_WIDTH
|
||||
@ -784,9 +771,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# undef __UINT_LEAST8_MAX
|
||||
# define __UINT_LEAST8_MAX UINT24_MAX
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
# define UINT24_WIDTH 24
|
||||
# define INT24_WIDTH UINT24_WIDTH
|
||||
# define UINT_LEAST24_WIDTH UINT24_WIDTH
|
||||
@ -819,9 +804,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# undef __UINT_LEAST8_MAX
|
||||
# define __UINT_LEAST8_MAX UINT16_MAX
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
# define UINT16_WIDTH 16
|
||||
# define INT16_WIDTH UINT16_WIDTH
|
||||
# undef __UINT_LEAST16_WIDTH
|
||||
@ -839,9 +822,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# define INT_FAST16_MAX __INT_LEAST16_MAX
|
||||
# define UINT_FAST16_MAX __UINT_LEAST16_MAX
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
# define UINT_LEAST16_WIDTH __UINT_LEAST16_WIDTH
|
||||
# define INT_LEAST16_WIDTH UINT_LEAST16_WIDTH
|
||||
# define UINT_FAST16_WIDTH __UINT_LEAST16_WIDTH
|
||||
@ -862,9 +843,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# undef __UINT_LEAST8_MAX
|
||||
# define __UINT_LEAST8_MAX UINT8_MAX
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
# define UINT8_WIDTH 8
|
||||
# define INT8_WIDTH UINT8_WIDTH
|
||||
# undef __UINT_LEAST8_WIDTH
|
||||
@ -880,9 +859,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
# define INT_FAST8_MAX __INT_LEAST8_MAX
|
||||
# define UINT_FAST8_MAX __UINT_LEAST8_MAX
|
||||
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
# define UINT_LEAST8_WIDTH __UINT_LEAST8_WIDTH
|
||||
# define INT_LEAST8_WIDTH UINT_LEAST8_WIDTH
|
||||
# define UINT_FAST8_WIDTH __UINT_LEAST8_WIDTH
|
||||
@ -907,10 +884,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
#define PTRDIFF_MAX __PTRDIFF_MAX__
|
||||
#define SIZE_MAX __SIZE_MAX__
|
||||
|
||||
/* C2x 7.20.2.4 Width of integer types capable of holding object pointers. */
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
/* C23 7.22.2.4 Width of integer types capable of holding object pointers. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
/* NB: The C standard requires that these be the same value, but the compiler
|
||||
exposes separate internal width macros. */
|
||||
#define INTPTR_WIDTH __INTPTR_WIDTH__
|
||||
@ -928,10 +903,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
#define INTMAX_MAX __INTMAX_MAX__
|
||||
#define UINTMAX_MAX __UINTMAX_MAX__
|
||||
|
||||
/* C2x 7.20.2.5 Width of greatest-width integer types. */
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
/* C23 7.22.2.5 Width of greatest-width integer types. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
/* NB: The C standard requires that these be the same value, but the compiler
|
||||
exposes separate internal width macros. */
|
||||
#define INTMAX_WIDTH __INTMAX_WIDTH__
|
||||
@ -964,10 +937,8 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)
|
||||
#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)
|
||||
|
||||
/* C2x 7.20.3.x Width of other integer types. */
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
/* C23 7.22.3.x Width of other integer types. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
#define PTRDIFF_WIDTH __PTRDIFF_WIDTH__
|
||||
#define SIG_ATOMIC_WIDTH __SIG_ATOMIC_WIDTH__
|
||||
#define SIZE_WIDTH __SIZE_WIDTH__
|
||||
|
||||
4
lib/include/stdnoreturn.h
vendored
4
lib/include/stdnoreturn.h
vendored
@ -15,8 +15,8 @@
|
||||
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) && \
|
||||
!defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS)
|
||||
/* The noreturn macro is deprecated in C2x. We do not mark it as such because
|
||||
including the header file in C2x is also deprecated and we do not want to
|
||||
/* The noreturn macro is deprecated in C23. We do not mark it as such because
|
||||
including the header file in C23 is also deprecated and we do not want to
|
||||
issue a confusing diagnostic for code which includes <stdnoreturn.h>
|
||||
followed by code that writes [[noreturn]]. The issue with such code is not
|
||||
with the attribute, or the use of 'noreturn', but the inclusion of the
|
||||
|
||||
9
lib/include/tmmintrin.h
vendored
9
lib/include/tmmintrin.h
vendored
@ -17,8 +17,13 @@
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
|
||||
#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("ssse3,no-evex512"), __min_vector_width__(64)))
|
||||
#define __DEFAULT_FN_ATTRS_MMX \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("mmx,ssse3,no-evex512"), \
|
||||
__min_vector_width__(64)))
|
||||
|
||||
/// Computes the absolute value of each of the packed 8-bit signed
|
||||
/// integers in the source operand and stores the 8-bit unsigned integer
|
||||
|
||||
51
lib/include/usermsrintrin.h
vendored
Normal file
51
lib/include/usermsrintrin.h
vendored
Normal file
@ -0,0 +1,51 @@
|
||||
/*===--------------- usermsrintrin.h - USERMSR intrinsics -----------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __X86GPRINTRIN_H
|
||||
#error "Never use <usermsrintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif // __X86GPRINTRIN_H
|
||||
|
||||
#ifndef __USERMSRINTRIN_H
|
||||
#define __USERMSRINTRIN_H
|
||||
#ifdef __x86_64__
|
||||
|
||||
/// Reads the contents of a 64-bit MSR specified in \a __A and returns it.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> URDMSR </c> instruction.
|
||||
/// \param __A
|
||||
/// An unsigned long long.
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// DEST := MSR[__A]
|
||||
/// \endcode
|
||||
static __inline__ unsigned long long
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("usermsr")))
|
||||
_urdmsr(unsigned long long __A) {
|
||||
return __builtin_ia32_urdmsr(__A);
|
||||
}
|
||||
|
||||
/// Writes the contents of \a __B into the 64-bit MSR specified in \a __A.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> UWRMSR </c> instruction.
|
||||
///
|
||||
/// \param __A
|
||||
/// An unsigned long long.
|
||||
/// \param __B
|
||||
/// An unsigned long long.
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// MSR[__A] := __B
|
||||
/// \endcode
|
||||
static __inline__ void
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("usermsr")))
|
||||
_uwrmsr(unsigned long long __A, unsigned long long __B) {
|
||||
return __builtin_ia32_uwrmsr(__A, __B);
|
||||
}
|
||||
|
||||
#endif // __x86_64__
|
||||
#endif // __USERMSRINTRIN_H
|
||||
6
lib/include/vaesintrin.h
vendored
6
lib/include/vaesintrin.h
vendored
@ -18,8 +18,10 @@
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256)))
|
||||
|
||||
/* Default attributes for ZMM forms. */
|
||||
#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512)))
|
||||
|
||||
#define __DEFAULT_FN_ATTRS_F \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512f,evex512,vaes"), \
|
||||
__min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesenc_epi128(__m256i __A, __m256i __B)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user