Merge branch 'llvm18'

Upgrades the LLVM, Clang, and LLD dependencies to LLVM 18.x

Related to #16270
This commit is contained in:
Andrew Kelley 2024-05-09 01:52:26 -07:00
commit bcb534c295
1107 changed files with 108768 additions and 112905 deletions

View File

@ -4,6 +4,7 @@ on:
push:
branches:
- master
- llvm18
concurrency:
# Cancels pending runs when a PR gets updated.
group: ${{ github.head_ref || github.run_id }}-${{ github.actor }}

View File

@ -140,9 +140,9 @@ else()
set(ZIG_SYSTEM_LIBCXX "stdc++" CACHE STRING "system libcxx name for build.zig")
endif()
find_package(llvm 17)
find_package(clang 17)
find_package(lld 17)
find_package(llvm 18)
find_package(clang 18)
find_package(lld 18)
if(ZIG_STATIC_ZLIB)
if (MSVC)
@ -526,42 +526,54 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/lib/std/zig/system/x86.zig"
"${CMAKE_SOURCE_DIR}/lib/std/zig/tokenizer.zig"
"${CMAKE_SOURCE_DIR}/src/Air.zig"
"${CMAKE_SOURCE_DIR}/src/Builtin.zig"
"${CMAKE_SOURCE_DIR}/src/Compilation.zig"
"${CMAKE_SOURCE_DIR}/src/Compilation/Config.zig"
"${CMAKE_SOURCE_DIR}/src/DarwinPosixSpawn.zig"
"${CMAKE_SOURCE_DIR}/src/InternPool.zig"
"${CMAKE_SOURCE_DIR}/src/Liveness.zig"
"${CMAKE_SOURCE_DIR}/src/Liveness/Verify.zig"
"${CMAKE_SOURCE_DIR}/src/Module.zig"
"${CMAKE_SOURCE_DIR}/src/Package.zig"
"${CMAKE_SOURCE_DIR}/src/Package/Fetch.zig"
"${CMAKE_SOURCE_DIR}/src/Package/Fetch/git.zig"
"${CMAKE_SOURCE_DIR}/src/Package/Manifest.zig"
"${CMAKE_SOURCE_DIR}/src/Package/Module.zig"
"${CMAKE_SOURCE_DIR}/src/RangeSet.zig"
"${CMAKE_SOURCE_DIR}/src/Sema.zig"
"${CMAKE_SOURCE_DIR}/src/Sema/bitcast.zig"
"${CMAKE_SOURCE_DIR}/src/Sema/comptime_ptr_access.zig"
"${CMAKE_SOURCE_DIR}/src/Value.zig"
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/CodeGen.zig"
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/Emit.zig"
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/Mir.zig"
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/bits.zig"
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/abi.zig"
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/bits.zig"
"${CMAKE_SOURCE_DIR}/src/arch/arm/CodeGen.zig"
"${CMAKE_SOURCE_DIR}/src/arch/arm/Emit.zig"
"${CMAKE_SOURCE_DIR}/src/arch/arm/Mir.zig"
"${CMAKE_SOURCE_DIR}/src/arch/arm/bits.zig"
"${CMAKE_SOURCE_DIR}/src/arch/arm/abi.zig"
"${CMAKE_SOURCE_DIR}/src/arch/arm/bits.zig"
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/CodeGen.zig"
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/Emit.zig"
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/Mir.zig"
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/bits.zig"
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/abi.zig"
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/bits.zig"
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/CodeGen.zig"
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/Emit.zig"
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/Mir.zig"
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/bits.zig"
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/abi.zig"
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/bits.zig"
"${CMAKE_SOURCE_DIR}/src/arch/wasm/CodeGen.zig"
"${CMAKE_SOURCE_DIR}/src/arch/wasm/Emit.zig"
"${CMAKE_SOURCE_DIR}/src/arch/wasm/Mir.zig"
"${CMAKE_SOURCE_DIR}/src/arch/wasm/abi.zig"
"${CMAKE_SOURCE_DIR}/src/arch/x86/bits.zig"
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/CodeGen.zig"
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/Disassembler.zig"
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/Emit.zig"
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/Encoding.zig"
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/Lower.zig"
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/Mir.zig"
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/abi.zig"
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/bits.zig"
@ -574,7 +586,17 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/codegen/c.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/c/Type.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/llvm.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/llvm/BitcodeReader.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/llvm/Builder.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/llvm/bindings.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/llvm/bitcode_writer.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/llvm/ir.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/spirv.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/spirv/Assembler.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/spirv/Module.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/spirv/Section.zig"
"${CMAKE_SOURCE_DIR}/src/codegen/spirv/spec.zig"
"${CMAKE_SOURCE_DIR}/src/crash_report.zig"
"${CMAKE_SOURCE_DIR}/src/glibc.zig"
"${CMAKE_SOURCE_DIR}/src/introspect.zig"
"${CMAKE_SOURCE_DIR}/src/libcxx.zig"
@ -586,7 +608,9 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/Coff/Atom.zig"
"${CMAKE_SOURCE_DIR}/src/link/Coff/ImportTable.zig"
"${CMAKE_SOURCE_DIR}/src/link/Coff/Object.zig"
"${CMAKE_SOURCE_DIR}/src/link/Coff/Relocation.zig"
"${CMAKE_SOURCE_DIR}/src/link/Coff/lld.zig"
"${CMAKE_SOURCE_DIR}/src/link/Dwarf.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/Archive.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/Atom.zig"
@ -599,6 +623,7 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/Elf/eh_frame.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/file.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/gc.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/merge_section.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/relocatable.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/relocation.zig"
"${CMAKE_SOURCE_DIR}/src/link/Elf/synthetic_sections.zig"
@ -617,9 +642,9 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/MachO/UnwindInfo.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/ZigObject.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/bind.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Rebase.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Trie.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/bind.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/eh_frame.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/file.zig"
@ -629,15 +654,32 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/MachO/synthetic.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig"
"${CMAKE_SOURCE_DIR}/src/link/NvPtx.zig"
"${CMAKE_SOURCE_DIR}/src/link/Plan9.zig"
"${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm.zig"
"${CMAKE_SOURCE_DIR}/src/link/msdos-stub.bin"
"${CMAKE_SOURCE_DIR}/src/link/SpirV.zig"
"${CMAKE_SOURCE_DIR}/src/link/SpirV/BinaryModule.zig"
"${CMAKE_SOURCE_DIR}/src/link/SpirV/deduplicate.zig"
"${CMAKE_SOURCE_DIR}/src/link/SpirV/lower_invocation_globals.zig"
"${CMAKE_SOURCE_DIR}/src/link/SpirV/prune_unused.zig"
"${CMAKE_SOURCE_DIR}/src/link/StringTable.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm/Archive.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm/Atom.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm/Object.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm/Symbol.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm/ZigObject.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm/file.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm/types.zig"
"${CMAKE_SOURCE_DIR}/src/link/aarch64.zig"
"${CMAKE_SOURCE_DIR}/src/link/riscv.zig"
"${CMAKE_SOURCE_DIR}/src/link/table_section.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/Tokenizer.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/parse/test.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/yaml.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/yaml/test.zig"
"${CMAKE_SOURCE_DIR}/src/main.zig"
"${CMAKE_SOURCE_DIR}/src/mingw.zig"
"${CMAKE_SOURCE_DIR}/src/musl.zig"
@ -685,7 +727,7 @@ if(MSVC)
set(EXE_LDFLAGS "${EXE_LDFLAGS} /debug:fastlink")
endif()
else()
set(EXE_CXX_FLAGS "-std=c++17 -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -Werror=type-limits -Wno-missing-braces -Wno-comment")
set(EXE_CXX_FLAGS "-std=c++17 -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -Wno-type-limits -Wno-missing-braces -Wno-comment")
set(EXE_LDFLAGS " ")
if(MINGW)
set(EXE_CXX_FLAGS "${EXE_CXX_FLAGS} -Wno-format")
@ -866,9 +908,9 @@ target_include_directories(zig2 PUBLIC "${CMAKE_SOURCE_DIR}/stage1")
target_link_libraries(zig2 LINK_PUBLIC zigcpp)
if(MSVC)
target_link_libraries(zig2 LINK_PUBLIC ntdll.lib)
target_link_libraries(zig2 LINK_PUBLIC ntdll.lib ws2_32.lib)
elseif(MINGW)
target_link_libraries(zig2 LINK_PUBLIC ntdll)
target_link_libraries(zig2 LINK_PUBLIC ntdll ws2_32)
endif()
if(NOT MSVC)

View File

@ -334,6 +334,9 @@ pub fn build(b: *std.Build) !void {
}
if (target.result.os.tag == .windows) {
inline for (.{ exe, check_case_exe }) |artifact| {
// LLVM depends on networking as of version 18.
artifact.linkSystemLibrary("ws2_32");
artifact.linkSystemLibrary("version");
artifact.linkSystemLibrary("uuid");
artifact.linkSystemLibrary("ole32");
@ -650,7 +653,7 @@ const exe_cflags = [_][]const u8{
"-fvisibility-inlines-hidden",
"-fno-exceptions",
"-fno-rtti",
"-Werror=type-limits",
"-Wno-type-limits",
"-Wno-missing-braces",
"-Wno-comment",
};
@ -1039,6 +1042,7 @@ const clang_libs = [_][]const u8{
"clangAST",
"clangParse",
"clangSema",
"clangAPINotes",
"clangBasic",
"clangEdit",
"clangLex",
@ -1068,6 +1072,7 @@ const llvm_libs = [_][]const u8{
"LLVMXRay",
"LLVMLibDriver",
"LLVMDlltoolDriver",
"LLVMTextAPIBinaryReader",
"LLVMCoverage",
"LLVMLineEditor",
"LLVMXCoreDisassembler",
@ -1169,6 +1174,7 @@ const llvm_libs = [_][]const u8{
"LLVMAArch64Desc",
"LLVMAArch64Utils",
"LLVMAArch64Info",
"LLVMOrcDebugging",
"LLVMOrcJIT",
"LLVMWindowsDriver",
"LLVMMCJIT",
@ -1188,6 +1194,7 @@ const llvm_libs = [_][]const u8{
"LLVMMCDisassembler",
"LLVMLTO",
"LLVMPasses",
"LLVMHipStdPar",
"LLVMCFGuard",
"LLVMCoroutines",
"LLVMipo",
@ -1195,10 +1202,13 @@ const llvm_libs = [_][]const u8{
"LLVMLinker",
"LLVMInstrumentation",
"LLVMFrontendOpenMP",
"LLVMFrontendOffloading",
"LLVMFrontendOpenACC",
"LLVMFrontendHLSL",
"LLVMFrontendDriver",
"LLVMExtensions",
"LLVMDWARFLinkerParallel",
"LLVMDWARFLinkerClassic",
"LLVMDWARFLinker",
"LLVMGlobalISel",
"LLVMMIRParser",

View File

@ -8,7 +8,7 @@ set -e
ARCH="$(uname -m)"
TARGET="$ARCH-linux-musl"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.203+d3bc1cfc4"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
PREFIX="$HOME/deps/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"

View File

@ -8,7 +8,7 @@ set -e
ARCH="$(uname -m)"
TARGET="$ARCH-linux-musl"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.203+d3bc1cfc4"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
PREFIX="$HOME/deps/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"

View File

@ -9,10 +9,16 @@ set -e
ZIGDIR="$PWD"
TARGET="$ARCH-macos-none"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.467+0345d7866"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
PREFIX="$HOME/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"
if [ ! -d "$PREFIX" ]; then
cd $HOME
curl -L -O "https://ziglang.org/deps/$CACHE_BASENAME.tar.xz"
tar xf "$CACHE_BASENAME.tar.xz"
fi
cd $ZIGDIR
# Make the `zig version` number consistent.

View File

@ -9,10 +9,16 @@ set -e
ZIGDIR="$PWD"
TARGET="$ARCH-macos-none"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.467+0345d7866"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
PREFIX="$HOME/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"
if [ ! -d "$PREFIX" ]; then
cd $HOME
curl -L -O "https://ziglang.org/deps/$CACHE_BASENAME.tar.xz"
tar xf "$CACHE_BASENAME.tar.xz"
fi
cd $ZIGDIR
# Make the `zig version` number consistent.

View File

@ -1,5 +1,5 @@
$TARGET = "$($Env:ARCH)-windows-gnu"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.12.0-dev.2087+e9a18010b"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
$MCPU = "baseline"
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
$PREFIX_PATH = "$(Get-Location)\..\$ZIG_LLVM_CLANG_LLD_NAME"

View File

@ -8,7 +8,7 @@ set -e
ARCH="$(uname -m)"
TARGET="$ARCH-linux-musl"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.203+d3bc1cfc4"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
PREFIX="$HOME/deps/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"

View File

@ -8,7 +8,7 @@ set -e
ARCH="$(uname -m)"
TARGET="$ARCH-linux-musl"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.203+d3bc1cfc4"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
PREFIX="$HOME/deps/$CACHE_BASENAME"
ZIG="$PREFIX/bin/zig"

View File

@ -6,18 +6,17 @@ set -e
ZIGDIR="$PWD"
TARGET="$ARCH-macos-none"
MCPU="baseline"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.467+0345d7866"
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
PREFIX="$HOME/$CACHE_BASENAME"
JOBS="-j3"
rm -rf $PREFIX
cd $HOME
curl -L -O "https://ziglang.org/deps/$CACHE_BASENAME.tar.xz"
tar xf "$CACHE_BASENAME.tar.xz"
ZIG="$PREFIX/bin/zig"
if [ ! -d "$PREFIX" ]; then
cd $HOME
curl -L -O "https://ziglang.org/deps/$CACHE_BASENAME.tar.xz"
tar xf "$CACHE_BASENAME.tar.xz"
fi
cd $ZIGDIR
# Make the `zig version` number consistent.

View File

@ -1,10 +1,20 @@
$TARGET = "$($Env:ARCH)-windows-gnu"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.12.0-dev.2073+402fe565a"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
$MCPU = "baseline"
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
$PREFIX_PATH = "$($Env:USERPROFILE)\$ZIG_LLVM_CLANG_LLD_NAME"
$ZIG = "$PREFIX_PATH\bin\zig.exe"
$ZIG_LIB_DIR = "$(Get-Location)\lib"
if (!(Test-Path "$PREFIX_PATH.zip")) {
Write-Output "Downloading $ZIG_LLVM_CLANG_LLD_URL"
Invoke-WebRequest -Uri "$ZIG_LLVM_CLANG_LLD_URL" -OutFile "$PREFIX_PATH.zip"
Write-Output "Extracting..."
Add-Type -AssemblyName System.IO.Compression.FileSystem ;
[System.IO.Compression.ZipFile]::ExtractToDirectory("$PREFIX_PATH.zip", "$PREFIX_PATH\..")
}
function CheckLastExitCode {
if (!$?) {
exit 1
@ -25,6 +35,12 @@ Remove-Item -Path 'build-debug' -Recurse -Force -ErrorAction Ignore
New-Item -Path 'build-debug' -ItemType Directory
Set-Location -Path 'build-debug'
# Override the cache directories because they won't actually help other CI runs
# which will be testing alternate versions of zig, and ultimately would just
# fill up space on the hard drive for no reason.
$Env:ZIG_GLOBAL_CACHE_DIR="$(Get-Location)\zig-global-cache"
$Env:ZIG_LOCAL_CACHE_DIR="$(Get-Location)\zig-local-cache"
# CMake gives a syntax error when file paths with backward slashes are used.
# Here, we use forward slashes only to work around this.
& cmake .. `

View File

@ -1,10 +1,20 @@
$TARGET = "$($Env:ARCH)-windows-gnu"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.12.0-dev.2073+402fe565a"
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
$MCPU = "baseline"
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
$PREFIX_PATH = "$($Env:USERPROFILE)\$ZIG_LLVM_CLANG_LLD_NAME"
$ZIG = "$PREFIX_PATH\bin\zig.exe"
$ZIG_LIB_DIR = "$(Get-Location)\lib"
if (!(Test-Path "$PREFIX_PATH.zip")) {
Write-Output "Downloading $ZIG_LLVM_CLANG_LLD_URL"
Invoke-WebRequest -Uri "$ZIG_LLVM_CLANG_LLD_URL" -OutFile "$PREFIX_PATH.zip"
Write-Output "Extracting..."
Add-Type -AssemblyName System.IO.Compression.FileSystem ;
[System.IO.Compression.ZipFile]::ExtractToDirectory("$PREFIX_PATH.zip", "$PREFIX_PATH\..")
}
function CheckLastExitCode {
if (!$?) {
exit 1
@ -25,6 +35,12 @@ Remove-Item -Path 'build-release' -Recurse -Force -ErrorAction Ignore
New-Item -Path 'build-release' -ItemType Directory
Set-Location -Path 'build-release'
# Override the cache directories because they won't actually help other CI runs
# which will be testing alternate versions of zig, and ultimately would just
# fill up space on the hard drive for no reason.
$Env:ZIG_GLOBAL_CACHE_DIR="$(Get-Location)\zig-global-cache"
$Env:ZIG_LOCAL_CACHE_DIR="$(Get-Location)\zig-local-cache"
# CMake gives a syntax error when file paths with backward slashes are used.
# Here, we use forward slashes only to work around this.
& cmake .. `

View File

@ -17,9 +17,9 @@ find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h
if(${LLVM_LINK_MODE} STREQUAL "shared")
find_library(CLANG_LIBRARIES
NAMES
libclang-cpp.so.17
clang-cpp-17.0
clang-cpp170
libclang-cpp.so.18
clang-cpp-18.0
clang-cpp180
clang-cpp
NAMES_PER_DIR
HINTS "${LLVM_LIBDIRS}"
@ -55,6 +55,7 @@ else()
FIND_AND_ADD_CLANG_LIB(clangAST)
FIND_AND_ADD_CLANG_LIB(clangParse)
FIND_AND_ADD_CLANG_LIB(clangSema)
FIND_AND_ADD_CLANG_LIB(clangAPINotes)
FIND_AND_ADD_CLANG_LIB(clangBasic)
FIND_AND_ADD_CLANG_LIB(clangEdit)
FIND_AND_ADD_CLANG_LIB(clangLex)

View File

@ -9,21 +9,21 @@
find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h
HINTS ${LLVM_INCLUDE_DIRS}
PATHS
/usr/lib/llvm-17/include
/usr/local/llvm170/include
/usr/local/llvm17/include
/usr/local/opt/llvm@17/include
/opt/homebrew/opt/llvm@17/include
/usr/lib/llvm-18/include
/usr/local/llvm180/include
/usr/local/llvm18/include
/usr/local/opt/llvm@18/include
/opt/homebrew/opt/llvm@18/include
/mingw64/include)
find_library(LLD_LIBRARY NAMES lld-17.0 lld170 lld NAMES_PER_DIR
find_library(LLD_LIBRARY NAMES lld-18.0 lld180 lld NAMES_PER_DIR
HINTS ${LLVM_LIBDIRS}
PATHS
/usr/lib/llvm-17/lib
/usr/local/llvm170/lib
/usr/local/llvm17/lib
/usr/local/opt/llvm@17/lib
/opt/homebrew/opt/llvm@17/lib
/usr/lib/llvm-18/lib
/usr/local/llvm180/lib
/usr/local/llvm18/lib
/usr/local/opt/llvm@18/lib
/opt/homebrew/opt/llvm@18/lib
)
if(EXISTS ${LLD_LIBRARY})
set(LLD_LIBRARIES ${LLD_LIBRARY})
@ -34,11 +34,11 @@ else()
HINTS ${LLVM_LIBDIRS}
PATHS
${LLD_LIBDIRS}
/usr/lib/llvm-17/lib
/usr/local/llvm170/lib
/usr/local/llvm17/lib
/usr/local/opt/llvm@17/lib
/opt/homebrew/opt/llvm@17/lib
/usr/lib/llvm-18/lib
/usr/local/llvm180/lib
/usr/local/llvm18/lib
/usr/local/opt/llvm@18/lib
/opt/homebrew/opt/llvm@18/lib
/mingw64/lib
/c/msys64/mingw64/lib
c:/msys64/mingw64/lib)

View File

@ -14,12 +14,12 @@ if(ZIG_USE_LLVM_CONFIG)
while(1)
unset(LLVM_CONFIG_EXE CACHE)
find_program(LLVM_CONFIG_EXE
NAMES llvm-config-17 llvm-config-17.0 llvm-config170 llvm-config17 llvm-config NAMES_PER_DIR
NAMES llvm-config-18 llvm-config-18.0 llvm-config180 llvm-config18 llvm-config NAMES_PER_DIR
PATHS
"/mingw64/bin"
"/c/msys64/mingw64/bin"
"c:/msys64/mingw64/bin"
"C:/Libraries/llvm-17.0.0/bin")
"C:/Libraries/llvm-18.0.0/bin")
if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND")
if (NOT LLVM_CONFIG_ERROR_MESSAGES STREQUAL "")
@ -37,9 +37,9 @@ if(ZIG_USE_LLVM_CONFIG)
OUTPUT_STRIP_TRAILING_WHITESPACE)
get_filename_component(LLVM_CONFIG_DIR "${LLVM_CONFIG_EXE}" DIRECTORY)
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 17 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 18 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 18)
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 18 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 19 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 19)
# Save the error message, in case this is the last llvm-config we find
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 17.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}")
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 18.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}")
# Ignore this directory and try the search again
list(APPEND CMAKE_IGNORE_PATH "${LLVM_CONFIG_DIR}")
@ -63,9 +63,9 @@ if(ZIG_USE_LLVM_CONFIG)
if (LLVM_CONFIG_ERROR)
# Save the error message, in case this is the last llvm-config we find
if (ZIG_SHARED_LLVM)
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 17.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library")
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 18.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library")
else()
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 17.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library")
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 18.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library")
endif()
# Ignore this directory and try the search again
@ -195,6 +195,7 @@ else()
FIND_AND_ADD_LLVM_LIB(LLVMXRay)
FIND_AND_ADD_LLVM_LIB(LLVMLibDriver)
FIND_AND_ADD_LLVM_LIB(LLVMDlltoolDriver)
FIND_AND_ADD_LLVM_LIB(LLVMTextAPIBinaryReader)
FIND_AND_ADD_LLVM_LIB(LLVMCoverage)
FIND_AND_ADD_LLVM_LIB(LLVMLineEditor)
FIND_AND_ADD_LLVM_LIB(LLVMXCoreDisassembler)
@ -296,6 +297,7 @@ else()
FIND_AND_ADD_LLVM_LIB(LLVMAArch64Desc)
FIND_AND_ADD_LLVM_LIB(LLVMAArch64Utils)
FIND_AND_ADD_LLVM_LIB(LLVMAArch64Info)
FIND_AND_ADD_LLVM_LIB(LLVMOrcDebugging)
FIND_AND_ADD_LLVM_LIB(LLVMOrcJIT)
FIND_AND_ADD_LLVM_LIB(LLVMWindowsDriver)
FIND_AND_ADD_LLVM_LIB(LLVMMCJIT)
@ -315,6 +317,7 @@ else()
FIND_AND_ADD_LLVM_LIB(LLVMMCDisassembler)
FIND_AND_ADD_LLVM_LIB(LLVMLTO)
FIND_AND_ADD_LLVM_LIB(LLVMPasses)
FIND_AND_ADD_LLVM_LIB(LLVMHipStdPar)
FIND_AND_ADD_LLVM_LIB(LLVMCFGuard)
FIND_AND_ADD_LLVM_LIB(LLVMCoroutines)
FIND_AND_ADD_LLVM_LIB(LLVMipo)
@ -322,10 +325,13 @@ else()
FIND_AND_ADD_LLVM_LIB(LLVMLinker)
FIND_AND_ADD_LLVM_LIB(LLVMInstrumentation)
FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenMP)
FIND_AND_ADD_LLVM_LIB(LLVMFrontendOffloading)
FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenACC)
FIND_AND_ADD_LLVM_LIB(LLVMFrontendHLSL)
FIND_AND_ADD_LLVM_LIB(LLVMFrontendDriver)
FIND_AND_ADD_LLVM_LIB(LLVMExtensions)
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerParallel)
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerClassic)
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinker)
FIND_AND_ADD_LLVM_LIB(LLVMGlobalISel)
FIND_AND_ADD_LLVM_LIB(LLVMMIRParser)

View File

@ -1116,7 +1116,7 @@ pub fn alignof(ty: Type, comp: *const Compilation) u29 {
.bit_int => @min(
std.math.ceilPowerOfTwoPromote(u16, (ty.data.int.bits + 7) / 8),
comp.target.maxIntAlignment(),
16, // comp.target.maxIntAlignment(), please use your own logic for this value as it is implementation-defined
),
.float => comp.target.c_type_alignment(.float),

View File

@ -502,8 +502,8 @@ __DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }
__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }
__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }
__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }
__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }
__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }
__DEVICE__ int __popc(unsigned int __a) { return __nv_popc(__a); }
__DEVICE__ int __popcll(unsigned long long __a) { return __nv_popcll(__a); }
__DEVICE__ float __powf(float __a, float __b) {
return __nv_fast_powf(__a, __b);
}

View File

@ -285,8 +285,8 @@ __DEVICE__ double __nv_normcdfinv(double __a);
__DEVICE__ float __nv_normcdfinvf(float __a);
__DEVICE__ float __nv_normf(int __a, const float *__b);
__DEVICE__ double __nv_norm(int __a, const double *__b);
__DEVICE__ int __nv_popc(int __a);
__DEVICE__ int __nv_popcll(long long __a);
__DEVICE__ int __nv_popc(unsigned int __a);
__DEVICE__ int __nv_popcll(unsigned long long __a);
__DEVICE__ double __nv_pow(double __a, double __b);
__DEVICE__ float __nv_powf(float __a, float __b);
__DEVICE__ double __nv_powi(double __a, int __b);

View File

@ -36,7 +36,7 @@
// because the OpenMP overlay requires constexpr functions here but prior to
// c++14 void return functions could not be constexpr.
#pragma push_macro("__DEVICE_VOID__")
#ifdef __OPENMP_NVPTX__ && defined(__cplusplus) && __cplusplus < 201402L
#if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L
#define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
#else
#define __DEVICE_VOID__ __DEVICE__
@ -45,9 +45,9 @@
// libdevice provides fast low precision and slow full-recision implementations
// for some functions. Which one gets selected depends on
// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if
// -ffast-math or -fcuda-approx-transcendentals are in effect.
// -ffast-math or -fgpu-approx-transcendentals are in effect.
#pragma push_macro("__FAST_OR_SLOW")
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
#define __FAST_OR_SLOW(fast, slow) fast
#else
#define __FAST_OR_SLOW(fast, slow) slow

View File

@ -196,12 +196,12 @@ inline __host__ double __signbitd(double x) {
// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we
// get the slow-but-accurate or fast-but-inaccurate versions of functions like
// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals.
// sin and exp. This is controlled in clang by -fgpu-approx-transcendentals.
//
// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.
// slow divides), so we need to scope our define carefully here.
#pragma push_macro("__USE_FAST_MATH__")
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
#define __USE_FAST_MATH__ 1
#endif

View File

@ -14,9 +14,6 @@
#endif
#if !defined(__HIPCC_RTC__)
#if defined(__cplusplus)
#include <algorithm>
#endif
#include <limits.h>
#include <stdint.h>
#ifdef __OPENMP_AMDGCN__
@ -32,6 +29,17 @@
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
#endif
// Device library provides fast low precision and slow full-recision
// implementations for some functions. Which one gets selected depends on
// __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if
// -ffast-math or -fgpu-approx-transcendentals are in effect.
#pragma push_macro("__FAST_OR_SLOW")
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
#define __FAST_OR_SLOW(fast, slow) fast
#else
#define __FAST_OR_SLOW(fast, slow) slow
#endif
// A few functions return bool type starting only in C++11.
#pragma push_macro("__RETURN_TYPE")
#ifdef __OPENMP_AMDGCN__
@ -139,21 +147,180 @@ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
}
// BEGIN FLOAT
// BEGIN INTRINSICS
__DEVICE__
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
__DEVICE__
float __exp10f(float __x) {
const float __log2_10 = 0x1.a934f0p+1f;
return __builtin_amdgcn_exp2f(__log2_10 * __x);
}
__DEVICE__
float __expf(float __x) {
const float __log2_e = 0x1.715476p+0;
return __builtin_amdgcn_exp2f(__log2_e * __x);
}
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
__DEVICE__
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
__DEVICE__
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
__DEVICE__
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fadd_rn(float __x, float __y) { return __x + __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
__DEVICE__
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
__DEVICE__
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __x / __y; }
#endif
__DEVICE__
float __fdividef(float __x, float __y) { return __x / __y; }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmaf_rd(float __x, float __y, float __z) {
return __ocml_fma_rtn_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __ocml_fma_rte_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_ru(float __x, float __y, float __z) {
return __ocml_fma_rtp_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rz(float __x, float __y, float __z) {
return __ocml_fma_rtz_f32(__x, __y, __z);
}
#else
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __builtin_fmaf(__x, __y, __z);
}
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
__DEVICE__
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
__DEVICE__
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
__DEVICE__
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fmul_rn(float __x, float __y) { return __x * __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
__DEVICE__
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
__DEVICE__
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
__DEVICE__
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
#else
__DEVICE__
float __frcp_rn(float __x) { return 1.0f / __x; }
#endif
__DEVICE__
float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
__DEVICE__
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
__DEVICE__
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
#else
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
__DEVICE__
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
__DEVICE__
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
__DEVICE__
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fsub_rn(float __x, float __y) { return __x - __y; }
#endif
__DEVICE__
float __log10f(float __x) { return __builtin_log10f(__x); }
__DEVICE__
float __log2f(float __x) { return __builtin_amdgcn_logf(__x); }
__DEVICE__
float __logf(float __x) { return __builtin_logf(__x); }
__DEVICE__
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
__DEVICE__
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
__DEVICE__
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
*__sinptr = __ocml_native_sin_f32(__x);
*__cosptr = __ocml_native_cos_f32(__x);
}
__DEVICE__
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
__DEVICE__
float __tanf(float __x) {
return __sinf(__x) * __builtin_amdgcn_rcpf(__cosf(__x));
}
// END INTRINSICS
#if defined(__cplusplus)
__DEVICE__
int abs(int __x) {
int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
return __builtin_abs(__x);
}
__DEVICE__
long labs(long __x) {
long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
return __builtin_labs(__x);
}
__DEVICE__
long long llabs(long long __x) {
long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
return __builtin_llabs(__x);
}
#endif
@ -188,7 +355,7 @@ __DEVICE__
float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); }
__DEVICE__
float cosf(float __x) { return __ocml_cos_f32(__x); }
float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); }
__DEVICE__
float coshf(float __x) { return __ocml_cosh_f32(__x); }
@ -321,13 +488,13 @@ __DEVICE__
float log1pf(float __x) { return __ocml_log1p_f32(__x); }
__DEVICE__
float log2f(float __x) { return __builtin_log2f(__x); }
float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); }
__DEVICE__
float logbf(float __x) { return __ocml_logb_f32(__x); }
__DEVICE__
float logf(float __x) { return __builtin_logf(__x); }
float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); }
__DEVICE__
long int lrintf(float __x) { return __builtin_rintf(__x); }
@ -401,7 +568,7 @@ float normf(int __dim,
++__a;
}
return __ocml_sqrt_f32(__r);
return __builtin_sqrtf(__r);
}
__DEVICE__
@ -483,9 +650,13 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
#ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__
__sincosf(__x, __sinptr, __cosptr);
#else
*__sinptr =
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
*__cosptr = __tmp;
#endif
}
__DEVICE__
@ -500,7 +671,7 @@ void sincospif(float __x, float *__sinptr, float *__cosptr) {
}
__DEVICE__
float sinf(float __x) { return __ocml_sin_f32(__x); }
float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); }
__DEVICE__
float sinhf(float __x) { return __ocml_sinh_f32(__x); }
@ -509,7 +680,7 @@ __DEVICE__
float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
__DEVICE__
float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
float sqrtf(float __x) { return __builtin_sqrtf(__x); }
__DEVICE__
float tanf(float __x) { return __ocml_tan_f32(__x); }
@ -551,158 +722,7 @@ float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
return __x1;
}
// BEGIN INTRINSICS
__DEVICE__
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
__DEVICE__
float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
__DEVICE__
float __expf(float __x) { return __ocml_native_exp_f32(__x); }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
__DEVICE__
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
__DEVICE__
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
__DEVICE__
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fadd_rn(float __x, float __y) { return __x + __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
__DEVICE__
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
__DEVICE__
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __x / __y; }
#endif
__DEVICE__
float __fdividef(float __x, float __y) { return __x / __y; }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmaf_rd(float __x, float __y, float __z) {
return __ocml_fma_rtn_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __ocml_fma_rte_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_ru(float __x, float __y, float __z) {
return __ocml_fma_rtp_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rz(float __x, float __y, float __z) {
return __ocml_fma_rtz_f32(__x, __y, __z);
}
#else
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __builtin_fmaf(__x, __y, __z);
}
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
__DEVICE__
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
__DEVICE__
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
__DEVICE__
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fmul_rn(float __x, float __y) { return __x * __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
__DEVICE__
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
__DEVICE__
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
__DEVICE__
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
#else
__DEVICE__
float __frcp_rn(float __x) { return 1.0f / __x; }
#endif
__DEVICE__
float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
__DEVICE__
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
__DEVICE__
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
#else
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
__DEVICE__
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
__DEVICE__
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
__DEVICE__
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fsub_rn(float __x, float __y) { return __x - __y; }
#endif
__DEVICE__
float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
__DEVICE__
float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
__DEVICE__
float __logf(float __x) { return __ocml_native_log_f32(__x); }
__DEVICE__
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
__DEVICE__
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
__DEVICE__
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
*__sinptr = __ocml_native_sin_f32(__x);
*__cosptr = __ocml_native_cos_f32(__x);
}
__DEVICE__
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
__DEVICE__
float __tanf(float __x) { return __ocml_tan_f32(__x); }
// END INTRINSICS
// END FLOAT
// BEGIN DOUBLE
@ -941,7 +961,7 @@ double norm(int __dim,
++__a;
}
return __ocml_sqrt_f64(__r);
return __builtin_sqrt(__r);
}
__DEVICE__
@ -1064,7 +1084,7 @@ __DEVICE__
double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
__DEVICE__
double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
double sqrt(double __x) { return __builtin_sqrt(__x); }
__DEVICE__
double tan(double __x) { return __ocml_tan_f64(__x); }
@ -1198,7 +1218,7 @@ __DEVICE__
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
#else
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
@ -1288,16 +1308,17 @@ double min(double __x, double __y) { return __builtin_fmin(__x, __y); }
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
__host__ inline static int min(int __arg1, int __arg2) {
return std::min(__arg1, __arg2);
return __arg1 < __arg2 ? __arg1 : __arg2;
}
__host__ inline static int max(int __arg1, int __arg2) {
return std::max(__arg1, __arg2);
return __arg1 > __arg2 ? __arg1 : __arg2;
}
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif
#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__RETURN_TYPE")
#pragma pop_macro("__FAST_OR_SLOW")
#endif // __CLANG_HIP_MATH_H__

View File

@ -46,6 +46,67 @@ extern "C" {
}
#endif //__cplusplus
#if !defined(__HIPCC_RTC__)
#if __has_include("hip/hip_version.h")
#include "hip/hip_version.h"
#endif // __has_include("hip/hip_version.h")
#endif // __HIPCC_RTC__
typedef __SIZE_TYPE__ __hip_size_t;
#ifdef __cplusplus
extern "C" {
#endif //__cplusplus
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
__device__ void __ockl_dm_dealloc(unsigned long long __addr);
#if __has_feature(address_sanitizer)
__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
unsigned long long __pc);
__device__ void __asan_free_impl(unsigned long long __addr,
unsigned long long __pc);
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
return (void *)__asan_malloc_impl(__size, __pc);
}
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
__asan_free_impl((unsigned long long)__ptr, __pc);
}
#else // __has_feature(address_sanitizer)
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
return (void *) __ockl_dm_alloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__ockl_dm_dealloc((unsigned long long)__ptr);
}
#endif // __has_feature(address_sanitizer)
#else // HIP version check
#if __HIP_ENABLE_DEVICE_MALLOC__
__device__ void *__hip_malloc(__hip_size_t __size);
__device__ void *__hip_free(void *__ptr);
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
return __hip_malloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__hip_free(__ptr);
}
#else // __HIP_ENABLE_DEVICE_MALLOC__
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
__builtin_trap();
return (void *)0;
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__builtin_trap();
}
#endif // __HIP_ENABLE_DEVICE_MALLOC__
#endif // HIP version check
#ifdef __cplusplus
} // extern "C"
#endif //__cplusplus
#if !defined(__HIPCC_RTC__)
#include <cmath>
#include <cstdlib>
@ -71,59 +132,6 @@ typedef __SIZE_TYPE__ size_t;
#define INT_MAX __INTMAX_MAX__
#endif // __HIPCC_RTC__
typedef __SIZE_TYPE__ __hip_size_t;
#ifdef __cplusplus
extern "C" {
#endif //__cplusplus
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
#if __has_feature(address_sanitizer)
extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc);
extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc);
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
return (void *)__asan_malloc_impl(__size, __pc);
}
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
__asan_free_impl((unsigned long long)__ptr, __pc);
}
#else
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
return (void *) __ockl_dm_alloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__ockl_dm_dealloc((unsigned long long)__ptr);
}
#endif // __has_feature(address_sanitizer)
#else // HIP version check
#if __HIP_ENABLE_DEVICE_MALLOC__
__device__ void *__hip_malloc(__hip_size_t __size);
__device__ void *__hip_free(void *__ptr);
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
return __hip_malloc(__size);
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__hip_free(__ptr);
}
#else
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
__builtin_trap();
return (void *)0;
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__builtin_trap();
}
#endif
#endif // HIP version check
#ifdef __cplusplus
} // extern "C"
#endif //__cplusplus
#include <__clang_hip_libdevice_declares.h>
#include <__clang_hip_math.h>
#include <__clang_hip_stdlib.h>

13
lib/include/__stdarg___gnuc_va_list.h vendored Normal file
View File

@ -0,0 +1,13 @@
/*===---- __stdarg___gnuc_va_list.h - Definition of __gnuc_va_list ---------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __GNUC_VA_LIST
#define __GNUC_VA_LIST
typedef __builtin_va_list __gnuc_va_list;
#endif

12
lib/include/__stdarg___va_copy.h vendored Normal file
View File

@ -0,0 +1,12 @@
/*===---- __stdarg___va_copy.h - Definition of __va_copy -------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __va_copy
#define __va_copy(d, s) __builtin_va_copy(d, s)
#endif

22
lib/include/__stdarg_va_arg.h vendored Normal file
View File

@ -0,0 +1,22 @@
/*===---- __stdarg_va_arg.h - Definitions of va_start, va_arg, va_end-------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef va_arg
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
/* C23 does not require the second parameter for va_start. */
#define va_start(ap, ...) __builtin_va_start(ap, 0)
#else
/* Versions before C23 do require the second parameter. */
#define va_start(ap, param) __builtin_va_start(ap, param)
#endif
#define va_end(ap) __builtin_va_end(ap)
#define va_arg(ap, type) __builtin_va_arg(ap, type)
#endif

12
lib/include/__stdarg_va_copy.h vendored Normal file
View File

@ -0,0 +1,12 @@
/*===---- __stdarg_va_copy.h - Definition of va_copy------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef va_copy
#define va_copy(dest, src) __builtin_va_copy(dest, src)
#endif

13
lib/include/__stdarg_va_list.h vendored Normal file
View File

@ -0,0 +1,13 @@
/*===---- __stdarg_va_list.h - Definition of va_list -----------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef _VA_LIST
#define _VA_LIST
typedef __builtin_va_list va_list;
#endif

View File

@ -1,4 +1,4 @@
/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===
/*===---- __stddef_max_align_t.h - Definition of max_align_t ---------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.

29
lib/include/__stddef_null.h vendored Normal file
View File

@ -0,0 +1,29 @@
/*===---- __stddef_null.h - Definition of NULL -----------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#if !defined(NULL) || !__building_module(_Builtin_stddef)
/* linux/stddef.h will define NULL to 0. glibc (and other) headers then define
* __need_NULL and rely on stddef.h to redefine NULL to the correct value again.
* Modules don't support redefining macros like that, but support that pattern
* in the non-modules case.
*/
#undef NULL
#ifdef __cplusplus
#if !defined(__MINGW32__) && !defined(_MSC_VER)
#define NULL __null
#else
#define NULL 0
#endif
#else
#define NULL ((void*)0)
#endif
#endif

29
lib/include/__stddef_nullptr_t.h vendored Normal file
View File

@ -0,0 +1,29 @@
/*===---- __stddef_nullptr_t.h - Definition of nullptr_t -------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_NULLPTR_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _NULLPTR_T
#ifdef __cplusplus
#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
namespace std {
typedef decltype(nullptr) nullptr_t;
}
using ::std::nullptr_t;
#endif
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
typedef typeof(nullptr) nullptr_t;
#endif
#endif

17
lib/include/__stddef_offsetof.h vendored Normal file
View File

@ -0,0 +1,17 @@
/*===---- __stddef_offsetof.h - Definition of offsetof ---------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(offsetof) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define offsetof(t, d) __builtin_offsetof(t, d)
#endif

20
lib/include/__stddef_ptrdiff_t.h vendored Normal file
View File

@ -0,0 +1,20 @@
/*===---- __stddef_ptrdiff_t.h - Definition of ptrdiff_t -------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_PTRDIFF_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _PTRDIFF_T
typedef __PTRDIFF_TYPE__ ptrdiff_t;
#endif

20
lib/include/__stddef_rsize_t.h vendored Normal file
View File

@ -0,0 +1,20 @@
/*===---- __stddef_rsize_t.h - Definition of rsize_t -----------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_RSIZE_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _RSIZE_T
typedef __SIZE_TYPE__ rsize_t;
#endif

20
lib/include/__stddef_size_t.h vendored Normal file
View File

@ -0,0 +1,20 @@
/*===---- __stddef_size_t.h - Definition of size_t -------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_SIZE_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _SIZE_T
typedef __SIZE_TYPE__ size_t;
#endif

21
lib/include/__stddef_unreachable.h vendored Normal file
View File

@ -0,0 +1,21 @@
/*===---- __stddef_unreachable.h - Definition of unreachable ---------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __cplusplus
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(unreachable) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define unreachable() __builtin_unreachable()
#endif
#endif

28
lib/include/__stddef_wchar_t.h vendored Normal file
View File

@ -0,0 +1,28 @@
/*===---- __stddef_wchar.h - Definition of wchar_t -------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED)
/*
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
* and needs to behave as if it was textual.
*/
#if !defined(_WCHAR_T) || \
(__has_feature(modules) && !__building_module(_Builtin_stddef))
#define _WCHAR_T
#ifdef _MSC_EXTENSIONS
#define _WCHAR_T_DEFINED
#endif
typedef __WCHAR_TYPE__ wchar_t;
#endif
#endif

15
lib/include/__stddef_wint_t.h vendored Normal file
View File

@ -0,0 +1,15 @@
/*===---- __stddef_wint.h - Definition of wint_t ---------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef _WINT_T
#define _WINT_T
typedef __WINT_TYPE__ wint_t;
#endif

160
lib/include/adcintrin.h vendored Normal file
View File

@ -0,0 +1,160 @@
/*===---- adcintrin.h - ADC intrinsics -------------------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __ADCINTRIN_H
#define __ADCINTRIN_H
#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
/* Use C++ inline semantics in C++, GNU inline for C mode. */
#if defined(__cplusplus)
#define __INLINE __inline
#else
#define __INLINE static __inline
#endif
#if defined(__cplusplus)
extern "C" {
#endif
/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 32-bit unsigned addend.
/// \param __y
/// A 32-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
unsigned int *__p) {
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
}
/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c SBB instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// The 32-bit unsigned minuend.
/// \param __y
/// The 32-bit unsigned subtrahend.
/// \param __p
/// Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
unsigned int *__p) {
return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
}
#ifdef __x86_64__
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 64-bit unsigned addend.
/// \param __y
/// A 64-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_addcarry_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// The 64-bit unsigned minuend.
/// \param __y
/// The 64-bit unsigned subtrahend.
/// \param __p
/// Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_subborrow_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
}
#endif
#if defined(__cplusplus)
}
#endif
#undef __INLINE
#undef __DEFAULT_FN_ATTRS
#endif /* __ADCINTRIN_H */

View File

@ -15,7 +15,8 @@
#define __ADXINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
/* Use C++ inline semantics in C++, GNU inline for C mode. */
#if defined(__cplusplus)
@ -53,10 +54,10 @@ extern "C" {
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
unsigned int *__p) {
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarryx_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
unsigned int *__p) {
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
}
@ -84,137 +85,10 @@ __INLINE unsigned char
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
_addcarryx_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
#endif
/* Intrinsics that are also available if __ADX__ is undefined. */
/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 32-bit unsigned addend.
/// \param __y
/// A 32-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
unsigned int *__p) {
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
}
#ifdef __x86_64__
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
/// at \a __p, and returns the 8-bit carry-out (carry flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x + __y + temp)
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// A 64-bit unsigned addend.
/// \param __y
/// A 64-bit unsigned addend.
/// \param __p
/// Pointer to memory for storing the sum.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_addcarry_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
#endif
/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store32(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c SBB instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// The 32-bit unsigned minuend.
/// \param __y
/// The 32-bit unsigned subtrahend.
/// \param __p
/// Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
unsigned int __x,
unsigned int __y,
unsigned int *__p) {
return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
}
#ifdef __x86_64__
/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
/// and returns the 8-bit carry-out (carry or overflow flag).
///
/// \code{.operation}
/// temp := (__cf == 0) ? 0 : 1
/// Store64(__p, __x - (__y + temp))
/// result := CF
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c ADC instruction.
///
/// \param __cf
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
/// \param __x
/// The 64-bit unsigned minuend.
/// \param __y
/// The 64-bit unsigned subtrahend.
/// \param __p
/// Pointer to memory for storing the difference.
/// \returns The 8-bit unsigned carry-out value.
__INLINE unsigned char __DEFAULT_FN_ATTRS
_subborrow_u64(unsigned char __cf, unsigned long long __x,
_addcarryx_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p) {
return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
#endif
@ -222,6 +96,7 @@ _subborrow_u64(unsigned char __cf, unsigned long long __x,
}
#endif
#undef __INLINE
#undef __DEFAULT_FN_ATTRS
#endif /* __ADXINTRIN_H */

43
lib/include/altivec.h vendored
View File

@ -14647,67 +14647,86 @@ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,
static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a,
int __b) {
vector signed char __res = (vector signed char)(0);
__res[__b & 0x7] = __a;
const vector signed char __zero = (vector signed char)0;
vector signed char __res =
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0xf] = __a;
return __res;
}
static __inline__ vector unsigned char __ATTRS_o_ai
vec_promote(unsigned char __a, int __b) {
vector unsigned char __res = (vector unsigned char)(0);
__res[__b & 0x7] = __a;
const vector unsigned char __zero = (vector unsigned char)(0);
vector unsigned char __res =
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0xf] = __a;
return __res;
}
static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) {
vector short __res = (vector short)(0);
const vector short __zero = (vector short)(0);
vector short __res =
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0x7] = __a;
return __res;
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_promote(unsigned short __a, int __b) {
vector unsigned short __res = (vector unsigned short)(0);
const vector unsigned short __zero = (vector unsigned short)(0);
vector unsigned short __res =
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
__res[__b & 0x7] = __a;
return __res;
}
static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) {
vector int __res = (vector int)(0);
const vector int __zero = (vector int)(0);
vector int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a,
int __b) {
vector unsigned int __res = (vector unsigned int)(0);
const vector unsigned int __zero = (vector unsigned int)(0);
vector unsigned int __res =
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
vector float __res = (vector float)(0);
const vector float __zero = (vector float)(0);
vector float __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
__res[__b & 0x3] = __a;
return __res;
}
#ifdef __VSX__
static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) {
vector double __res = (vector double)(0);
const vector double __zero = (vector double)(0);
vector double __res = __builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}
static __inline__ vector signed long long __ATTRS_o_ai
vec_promote(signed long long __a, int __b) {
vector signed long long __res = (vector signed long long)(0);
const vector signed long long __zero = (vector signed long long)(0);
vector signed long long __res =
__builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_promote(unsigned long long __a, int __b) {
vector unsigned long long __res = (vector unsigned long long)(0);
const vector unsigned long long __zero = (vector unsigned long long)(0);
vector unsigned long long __res =
__builtin_shufflevector(__zero, __zero, -1, -1);
__res[__b & 0x1] = __a;
return __res;
}

View File

@ -155,9 +155,9 @@ _mm_insert_si64(__m128i __x, __m128i __y)
/// \param __a
/// The 64-bit double-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(double *__p, __m128d __a)
_mm_stream_sd(void *__p, __m128d __a)
{
__builtin_ia32_movntsd(__p, (__v2df)__a);
__builtin_ia32_movntsd((double *)__p, (__v2df)__a);
}
/// Stores a 32-bit single-precision floating-point value in a 32-bit
@ -173,9 +173,9 @@ _mm_stream_sd(double *__p, __m128d __a)
/// \param __a
/// The 32-bit single-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(float *__p, __m128 __a)
_mm_stream_ss(void *__p, __m128 __a)
{
__builtin_ia32_movntss(__p, (__v4sf)__a);
__builtin_ia32_movntss((float *)__p, (__v4sf)__a);
}
#undef __DEFAULT_FN_ATTRS

142
lib/include/arm_acle.h vendored
View File

@ -4,6 +4,13 @@
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* The Arm C Language Extensions specifications can be found in the following
* link: https://github.com/ARM-software/acle/releases
*
* The ACLE section numbers are subject to change. When consulting the
* specifications, it is recommended to search using section titles if
* the section numbers look outdated.
*
*===-----------------------------------------------------------------------===
*/
@ -20,8 +27,8 @@
extern "C" {
#endif
/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
/* 8.3 Memory barriers */
/* 7 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
/* 7.3 Memory barriers */
#if !__has_builtin(__dmb)
#define __dmb(i) __builtin_arm_dmb(i)
#endif
@ -32,7 +39,7 @@ extern "C" {
#define __isb(i) __builtin_arm_isb(i)
#endif
/* 8.4 Hints */
/* 7.4 Hints */
#if !__has_builtin(__wfi)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
@ -68,7 +75,7 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v
#define __dbg(t) __builtin_arm_dbg(t)
#endif
/* 8.5 Swap */
/* 7.5 Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
uint32_t v;
@ -78,8 +85,8 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
return v;
}
/* 8.6 Memory prefetch intrinsics */
/* 8.6.1 Data prefetch */
/* 7.6 Memory prefetch intrinsics */
/* 7.6.1 Data prefetch */
#define __pld(addr) __pldx(0, 0, 0, addr)
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
@ -90,7 +97,7 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
__builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
#endif
/* 8.6.2 Instruction prefetch */
/* 7.6.2 Instruction prefetch */
#define __pli(addr) __plix(0, 0, addr)
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
@ -101,15 +108,15 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
__builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
#endif
/* 8.7 NOP */
/* 7.7 NOP */
#if !defined(_MSC_VER) || !defined(__aarch64__)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
__builtin_arm_nop();
}
#endif
/* 9 DATA-PROCESSING INTRINSICS */
/* 9.2 Miscellaneous data-processing intrinsics */
/* 8 DATA-PROCESSING INTRINSICS */
/* 8.2 Miscellaneous data-processing intrinsics */
/* ROR */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
@ -248,9 +255,7 @@ __rbitl(unsigned long __t) {
#endif
}
/*
* 9.3 16-bit multiplications
*/
/* 8.3 16-bit multiplications */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbb(int32_t __a, int32_t __b) {
@ -279,18 +284,18 @@ __smulwt(int32_t __a, int32_t __b) {
#endif
/*
* 9.4 Saturating intrinsics
* 8.4 Saturating intrinsics
*
* FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag
* intrinsics are implemented and the flag is enabled.
*/
/* 9.4.1 Width-specified saturation intrinsics */
/* 8.4.1 Width-specified saturation intrinsics */
#if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
#endif
/* 9.4.2 Saturating addition and subtraction intrinsics */
/* 8.4.2 Saturating addition and subtraction intrinsics */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
@ -308,7 +313,7 @@ __qdbl(int32_t __t) {
}
#endif
/* 9.4.3 Accumultating multiplications */
/* 8.4.3 Accumultating multiplications */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
@ -337,13 +342,13 @@ __smlawt(int32_t __a, int32_t __b, int32_t __c) {
#endif
/* 9.5.4 Parallel 16-bit saturation */
/* 8.5.4 Parallel 16-bit saturation */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif
/* 9.5.5 Packing and unpacking */
/* 8.5.5 Packing and unpacking */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
@ -368,7 +373,7 @@ __uxtb16(int8x4_t __a) {
}
#endif
/* 9.5.6 Parallel selection */
/* 8.5.6 Parallel selection */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
@ -376,7 +381,7 @@ __sel(uint8x4_t __a, uint8x4_t __b) {
}
#endif
/* 9.5.7 Parallel 8-bit addition and subtraction */
/* 8.5.7 Parallel 8-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
@ -428,7 +433,7 @@ __usub8(uint8x4_t __a, uint8x4_t __b) {
}
#endif
/* 9.5.8 Sum of 8-bit absolute differences */
/* 8.5.8 Sum of 8-bit absolute differences */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
@ -440,7 +445,7 @@ __usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
}
#endif
/* 9.5.9 Parallel 16-bit addition and subtraction */
/* 8.5.9 Parallel 16-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
@ -540,7 +545,7 @@ __usub16(uint16x2_t __a, uint16x2_t __b) {
}
#endif
/* 9.5.10 Parallel 16-bit multiplications */
/* 8.5.10 Parallel 16-bit multiplications */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
@ -592,7 +597,22 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
}
#endif
/* 9.7 CRC32 intrinsics */
/* 8.6 Floating-point data-processing intrinsics */
#if (defined(__ARM_FEATURE_DIRECTED_ROUNDING) && \
(__ARM_FEATURE_DIRECTED_ROUNDING)) && \
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ double __attribute__((__always_inline__, __nodebug__))
__rintn(double __a) {
return __builtin_roundeven(__a);
}
static __inline__ float __attribute__((__always_inline__, __nodebug__))
__rintnf(float __a) {
return __builtin_roundevenf(__a);
}
#endif
/* 8.8 CRC32 intrinsics */
#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
@ -636,6 +656,7 @@ __crc32cd(uint32_t __a, uint64_t __b) {
}
#endif
/* 8.6 Floating-point data-processing intrinsics */
/* Armv8.3-A Javascript conversion intrinsic */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
@ -687,7 +708,7 @@ __rint64x(double __a) {
}
#endif
/* Armv8.7-A load/store 64-byte intrinsics */
/* 8.9 Armv8.7-A load/store 64-byte intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
typedef struct {
uint64_t val[8];
@ -713,7 +734,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
}
#endif
/* 10.1 Special register intrinsics */
/* 11.1 Special register intrinsics */
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
#define __arm_rsr128(sysreg) __builtin_arm_rsr128(sysreg)
@ -727,7 +748,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))
/* Memory Tagging Extensions (MTE) Intrinsics */
/* 10.3 Memory Tagging Extensions (MTE) Intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
@ -736,12 +757,71 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
/* Memory Operations Intrinsics */
/* 18 Memory Operations Intrinsics */
#define __arm_mops_memset_tag(__tagged_address, __value, __size) \
__builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
#endif
/* Transactional Memory Extension (TME) Intrinsics */
/* 11.3 Coprocessor Intrinsics */
#if defined(__ARM_FEATURE_COPROC)
#if (__ARM_FEATURE_COPROC & 0x1)
#if (__ARM_ARCH < 8)
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
__builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
#endif /* __ARM_ARCH < 8 */
#define __arm_ldc(coproc, CRd, p) __builtin_arm_ldc(coproc, CRd, p)
#define __arm_stc(coproc, CRd, p) __builtin_arm_stc(coproc, CRd, p)
#define __arm_mcr(coproc, opc1, value, CRn, CRm, opc2) \
__builtin_arm_mcr(coproc, opc1, value, CRn, CRm, opc2)
#define __arm_mrc(coproc, opc1, CRn, CRm, opc2) \
__builtin_arm_mrc(coproc, opc1, CRn, CRm, opc2)
#if (__ARM_ARCH != 4) && (__ARM_ARCH < 8)
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
#endif /* (__ARM_ARCH != 4) && (__ARM_ARCH != 8) */
#if (__ARM_ARCH_8M_MAIN__) || (__ARM_ARCH_8_1M_MAIN__)
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
__builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
#endif /* ___ARM_ARCH_8M_MAIN__ */
#endif /* __ARM_FEATURE_COPROC & 0x1 */
#if (__ARM_FEATURE_COPROC & 0x2)
#define __arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) \
__builtin_arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2)
#define __arm_ldc2(coproc, CRd, p) __builtin_arm_ldc2(coproc, CRd, p)
#define __arm_stc2(coproc, CRd, p) __builtin_arm_stc2(coproc, CRd, p)
#define __arm_ldc2l(coproc, CRd, p) __builtin_arm_ldc2l(coproc, CRd, p)
#define __arm_stc2l(coproc, CRd, p) __builtin_arm_stc2l(coproc, CRd, p)
#define __arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) \
__builtin_arm_mcr2(coproc, opc1, value, CRn, CRm, opc2)
#define __arm_mrc2(coproc, opc1, CRn, CRm, opc2) \
__builtin_arm_mrc2(coproc, opc1, CRn, CRm, opc2)
#endif
#if (__ARM_FEATURE_COPROC & 0x4)
#define __arm_mcrr(coproc, opc1, value, CRm) \
__builtin_arm_mcrr(coproc, opc1, value, CRm)
#define __arm_mrrc(coproc, opc1, CRm) __builtin_arm_mrrc(coproc, opc1, CRm)
#endif
#if (__ARM_FEATURE_COPROC & 0x8)
#define __arm_mcrr2(coproc, opc1, value, CRm) \
__builtin_arm_mcrr2(coproc, opc1, value, CRm)
#define __arm_mrrc2(coproc, opc1, CRm) __builtin_arm_mrrc2(coproc, opc1, CRm)
#endif
#endif // __ARM_FEATURE_COPROC
/* 17 Transactional Memory Extension (TME) Intrinsics */
#if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME
#define _TMFAILURE_REASON 0x00007fffu
@ -763,7 +843,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#endif /* __ARM_FEATURE_TME */
/* Armv8.5-A Random number generation intrinsics */
/* 8.7 Armv8.5-A Random number generation intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndr(uint64_t *__p) {

412
lib/include/arm_neon.h vendored
View File

@ -35,12 +35,7 @@
#include <stdint.h>
#include <arm_bf16.h>
typedef float float32_t;
typedef __fp16 float16_t;
#ifdef __aarch64__
typedef double float64_t;
#endif
#include <arm_vector_types.h>
#ifdef __aarch64__
typedef uint8_t poly8_t;
typedef uint16_t poly16_t;
@ -51,30 +46,6 @@ typedef int8_t poly8_t;
typedef int16_t poly16_t;
typedef int64_t poly64_t;
#endif
typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
#ifdef __aarch64__
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
#endif
typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
@ -82,96 +53,6 @@ typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
typedef __attribute__((neon_polyvector_type(1))) poly64_t poly64x1_t;
typedef __attribute__((neon_polyvector_type(2))) poly64_t poly64x2_t;
typedef struct int8x8x2_t {
int8x8_t val[2];
} int8x8x2_t;
typedef struct int8x16x2_t {
int8x16_t val[2];
} int8x16x2_t;
typedef struct int16x4x2_t {
int16x4_t val[2];
} int16x4x2_t;
typedef struct int16x8x2_t {
int16x8_t val[2];
} int16x8x2_t;
typedef struct int32x2x2_t {
int32x2_t val[2];
} int32x2x2_t;
typedef struct int32x4x2_t {
int32x4_t val[2];
} int32x4x2_t;
typedef struct int64x1x2_t {
int64x1_t val[2];
} int64x1x2_t;
typedef struct int64x2x2_t {
int64x2_t val[2];
} int64x2x2_t;
typedef struct uint8x8x2_t {
uint8x8_t val[2];
} uint8x8x2_t;
typedef struct uint8x16x2_t {
uint8x16_t val[2];
} uint8x16x2_t;
typedef struct uint16x4x2_t {
uint16x4_t val[2];
} uint16x4x2_t;
typedef struct uint16x8x2_t {
uint16x8_t val[2];
} uint16x8x2_t;
typedef struct uint32x2x2_t {
uint32x2_t val[2];
} uint32x2x2_t;
typedef struct uint32x4x2_t {
uint32x4_t val[2];
} uint32x4x2_t;
typedef struct uint64x1x2_t {
uint64x1_t val[2];
} uint64x1x2_t;
typedef struct uint64x2x2_t {
uint64x2_t val[2];
} uint64x2x2_t;
typedef struct float16x4x2_t {
float16x4_t val[2];
} float16x4x2_t;
typedef struct float16x8x2_t {
float16x8_t val[2];
} float16x8x2_t;
typedef struct float32x2x2_t {
float32x2_t val[2];
} float32x2x2_t;
typedef struct float32x4x2_t {
float32x4_t val[2];
} float32x4x2_t;
#ifdef __aarch64__
typedef struct float64x1x2_t {
float64x1_t val[2];
} float64x1x2_t;
typedef struct float64x2x2_t {
float64x2_t val[2];
} float64x2x2_t;
#endif
typedef struct poly8x8x2_t {
poly8x8_t val[2];
} poly8x8x2_t;
@ -196,96 +77,6 @@ typedef struct poly64x2x2_t {
poly64x2_t val[2];
} poly64x2x2_t;
typedef struct int8x8x3_t {
int8x8_t val[3];
} int8x8x3_t;
typedef struct int8x16x3_t {
int8x16_t val[3];
} int8x16x3_t;
typedef struct int16x4x3_t {
int16x4_t val[3];
} int16x4x3_t;
typedef struct int16x8x3_t {
int16x8_t val[3];
} int16x8x3_t;
typedef struct int32x2x3_t {
int32x2_t val[3];
} int32x2x3_t;
typedef struct int32x4x3_t {
int32x4_t val[3];
} int32x4x3_t;
typedef struct int64x1x3_t {
int64x1_t val[3];
} int64x1x3_t;
typedef struct int64x2x3_t {
int64x2_t val[3];
} int64x2x3_t;
typedef struct uint8x8x3_t {
uint8x8_t val[3];
} uint8x8x3_t;
typedef struct uint8x16x3_t {
uint8x16_t val[3];
} uint8x16x3_t;
typedef struct uint16x4x3_t {
uint16x4_t val[3];
} uint16x4x3_t;
typedef struct uint16x8x3_t {
uint16x8_t val[3];
} uint16x8x3_t;
typedef struct uint32x2x3_t {
uint32x2_t val[3];
} uint32x2x3_t;
typedef struct uint32x4x3_t {
uint32x4_t val[3];
} uint32x4x3_t;
typedef struct uint64x1x3_t {
uint64x1_t val[3];
} uint64x1x3_t;
typedef struct uint64x2x3_t {
uint64x2_t val[3];
} uint64x2x3_t;
typedef struct float16x4x3_t {
float16x4_t val[3];
} float16x4x3_t;
typedef struct float16x8x3_t {
float16x8_t val[3];
} float16x8x3_t;
typedef struct float32x2x3_t {
float32x2_t val[3];
} float32x2x3_t;
typedef struct float32x4x3_t {
float32x4_t val[3];
} float32x4x3_t;
#ifdef __aarch64__
typedef struct float64x1x3_t {
float64x1_t val[3];
} float64x1x3_t;
typedef struct float64x2x3_t {
float64x2_t val[3];
} float64x2x3_t;
#endif
typedef struct poly8x8x3_t {
poly8x8_t val[3];
} poly8x8x3_t;
@ -310,96 +101,6 @@ typedef struct poly64x2x3_t {
poly64x2_t val[3];
} poly64x2x3_t;
typedef struct int8x8x4_t {
int8x8_t val[4];
} int8x8x4_t;
typedef struct int8x16x4_t {
int8x16_t val[4];
} int8x16x4_t;
typedef struct int16x4x4_t {
int16x4_t val[4];
} int16x4x4_t;
typedef struct int16x8x4_t {
int16x8_t val[4];
} int16x8x4_t;
typedef struct int32x2x4_t {
int32x2_t val[4];
} int32x2x4_t;
typedef struct int32x4x4_t {
int32x4_t val[4];
} int32x4x4_t;
typedef struct int64x1x4_t {
int64x1_t val[4];
} int64x1x4_t;
typedef struct int64x2x4_t {
int64x2_t val[4];
} int64x2x4_t;
typedef struct uint8x8x4_t {
uint8x8_t val[4];
} uint8x8x4_t;
typedef struct uint8x16x4_t {
uint8x16_t val[4];
} uint8x16x4_t;
typedef struct uint16x4x4_t {
uint16x4_t val[4];
} uint16x4x4_t;
typedef struct uint16x8x4_t {
uint16x8_t val[4];
} uint16x8x4_t;
typedef struct uint32x2x4_t {
uint32x2_t val[4];
} uint32x2x4_t;
typedef struct uint32x4x4_t {
uint32x4_t val[4];
} uint32x4x4_t;
typedef struct uint64x1x4_t {
uint64x1_t val[4];
} uint64x1x4_t;
typedef struct uint64x2x4_t {
uint64x2_t val[4];
} uint64x2x4_t;
typedef struct float16x4x4_t {
float16x4_t val[4];
} float16x4x4_t;
typedef struct float16x8x4_t {
float16x8_t val[4];
} float16x8x4_t;
typedef struct float32x2x4_t {
float32x2_t val[4];
} float32x2x4_t;
typedef struct float32x4x4_t {
float32x4_t val[4];
} float32x4x4_t;
#ifdef __aarch64__
typedef struct float64x1x4_t {
float64x1_t val[4];
} float64x1x4_t;
typedef struct float64x2x4_t {
float64x2_t val[4];
} float64x2x4_t;
#endif
typedef struct poly8x8x4_t {
poly8x8_t val[4];
} poly8x8x4_t;
@ -424,33 +125,6 @@ typedef struct poly64x2x4_t {
poly64x2_t val[4];
} poly64x2x4_t;
typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
typedef struct bfloat16x4x2_t {
bfloat16x4_t val[2];
} bfloat16x4x2_t;
typedef struct bfloat16x8x2_t {
bfloat16x8_t val[2];
} bfloat16x8x2_t;
typedef struct bfloat16x4x3_t {
bfloat16x4_t val[3];
} bfloat16x4x3_t;
typedef struct bfloat16x8x3_t {
bfloat16x8_t val[3];
} bfloat16x8x3_t;
typedef struct bfloat16x4x4_t {
bfloat16x4_t val[4];
} bfloat16x4x4_t;
typedef struct bfloat16x8x4_t {
bfloat16x8_t val[4];
} bfloat16x8x4_t;
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
#ifdef __LITTLE_ENDIAN__
@ -66600,6 +66274,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32x_f32(float32x2_t __p0)
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) {
float64x2_t __ret;
__ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__p0, 42);
return __ret;
}
#else
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) {
float64x2_t __ret;
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
__ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__rev0, 42);
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
return __ret;
}
#endif
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32x_f64(float64x1_t __p0) {
float64x1_t __ret;
__ret = (float64x1_t) __builtin_neon_vrnd32x_f64((int8x8_t)__p0, 10);
return __ret;
}
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd32zq_f32(float32x4_t __p0) {
float32x4_t __ret;
@ -66632,6 +66327,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32z_f32(float32x2_t __p0)
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) {
float64x2_t __ret;
__ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__p0, 42);
return __ret;
}
#else
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) {
float64x2_t __ret;
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
__ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__rev0, 42);
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
return __ret;
}
#endif
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32z_f64(float64x1_t __p0) {
float64x1_t __ret;
__ret = (float64x1_t) __builtin_neon_vrnd32z_f64((int8x8_t)__p0, 10);
return __ret;
}
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd64xq_f32(float32x4_t __p0) {
float32x4_t __ret;
@ -66664,6 +66380,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64x_f32(float32x2_t __p0)
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) {
float64x2_t __ret;
__ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__p0, 42);
return __ret;
}
#else
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) {
float64x2_t __ret;
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
__ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__rev0, 42);
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
return __ret;
}
#endif
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64x_f64(float64x1_t __p0) {
float64x1_t __ret;
__ret = (float64x1_t) __builtin_neon_vrnd64x_f64((int8x8_t)__p0, 10);
return __ret;
}
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd64zq_f32(float32x4_t __p0) {
float32x4_t __ret;
@ -66696,6 +66433,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64z_f32(float32x2_t __p0)
}
#endif
#ifdef __LITTLE_ENDIAN__
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) {
float64x2_t __ret;
__ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__p0, 42);
return __ret;
}
#else
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) {
float64x2_t __ret;
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
__ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__rev0, 42);
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
return __ret;
}
#endif
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64z_f64(float64x1_t __p0) {
float64x1_t __ret;
__ret = (float64x1_t) __builtin_neon_vrnd64z_f64((int8x8_t)__p0, 10);
return __ret;
}
#endif
#if defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)
#ifdef __LITTLE_ENDIAN__

2412
lib/include/arm_sme.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,642 +0,0 @@
/*===---- arm_sme_draft_spec_subject_to_change.h - ARM SME intrinsics ------===
*
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __ARM_SME_H
#define __ARM_SME_H
#if !defined(__LITTLE_ENDIAN__)
#error "Big endian is currently not supported for arm_sme_draft_spec_subject_to_change.h"
#endif
#include <arm_sve.h>
/* Function attributes */
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
#define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__))
#ifdef __cplusplus
extern "C" {
#endif
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za))
void svaddha_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za))
void svaddha_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za))
void svaddva_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za))
void svaddva_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsb), arm_streaming_compatible, arm_preserves_za))
uint64_t svcntsb(void);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsd), arm_streaming_compatible, arm_preserves_za))
uint64_t svcntsd(void);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsh), arm_streaming_compatible, arm_preserves_za))
uint64_t svcntsh(void);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsw), arm_streaming_compatible, arm_preserves_za))
uint64_t svcntsw(void);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za128), arm_streaming, arm_shared_za))
void svld1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za16), arm_streaming, arm_shared_za))
void svld1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za32), arm_streaming, arm_shared_za))
void svld1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za64), arm_streaming, arm_shared_za))
void svld1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za8), arm_streaming, arm_shared_za))
void svld1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za128), arm_streaming, arm_shared_za))
void svld1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za16), arm_streaming, arm_shared_za))
void svld1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za32), arm_streaming, arm_shared_za))
void svld1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za64), arm_streaming, arm_shared_za))
void svld1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za8), arm_streaming, arm_shared_za))
void svld1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za128), arm_streaming, arm_shared_za))
void svld1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za16), arm_streaming, arm_shared_za))
void svld1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za32), arm_streaming, arm_shared_za))
void svld1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za64), arm_streaming, arm_shared_za))
void svld1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za8), arm_streaming, arm_shared_za))
void svld1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za128), arm_streaming, arm_shared_za))
void svld1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za16), arm_streaming, arm_shared_za))
void svld1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za32), arm_streaming, arm_shared_za))
void svld1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za64), arm_streaming, arm_shared_za))
void svld1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za8), arm_streaming, arm_shared_za))
void svld1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za))
void svmopa_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za))
void svmopa_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za))
void svmopa_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za))
void svmopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za))
void svmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za))
void svmops_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za))
void svmops_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za))
void svmops_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za))
void svmops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za))
void svmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_hor_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_hor_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_hor_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_hor_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_hor_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_hor_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_hor_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_hor_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_hor_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_hor_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_hor_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_hor_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_hor_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_hor_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_hor_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_hor_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_hor_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_hor_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_hor_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_hor_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_hor_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_hor_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_hor_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_hor_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_ver_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_ver_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_ver_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_ver_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_ver_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_ver_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_ver_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_ver_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_ver_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_ver_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_ver_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_ver_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_ver_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_ver_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_ver_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_ver_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_ver_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_ver_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_ver_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_ver_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_ver_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_ver_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_ver_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_ver_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za128), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za16), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za32), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za64), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za8), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za128), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za16), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za32), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za64), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za8), arm_streaming, arm_shared_za, arm_preserves_za))
void svst1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za))
void svsumopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za))
void svsumops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za))
void svusmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za))
void svusmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za), arm_streaming_compatible, arm_shared_za))
void svzero_mask_za(uint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za), arm_streaming_compatible, arm_shared_za))
void svzero_za();
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za))
void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za))
void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za))
void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za))
void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za))
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za))
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za))
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za))
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za))
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za))
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za))
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za))
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za))
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za))
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_hor_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_hor_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_hor_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_hor_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_hor_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_hor_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_hor_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_hor_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_hor_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_hor_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_hor_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_hor_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_hor_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_hor_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_hor_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_hor_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_hor_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_hor_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_hor_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_hor_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_hor_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_hor_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_hor_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_hor_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_ver_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_ver_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_ver_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_ver_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_ver_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_ver_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_ver_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_ver_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_ver_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_ver_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_ver_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_ver_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint16_t svread_ver_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svbfloat16_t svread_ver_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat16_t svread_ver_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint16_t svread_ver_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint32_t svread_ver_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat32_t svread_ver_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint32_t svread_ver_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint64_t svread_ver_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svfloat64_t svread_ver_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint64_t svread_ver_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svuint8_t svread_ver_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
svint8_t svread_ver_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za))
void svsumopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za))
void svsumops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za))
void svusmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za))
void svusmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za))
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za))
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za))
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za))
void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za))
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za))
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za))
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za))
void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za))
void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za))
void svmops_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za))
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za))
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za))
void svaddha_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za))
void svaddha_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za))
void svaddva_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za))
void svaddva_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za))
void svmopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za))
void svmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za))
void svmops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za))
void svmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za))
void svsumopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za))
void svsumops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za))
void svusmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za))
void svusmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za))
void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za))
void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za))
void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za))
void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za))
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za))
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za))
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za))
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za))
void svsumopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za))
void svsumops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za))
void svusmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za))
void svusmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_vnum_za), arm_streaming_compatible, arm_shared_za))
void svldr_vnum_za(uint32_t, uint64_t, void const *);
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_vnum_za), arm_streaming_compatible, arm_shared_za, arm_preserves_za))
void svstr_vnum_za(uint32_t, uint64_t, void *);
#ifdef __cplusplus
} // extern "C"
#endif
#undef __ai
#endif /* __ARM_SME_H */

7829
lib/include/arm_sve.h vendored

File diff suppressed because it is too large Load Diff

345
lib/include/arm_vector_types.h vendored Normal file
View File

@ -0,0 +1,345 @@
/*===---- arm_vector_types - ARM vector type ------===
*
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)
#error "This file should not be used standalone. Please include arm_neon.h or arm_sve.h instead"
#endif
#ifndef __ARM_NEON_TYPES_H
#define __ARM_NEON_TYPES_H
typedef float float32_t;
typedef __fp16 float16_t;
#ifdef __aarch64__
typedef double float64_t;
#endif
typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
#ifdef __aarch64__
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
#endif
typedef struct int8x8x2_t {
int8x8_t val[2];
} int8x8x2_t;
typedef struct int8x16x2_t {
int8x16_t val[2];
} int8x16x2_t;
typedef struct int16x4x2_t {
int16x4_t val[2];
} int16x4x2_t;
typedef struct int16x8x2_t {
int16x8_t val[2];
} int16x8x2_t;
typedef struct int32x2x2_t {
int32x2_t val[2];
} int32x2x2_t;
typedef struct int32x4x2_t {
int32x4_t val[2];
} int32x4x2_t;
typedef struct int64x1x2_t {
int64x1_t val[2];
} int64x1x2_t;
typedef struct int64x2x2_t {
int64x2_t val[2];
} int64x2x2_t;
typedef struct uint8x8x2_t {
uint8x8_t val[2];
} uint8x8x2_t;
typedef struct uint8x16x2_t {
uint8x16_t val[2];
} uint8x16x2_t;
typedef struct uint16x4x2_t {
uint16x4_t val[2];
} uint16x4x2_t;
typedef struct uint16x8x2_t {
uint16x8_t val[2];
} uint16x8x2_t;
typedef struct uint32x2x2_t {
uint32x2_t val[2];
} uint32x2x2_t;
typedef struct uint32x4x2_t {
uint32x4_t val[2];
} uint32x4x2_t;
typedef struct uint64x1x2_t {
uint64x1_t val[2];
} uint64x1x2_t;
typedef struct uint64x2x2_t {
uint64x2_t val[2];
} uint64x2x2_t;
typedef struct float16x4x2_t {
float16x4_t val[2];
} float16x4x2_t;
typedef struct float16x8x2_t {
float16x8_t val[2];
} float16x8x2_t;
typedef struct float32x2x2_t {
float32x2_t val[2];
} float32x2x2_t;
typedef struct float32x4x2_t {
float32x4_t val[2];
} float32x4x2_t;
#ifdef __aarch64__
typedef struct float64x1x2_t {
float64x1_t val[2];
} float64x1x2_t;
typedef struct float64x2x2_t {
float64x2_t val[2];
} float64x2x2_t;
#endif
typedef struct int8x8x3_t {
int8x8_t val[3];
} int8x8x3_t;
typedef struct int8x16x3_t {
int8x16_t val[3];
} int8x16x3_t;
typedef struct int16x4x3_t {
int16x4_t val[3];
} int16x4x3_t;
typedef struct int16x8x3_t {
int16x8_t val[3];
} int16x8x3_t;
typedef struct int32x2x3_t {
int32x2_t val[3];
} int32x2x3_t;
typedef struct int32x4x3_t {
int32x4_t val[3];
} int32x4x3_t;
typedef struct int64x1x3_t {
int64x1_t val[3];
} int64x1x3_t;
typedef struct int64x2x3_t {
int64x2_t val[3];
} int64x2x3_t;
typedef struct uint8x8x3_t {
uint8x8_t val[3];
} uint8x8x3_t;
typedef struct uint8x16x3_t {
uint8x16_t val[3];
} uint8x16x3_t;
typedef struct uint16x4x3_t {
uint16x4_t val[3];
} uint16x4x3_t;
typedef struct uint16x8x3_t {
uint16x8_t val[3];
} uint16x8x3_t;
typedef struct uint32x2x3_t {
uint32x2_t val[3];
} uint32x2x3_t;
typedef struct uint32x4x3_t {
uint32x4_t val[3];
} uint32x4x3_t;
typedef struct uint64x1x3_t {
uint64x1_t val[3];
} uint64x1x3_t;
typedef struct uint64x2x3_t {
uint64x2_t val[3];
} uint64x2x3_t;
typedef struct float16x4x3_t {
float16x4_t val[3];
} float16x4x3_t;
typedef struct float16x8x3_t {
float16x8_t val[3];
} float16x8x3_t;
typedef struct float32x2x3_t {
float32x2_t val[3];
} float32x2x3_t;
typedef struct float32x4x3_t {
float32x4_t val[3];
} float32x4x3_t;
#ifdef __aarch64__
typedef struct float64x1x3_t {
float64x1_t val[3];
} float64x1x3_t;
typedef struct float64x2x3_t {
float64x2_t val[3];
} float64x2x3_t;
#endif
typedef struct int8x8x4_t {
int8x8_t val[4];
} int8x8x4_t;
typedef struct int8x16x4_t {
int8x16_t val[4];
} int8x16x4_t;
typedef struct int16x4x4_t {
int16x4_t val[4];
} int16x4x4_t;
typedef struct int16x8x4_t {
int16x8_t val[4];
} int16x8x4_t;
typedef struct int32x2x4_t {
int32x2_t val[4];
} int32x2x4_t;
typedef struct int32x4x4_t {
int32x4_t val[4];
} int32x4x4_t;
typedef struct int64x1x4_t {
int64x1_t val[4];
} int64x1x4_t;
typedef struct int64x2x4_t {
int64x2_t val[4];
} int64x2x4_t;
typedef struct uint8x8x4_t {
uint8x8_t val[4];
} uint8x8x4_t;
typedef struct uint8x16x4_t {
uint8x16_t val[4];
} uint8x16x4_t;
typedef struct uint16x4x4_t {
uint16x4_t val[4];
} uint16x4x4_t;
typedef struct uint16x8x4_t {
uint16x8_t val[4];
} uint16x8x4_t;
typedef struct uint32x2x4_t {
uint32x2_t val[4];
} uint32x2x4_t;
typedef struct uint32x4x4_t {
uint32x4_t val[4];
} uint32x4x4_t;
typedef struct uint64x1x4_t {
uint64x1_t val[4];
} uint64x1x4_t;
typedef struct uint64x2x4_t {
uint64x2_t val[4];
} uint64x2x4_t;
typedef struct float16x4x4_t {
float16x4_t val[4];
} float16x4x4_t;
typedef struct float16x8x4_t {
float16x8_t val[4];
} float16x8x4_t;
typedef struct float32x2x4_t {
float32x2_t val[4];
} float32x2x4_t;
typedef struct float32x4x4_t {
float32x4_t val[4];
} float32x4x4_t;
#ifdef __aarch64__
typedef struct float64x1x4_t {
float64x1_t val[4];
} float64x1x4_t;
typedef struct float64x2x4_t {
float64x2_t val[4];
} float64x2x4_t;
#endif
typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
typedef struct bfloat16x4x2_t {
bfloat16x4_t val[2];
} bfloat16x4x2_t;
typedef struct bfloat16x8x2_t {
bfloat16x8_t val[2];
} bfloat16x8x2_t;
typedef struct bfloat16x4x3_t {
bfloat16x4_t val[3];
} bfloat16x4x3_t;
typedef struct bfloat16x8x3_t {
bfloat16x8_t val[3];
} bfloat16x8x3_t;
typedef struct bfloat16x4x4_t {
bfloat16x4_t val[4];
} bfloat16x4x4_t;
typedef struct bfloat16x8x4_t {
bfloat16x8_t val[4];
} bfloat16x8x4_t;
#endif // __ARM_NEON_TYPES_H

View File

@ -15,8 +15,12 @@
#define __AVX2INTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx2,no-evex512"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx2,no-evex512"), __min_vector_width__(128)))
/* SSE4 Multiple Packed Sums of Absolute Difference. */
/// Computes sixteen sum of absolute difference (SAD) operations on sets of
@ -1307,6 +1311,23 @@ _mm256_min_epu32(__m256i __a, __m256i __b)
return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b);
}
/// Creates a 32-bit integer mask from the most significant bit of each byte
/// in the 256-bit integer vector in \a __a and returns the result.
///
/// \code{.operation}
/// FOR i := 0 TO 31
/// j := i*8
/// result[i] := __a[j+7]
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPMOVMSKB instruction.
///
/// \param __a
/// A 256-bit integer vector containing the source bytes.
/// \returns The 32-bit integer mask.
static __inline__ int __DEFAULT_FN_ATTRS256
_mm256_movemask_epi8(__m256i __a)
{
@ -2962,7 +2983,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b)
/// A pointer to the 32-byte aligned memory containing the vector to load.
/// \returns A 256-bit integer vector loaded from memory.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_stream_load_si256(__m256i const *__V)
_mm256_stream_load_si256(const void *__V)
{
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);

View File

@ -20,10 +20,11 @@ typedef __bf16 __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead")));
#define __DEFAULT_FN_ATTRS512 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16,evex512"), \
__min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bf16,no-evex512")))
/// Convert One BF16 Data to One Single Float Data.
///

View File

@ -15,7 +15,10 @@
#define __AVX512BITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bitalg,evex512"), \
__min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi16(__m512i __A)

View File

@ -18,8 +18,12 @@ typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
#define __DEFAULT_FN_ATTRS512 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,no-evex512")))
static __inline __mmask32 __DEFAULT_FN_ATTRS
_knot_mask32(__mmask32 __M)
@ -27,9 +31,7 @@ _knot_mask32(__mmask32 __M)
return __builtin_ia32_knotsi(__M);
}
static __inline __mmask64 __DEFAULT_FN_ATTRS
_knot_mask64(__mmask64 __M)
{
static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) {
return __builtin_ia32_knotdi(__M);
}
@ -39,9 +41,8 @@ _kand_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kand_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
}
@ -51,9 +52,8 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kandn_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
}
@ -63,9 +63,8 @@ _kor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kor_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
}
@ -75,9 +74,8 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kxnor_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
}
@ -87,9 +85,8 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kxor_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
}
@ -112,14 +109,12 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}
@ -148,14 +143,12 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
}
@ -171,9 +164,8 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kadd_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
}
@ -214,8 +206,7 @@ _load_mask32(__mmask32 *__A) {
return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_load_mask64(__mmask64 *__A) {
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) {
return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
}
@ -224,8 +215,8 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) {
*(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
}
static __inline__ void __DEFAULT_FN_ATTRS
_store_mask64(__mmask64 *__A, __mmask64 __B) {
static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
__mmask64 __B) {
*(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
}
@ -1714,9 +1705,8 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
(__v64qi) _mm512_setzero_si512());
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
__mmask64 __B) {
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
(__mmask64) __B);
}

View File

@ -15,7 +15,9 @@
#define __AVX512CDINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512cd,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_conflict_epi64 (__m512i __A)

View File

@ -15,8 +15,10 @@
#define __AVX512DQINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512dq,no-evex512")))
static __inline __mmask8 __DEFAULT_FN_ATTRS
_knot_mask8(__mmask8 __M)

View File

@ -167,9 +167,13 @@ typedef enum
} _MM_MANTISSA_SIGN_ENUM;
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512f,no-evex512"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512f,no-evex512")))
/* Create vectors with repeated elements */

View File

@ -22,13 +22,15 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
__min_vector_width__(512)))
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512fp16,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512fp16,no-evex512"), \
__min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512fp16,no-evex512"), \
__min_vector_width__(128)))
static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {

View File

@ -15,7 +15,9 @@
#define __IFMAINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512ifma,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)

View File

@ -15,8 +15,14 @@
#define __IFMAVLINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512ifma,avx512vl,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512ifma,avx512vl,no-evex512"), \
__min_vector_width__(256)))
#define _mm_madd52hi_epu64(X, Y, Z) \
((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \

View File

@ -14,9 +14,6 @@
#ifndef __AVX512PFINTRIN_H
#define __AVX512PFINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf")))
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(void const *)(addr), (int)(scale), \
@ -92,6 +89,4 @@
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(void *)(addr), (int)(scale), (int)(hint))
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -15,7 +15,7 @@
#define __AVX512VBMI2INTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS

View File

@ -15,8 +15,9 @@
#define __VBMIINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B)

View File

@ -15,9 +15,14 @@
#define __VBMIVLINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)

View File

@ -15,12 +15,14 @@
#ifndef __AVX512VLBF16INTRIN_H
#define __AVX512VLBF16INTRIN_H
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl, avx512bf16"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl, avx512bf16"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bf16,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bf16,no-evex512"), \
__min_vector_width__(256)))
/// Convert Two Packed Single Data to One Packed BF16 Data.
///

View File

@ -15,8 +15,14 @@
#define __AVX512VLBITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bitalg,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bitalg,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi16(__m256i __A)

View File

@ -15,8 +15,14 @@
#define __AVX512VLBWINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bw,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bw,no-evex512"), \
__min_vector_width__(256)))
/* Integer compare */

View File

@ -14,9 +14,14 @@
#define __AVX512VLCDINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512cd,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512cd,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_broadcastmb_epi64 (__mmask8 __A)

View File

@ -15,8 +15,14 @@
#define __AVX512VLDQINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512dq,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512dq,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {

View File

@ -19,11 +19,11 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512fp16, avx512vl"), \
__target__("avx512fp16,avx512vl,no-evex512"), \
__min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512fp16, avx512vl"), \
__target__("avx512fp16,avx512vl,no-evex512"), \
__min_vector_width__(128)))
static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {

View File

@ -14,8 +14,14 @@
#ifndef __AVX512VLINTRIN_H
#define __AVX512VLINTRIN_H
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,no-evex512"), \
__min_vector_width__(256)))
typedef short __v2hi __attribute__((__vector_size__(4)));
typedef char __v4qi __attribute__((__vector_size__(4)));

View File

@ -15,8 +15,14 @@
#define __AVX512VLVBMI2INTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vbmi2,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vbmi2,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)

View File

@ -15,8 +15,14 @@
#define __AVX512VLVNNIINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vnni,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vnni,no-evex512"), \
__min_vector_width__(256)))
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed

View File

@ -28,12 +28,14 @@
#ifndef _AVX512VLVP2INTERSECT_H
#define _AVX512VLVP2INTERSECT_H
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vp2intersect,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512vp2intersect,no-evex512"), \
__min_vector_width__(256)))
/// Store, in an even/odd pair of mask registers, the indicators of the
/// locations of value matches between dwords in operands __a and __b.

View File

@ -15,8 +15,9 @@
#define __AVX512VNNIINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vnni,evex512"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)

View File

@ -28,8 +28,9 @@
#ifndef _AVX512VP2INTERSECT_H
#define _AVX512VP2INTERSECT_H
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vp2intersect"), \
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vp2intersect,evex512"), \
__min_vector_width__(512)))
/// Store, in an even/odd pair of mask registers, the indicators of the

View File

@ -17,7 +17,9 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512)))
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vpopcntdq,evex512"), \
__min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);

View File

@ -17,9 +17,13 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128)))
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vpopcntdq,avx512vl,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256)))
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vpopcntdq,avx512vl,no-evex512"), \
__min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi64(__m128i __A) {

View File

@ -50,8 +50,12 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
#endif
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
__min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
__min_vector_width__(128)))
/* Arithmetic */
/// Adds two 256-bit vectors of [4 x double].
@ -3563,7 +3567,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
/// \param __b
/// A 256-bit integer vector containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_si256(__m256i *__a, __m256i __b)
_mm256_stream_si256(void *__a, __m256i __b)
{
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
@ -3583,7 +3587,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x double] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_pd(double *__a, __m256d __b)
_mm256_stream_pd(void *__a, __m256d __b)
{
typedef __v4df __v4df_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
@ -3604,7 +3608,7 @@ _mm256_stream_pd(double *__a, __m256d __b)
/// \param __a
/// A 256-bit vector of [8 x float] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_ps(float *__p, __m256 __a)
_mm256_stream_ps(void *__p, __m256 __a)
{
typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);

View File

@ -19,18 +19,17 @@
to use it as a potentially faster version of BSF. */
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#define _tzcnt_u16(a) (__tzcnt_u16((a)))
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 16-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 16-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see _tzcnt_u16
static __inline__ unsigned short __RELAXED_FN_ATTRS
__tzcnt_u16(unsigned short __X)
{
@ -41,13 +40,30 @@ __tzcnt_u16(unsigned short __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
/// \code
/// unsigned short _tzcnt_u16(unsigned short __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 16-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 16-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see __tzcnt_u16
#define _tzcnt_u16 __tzcnt_u16
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 32-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see _mm_tzcnt_32
/// \see { _mm_tzcnt_32 _tzcnt_u32 }
static __inline__ unsigned int __RELAXED_FN_ATTRS
__tzcnt_u32(unsigned int __X)
{
@ -58,20 +74,35 @@ __tzcnt_u32(unsigned int __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns An 32-bit integer containing the number of trailing zero bits in
/// \returns A 32-bit integer containing the number of trailing zero bits in
/// the operand.
/// \see __tzcnt_u32
/// \see { __tzcnt_u32 _tzcnt_u32 }
static __inline__ int __RELAXED_FN_ATTRS
_mm_tzcnt_32(unsigned int __X)
{
return (int)__builtin_ia32_tzcnt_u32(__X);
}
#define _tzcnt_u32(a) (__tzcnt_u32((a)))
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _tzcnt_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 32-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see { _mm_tzcnt_32 __tzcnt_u32 }
#define _tzcnt_u32 __tzcnt_u32
#ifdef __x86_64__
@ -79,13 +110,13 @@ _mm_tzcnt_32(unsigned int __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see _mm_tzcnt_64
/// \see { _mm_tzcnt_64 _tzcnt_u64 }
static __inline__ unsigned long long __RELAXED_FN_ATTRS
__tzcnt_u64(unsigned long long __X)
{
@ -96,20 +127,35 @@ __tzcnt_u64(unsigned long long __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An 64-bit integer containing the number of trailing zero bits in
/// the operand.
/// \see __tzcnt_u64
/// \see { __tzcnt_u64 _tzcnt_u64 }
static __inline__ long long __RELAXED_FN_ATTRS
_mm_tzcnt_64(unsigned long long __X)
{
return (long long)__builtin_ia32_tzcnt_u64(__X);
}
#define _tzcnt_u64(a) (__tzcnt_u64((a)))
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _tzcnt_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
/// bits in the operand.
/// \see { _mm_tzcnt_64 __tzcnt_u64
#define _tzcnt_u64 __tzcnt_u64
#endif /* __x86_64__ */
@ -121,21 +167,12 @@ _mm_tzcnt_64(unsigned long long __X)
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
#define _andn_u32(a, b) (__andn_u32((a), (b)))
/* _bextr_u32 != __bextr_u32 */
#define _blsi_u32(a) (__blsi_u32((a)))
#define _blsmsk_u32(a) (__blsmsk_u32((a)))
#define _blsr_u32(a) (__blsr_u32((a)))
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> ANDN </c> instruction.
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
/// An unsigned integer containing one of the operands.
@ -143,19 +180,40 @@ _mm_tzcnt_64(unsigned long long __X)
/// An unsigned integer containing one of the operands.
/// \returns An unsigned integer containing the bitwise AND of the second
/// operand with the one's complement of the first operand.
/// \see _andn_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__andn_u32(unsigned int __X, unsigned int __Y)
{
return ~__X & __Y;
}
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _andn_u32(unsigned int __X, unsigned int __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
/// An unsigned integer containing one of the operands.
/// \param __Y
/// An unsigned integer containing one of the operands.
/// \returns An unsigned integer containing the bitwise AND of the second
/// operand with the one's complement of the first operand.
/// \see __andn_u32
#define _andn_u32 __andn_u32
/* AMD-specified, double-leading-underscore version of BEXTR */
/// Extracts the specified bits from the first operand and returns them
/// in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be extracted.
@ -178,7 +236,7 @@ __bextr_u32(unsigned int __X, unsigned int __Y)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be extracted.
@ -203,7 +261,7 @@ _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be extracted.
@ -224,33 +282,89 @@ _bextr2_u32(unsigned int __X, unsigned int __Y) {
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSI </c> instruction.
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be cleared.
/// \returns An unsigned integer containing the result of clearing the bits from
/// the source operand.
/// \see _blsi_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsi_u32(unsigned int __X)
{
return __X & -__X;
}
/// Clears all bits in the source except for the least significant bit
/// containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsi_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
/// An unsigned integer whose bits are to be cleared.
/// \returns An unsigned integer containing the result of clearing the bits from
/// the source operand.
/// \see __blsi_u32
#define _blsi_u32 __blsi_u32
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
/// An unsigned integer used to create the mask.
/// \returns An unsigned integer containing the newly created mask.
/// \see _blsmsk_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsmsk_u32(unsigned int __X)
{
return __X ^ (__X - 1);
}
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
/// \code
/// unsigned int _blsmsk_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
/// An unsigned integer used to create the mask.
/// \returns An unsigned integer containing the newly created mask.
/// \see __blsmsk_u32
#define _blsmsk_u32 __blsmsk_u32
/// Clears the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
/// An unsigned integer containing the operand to be cleared.
/// \returns An unsigned integer containing the result of clearing the source
/// operand.
/// \see _blsr_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsmsk_u32(unsigned int __X)
__blsr_u32(unsigned int __X)
{
return __X ^ (__X - 1);
return __X & (__X - 1);
}
/// Clears the least significant bit that is set to 1 in the source
@ -258,35 +372,27 @@ __blsmsk_u32(unsigned int __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSR </c> instruction.
/// \code
/// unsigned int _bls4_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
/// An unsigned integer containing the operand to be cleared.
/// \returns An unsigned integer containing the result of clearing the source
/// operand.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsr_u32(unsigned int __X)
{
return __X & (__X - 1);
}
/// \see __blsr_u32
#define _blsr_u32 __blsr_u32
#ifdef __x86_64__
#define _andn_u64(a, b) (__andn_u64((a), (b)))
/* _bextr_u64 != __bextr_u64 */
#define _blsi_u64(a) (__blsi_u64((a)))
#define _blsmsk_u64(a) (__blsmsk_u64((a)))
#define _blsr_u64(a) (__blsr_u64((a)))
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> ANDN </c> instruction.
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
/// An unsigned 64-bit integer containing one of the operands.
@ -294,19 +400,41 @@ __blsr_u32(unsigned int __X)
/// An unsigned 64-bit integer containing one of the operands.
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
/// operand with the one's complement of the first operand.
/// \see _andn_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__andn_u64 (unsigned long long __X, unsigned long long __Y)
{
return ~__X & __Y;
}
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _andn_u64(unsigned long long __X,
/// unsigned long long __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
/// An unsigned 64-bit integer containing one of the operands.
/// \param __Y
/// An unsigned 64-bit integer containing one of the operands.
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
/// operand with the one's complement of the first operand.
/// \see __andn_u64
#define _andn_u64 __andn_u64
/* AMD-specified, double-leading-underscore version of BEXTR */
/// Extracts the specified bits from the first operand and returns them
/// in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be extracted.
@ -329,7 +457,7 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be extracted.
@ -354,7 +482,7 @@ _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be extracted.
@ -375,33 +503,89 @@ _bextr2_u64(unsigned long long __X, unsigned long long __Y) {
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSI </c> instruction.
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
/// bits from the source operand.
/// \see _blsi_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsi_u64(unsigned long long __X)
{
return __X & -__X;
}
/// Clears all bits in the source except for the least significant bit
/// containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsi_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
/// An unsigned 64-bit integer whose bits are to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
/// bits from the source operand.
/// \see __blsi_u64
#define _blsi_u64 __blsi_u64
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
/// An unsigned 64-bit integer used to create the mask.
/// \returns An unsigned 64-bit integer containing the newly created mask.
/// \see _blsmsk_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsmsk_u64(unsigned long long __X)
{
return __X ^ (__X - 1);
}
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
/// \code
/// unsigned long long _blsmsk_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
/// An unsigned 64-bit integer used to create the mask.
/// \returns An unsigned 64-bit integer containing the newly created mask.
/// \see __blsmsk_u64
#define _blsmsk_u64 __blsmsk_u64
/// Clears the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
/// An unsigned 64-bit integer containing the operand to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
/// source operand.
/// \see _blsr_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsmsk_u64(unsigned long long __X)
__blsr_u64(unsigned long long __X)
{
return __X ^ (__X - 1);
return __X & (__X - 1);
}
/// Clears the least significant bit that is set to 1 in the source
@ -409,17 +593,18 @@ __blsmsk_u64(unsigned long long __X)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> BLSR </c> instruction.
/// \code
/// unsigned long long _blsr_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
/// An unsigned 64-bit integer containing the operand to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
/// source operand.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsr_u64(unsigned long long __X)
{
return __X & (__X - 1);
}
/// \see __blsr_u64
#define _blsr_u64 __blsr_u64
#endif /* __x86_64__ */

View File

@ -0,0 +1,9 @@
// CUDA headers define __noinline__ which interferes with libstdc++'s use of
// `__attribute((__noinline__))`. In order to avoid compilation error,
// temporarily unset __noinline__ when we include affected libstdc++ header.
#pragma push_macro("__noinline__")
#undef __noinline__
#include_next "bits/basic_string.h"
#pragma pop_macro("__noinline__")

View File

@ -0,0 +1,9 @@
// CUDA headers define __noinline__ which interferes with libstdc++'s use of
// `__attribute((__noinline__))`. In order to avoid compilation error,
// temporarily unset __noinline__ when we include affected libstdc++ header.
#pragma push_macro("__noinline__")
#undef __noinline__
#include_next "bits/basic_string.tcc"
#pragma pop_macro("__noinline__")

View File

@ -50,11 +50,11 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
__min_vector_width__(128)))
__attribute__((__always_inline__, __nodebug__, \
__target__("sse2,no-evex512"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_MMX \
__attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), \
__min_vector_width__(64)))
__attribute__((__always_inline__, __nodebug__, \
__target__("mmx,sse2,no-evex512"), __min_vector_width__(64)))
/// Adds lower double-precision values in both operands and returns the
/// sum in the lower 64 bits of the result. The upper 64 bits of the result
@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p,
/// A pointer to the 128-bit aligned memory location used to store the value.
/// \param __a
/// A vector of [2 x double] containing the 64-bit values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p,
__m128d __a) {
__builtin_nontemporal_store((__v2df)__a, (__v2df *)__p);
}
@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
/// A pointer to the 128-bit aligned memory location used to store the value.
/// \param __a
/// A 128-bit integer vector containing the values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p,
__m128i __a) {
__builtin_nontemporal_store((__v2di)__a, (__v2di *)__p);
}
@ -3983,8 +3983,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
/// A 32-bit integer containing the value to be stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si32(int *__p, int __a) {
__builtin_ia32_movnti(__p, __a);
_mm_stream_si32(void *__p, int __a) {
__builtin_ia32_movnti((int *)__p, __a);
}
#ifdef __x86_64__
@ -4003,8 +4003,8 @@ static __inline__ void
/// A 64-bit integer containing the value to be stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si64(long long *__p, long long __a) {
__builtin_ia32_movnti64(__p, __a);
_mm_stream_si64(void *__p, long long __a) {
__builtin_ia32_movnti64((long long *)__p, __a);
}
#endif

View File

@ -15,19 +15,36 @@
#define __GFNIINTRIN_H
/* Default attributes for simple form (no masking). */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("gfni,no-evex512"), __min_vector_width__(128)))
/* Default attributes for YMM unmasked form. */
#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS_Y \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx,gfni,no-evex512"), \
__min_vector_width__(256)))
/* Default attributes for ZMM unmasked forms. */
#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS_Z \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512f,evex512,gfni"), \
__min_vector_width__(512)))
/* Default attributes for ZMM masked forms. */
#define __DEFAULT_FN_ATTRS_Z_MASK __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS_Z_MASK \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,evex512,gfni"), \
__min_vector_width__(512)))
/* Default attributes for VLX masked forms. */
#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS_VL128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,avx512vl,gfni,no-evex512"), \
__min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_VL256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,avx512vl,gfni,no-evex512"), \
__min_vector_width__(256)))
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \

File diff suppressed because it is too large Load Diff

View File

@ -291,11 +291,13 @@
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDPID__)
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit contents of the MSR.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
return __builtin_ia32_rdpid();
@ -488,6 +490,15 @@ _writegsbase_u64(unsigned long long __V)
* field inside of it.
*/
/// Load a 16-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the 16-bit value to load.
/// \returns The byte-swapped value.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
struct __loadu_i16 {
@ -496,6 +507,16 @@ _loadbe_i16(void const * __P) {
return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
/// Swap the bytes of a 16-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the memory for storing the swapped value.
/// \param __D
/// The 16-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
struct __storeu_i16 {
@ -504,6 +525,15 @@ _storebe_i16(void * __P, short __D) {
((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
}
/// Load a 32-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the 32-bit value to load.
/// \returns The byte-swapped value.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
struct __loadu_i32 {
@ -512,6 +542,16 @@ _loadbe_i32(void const * __P) {
return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
/// Swap the bytes of a 32-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the memory for storing the swapped value.
/// \param __D
/// The 32-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
struct __storeu_i32 {
@ -521,6 +561,15 @@ _storebe_i32(void * __P, int __D) {
}
#ifdef __x86_64__
/// Load a 64-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the 64-bit value to load.
/// \returns The byte-swapped value.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
struct __loadu_i64 {
@ -529,6 +578,16 @@ _loadbe_i64(void const * __P) {
return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
/// Swap the bytes of a 64-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
/// A pointer to the memory for storing the swapped value.
/// \param __D
/// The 64-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
struct __storeu_i64 {
@ -578,9 +637,13 @@ _storebe_i64(void * __P, long long __D) {
#include <cetintrin.h>
#endif
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
* whereas others are also available at all times. */
/* Intrinsics inside adcintrin.h are available at all times. */
#include <adcintrin.h>
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__ADX__)
#include <adxintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDSEED__)

16
lib/include/intrin.h vendored
View File

@ -572,6 +572,22 @@ unsigned char __readx18byte(unsigned long offset);
unsigned short __readx18word(unsigned long offset);
unsigned long __readx18dword(unsigned long offset);
unsigned __int64 __readx18qword(unsigned long offset);
double _CopyDoubleFromInt64(__int64);
float _CopyFloatFromInt32(__int32);
__int32 _CopyInt32FromFloat(float);
__int64 _CopyInt64FromDouble(double);
unsigned int _CountLeadingOnes(unsigned long);
unsigned int _CountLeadingOnes64(unsigned __int64);
unsigned int _CountLeadingSigns(long);
unsigned int _CountLeadingSigns64(__int64);
unsigned int _CountLeadingZeros(unsigned long);
unsigned int _CountLeadingZeros64(unsigned _int64);
unsigned int _CountOneBits(unsigned long);
unsigned int _CountOneBits64(unsigned __int64);
void __cdecl __prefetch(void *);
#endif
/*----------------------------------------------------------------------------*\

View File

@ -156,7 +156,7 @@ extern __inline unsigned char
return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1);
}
extern __inline unsigned char
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__iocsrrd_h(unsigned int _1) {
return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1);
@ -228,6 +228,18 @@ extern __inline void
((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2)))
#endif
#define __frecipe_s(/*float*/ _1) \
(float)__builtin_loongarch_frecipe_s((float)_1)
#define __frecipe_d(/*double*/ _1) \
(double)__builtin_loongarch_frecipe_d((double)_1)
#define __frsqrte_s(/*float*/ _1) \
(float)__builtin_loongarch_frsqrte_s((float)_1)
#define __frsqrte_d(/*double*/ _1) \
(double)__builtin_loongarch_frsqrte_d((double)_1)
#ifdef __cplusplus
}
#endif

3884
lib/include/lasxintrin.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -66,10 +66,8 @@
#define CHAR_BIT __CHAR_BIT__
/* C2x 5.2.4.2.1 */
/* FIXME: This is using the placeholder dates Clang produces for these macros
in C2x mode; switch to the correct values once they've been published. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
/* C23 5.2.4.2.1 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define BOOL_WIDTH __BOOL_WIDTH__
#define CHAR_WIDTH CHAR_BIT
#define SCHAR_WIDTH CHAR_BIT

34
lib/include/llvm_libc_wrappers/assert.h vendored Normal file
View File

@ -0,0 +1,34 @@
//===-- Wrapper for C standard assert.h declarations on the GPU ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
#define __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
#error "This file is for GPU offloading compilation only"
#endif
#include_next <assert.h>
#if __has_include(<llvm-libc-decls/assert.h>)
#if defined(__HIP__) || defined(__CUDA__)
#define __LIBC_ATTRS __attribute__((device))
#endif
#pragma omp begin declare target
#include <llvm-libc-decls/assert.h>
#pragma omp end declare target
#undef __LIBC_ATTRS
#endif
#endif // __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__

View File

@ -13,8 +13,19 @@
#error "This file is for GPU offloading compilation only"
#endif
// The GNU headers like to define 'toupper' and 'tolower' redundantly. This is
// necessary to prevent it from doing that and remapping our implementation.
#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
#pragma push_macro("__USE_EXTERN_INLINES")
#undef __USE_EXTERN_INLINES
#endif
#include_next <ctype.h>
#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
#pragma pop_macro("__USE_EXTERN_INLINES")
#endif
#if __has_include(<llvm-libc-decls/ctype.h>)
#if defined(__HIP__) || defined(__CUDA__)
@ -26,6 +37,7 @@
#pragma push_macro("isalnum")
#pragma push_macro("isalpha")
#pragma push_macro("isascii")
#pragma push_macro("isblank")
#pragma push_macro("iscntrl")
#pragma push_macro("isdigit")
@ -36,11 +48,13 @@
#pragma push_macro("isspace")
#pragma push_macro("isupper")
#pragma push_macro("isxdigit")
#pragma push_macro("toascii")
#pragma push_macro("tolower")
#pragma push_macro("toupper")
#undef isalnum
#undef isalpha
#undef isascii
#undef iscntrl
#undef isdigit
#undef islower
@ -51,6 +65,7 @@
#undef isupper
#undef isblank
#undef isxdigit
#undef toascii
#undef tolower
#undef toupper
@ -64,6 +79,7 @@
#if !defined(__NVPTX__) && !defined(__AMDGPU__)
#pragma pop_macro("isalnum")
#pragma pop_macro("isalpha")
#pragma pop_macro("isascii")
#pragma pop_macro("isblank")
#pragma pop_macro("iscntrl")
#pragma pop_macro("isdigit")
@ -74,6 +90,7 @@
#pragma pop_macro("isspace")
#pragma pop_macro("isupper")
#pragma pop_macro("isxdigit")
#pragma pop_macro("toascii")
#pragma pop_macro("tolower")
#pragma pop_macro("toupper")
#endif

View File

@ -6,21 +6,58 @@
//
//===----------------------------------------------------------------------===//
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
#error "This file is for GPU offloading compilation only"
#endif
#include_next <stdio.h>
// In some old versions of glibc, other standard headers sometimes define
// special macros (e.g., __need_FILE) before including stdio.h to cause stdio.h
// to produce special definitions. Future includes of stdio.h when those
// special macros are undefined are expected to produce the normal definitions
// from stdio.h.
//
// We do not apply our include guard (__CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__)
// unconditionally to the above include_next. Otherwise, after an occurrence of
// the first glibc stdio.h use case described above, the include_next would be
// skipped for remaining includes of stdio.h, leaving required symbols
// undefined.
//
// We make the following assumptions to handle all use cases:
//
// 1. If the above include_next produces special glibc definitions, then (a) it
// does not produce the normal definitions that we must intercept below, (b)
// the current file was included from a glibc header that already defined
// __GLIBC__ (usually by including glibc's <features.h>), and (c) the above
// include_next does not define _STDIO_H. In that case, we skip the rest of
// the current file and don't guard against future includes.
// 2. If the above include_next produces the normal stdio.h definitions, then
// either (a) __GLIBC__ is not defined because C headers are from some other
// libc implementation or (b) the above include_next defines _STDIO_H to
// prevent the above include_next from having any effect in the future.
#if !defined(__GLIBC__) || defined(_STDIO_H)
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
#if __has_include(<llvm-libc-decls/stdio.h>)
#if defined(__HIP__) || defined(__CUDA__)
#define __LIBC_ATTRS __attribute__((device))
#endif
// Some headers provide these as macros. Temporarily undefine them so they do
// not conflict with any definitions for the GPU.
#pragma push_macro("stdout")
#pragma push_macro("stdin")
#pragma push_macro("stderr")
#undef stdout
#undef stderr
#undef stdin
#pragma omp begin declare target
#include <llvm-libc-decls/stdio.h>
@ -29,6 +66,15 @@
#undef __LIBC_ATTRS
// Restore the original macros when compiling on the host.
#if !defined(__NVPTX__) && !defined(__AMDGPU__)
#pragma pop_macro("stdout")
#pragma pop_macro("stderr")
#pragma pop_macro("stdin")
#endif
#endif
#endif // __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
#endif

View File

@ -23,8 +23,11 @@
#pragma omp begin declare target
// The LLVM C library uses this type so we forward declare it.
// The LLVM C library uses these named types so we forward declare them.
typedef void (*__atexithandler_t)(void);
typedef int (*__bsearchcompare_t)(const void *, const void *);
typedef int (*__qsortcompare_t)(const void *, const void *);
typedef int (*__qsortrcompare_t)(const void *, const void *, void *);
// Enforce ABI compatibility with the structs used by the LLVM C library.
_Static_assert(__builtin_offsetof(div_t, quot) == 0, "ABI mismatch!");

View File

@ -13,9 +13,6 @@
#error "This file is for GPU offloading compilation only"
#endif
// FIXME: The GNU headers provide C++ standard compliant headers when in C++
// mode and the LLVM libc does not. We cannot enable memchr, strchr, strchrnul,
// strpbrk, strrchr, strstr, or strcasestr until this is addressed.
#include_next <string.h>
#if __has_include(<llvm-libc-decls/string.h>)
@ -26,8 +23,70 @@
#pragma omp begin declare target
// The GNU headers provide C++ standard compliant headers when in C++ mode and
// the LLVM libc does not. We need to manually provide the definitions using the
// same prototypes.
#if defined(__cplusplus) && defined(__GLIBC__) && \
defined(__CORRECT_ISO_CPP_STRING_H_PROTO)
#ifndef __LIBC_ATTRS
#define __LIBC_ATTRS
#endif
extern "C" {
void *memccpy(void *__restrict, const void *__restrict, int,
size_t) __LIBC_ATTRS;
int memcmp(const void *, const void *, size_t) __LIBC_ATTRS;
void *memcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
void *memmem(const void *, size_t, const void *, size_t) __LIBC_ATTRS;
void *memmove(void *, const void *, size_t) __LIBC_ATTRS;
void *mempcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
void *memset(void *, int, size_t) __LIBC_ATTRS;
char *stpcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
char *stpncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
char *strcat(char *__restrict, const char *__restrict) __LIBC_ATTRS;
int strcmp(const char *, const char *) __LIBC_ATTRS;
int strcoll(const char *, const char *) __LIBC_ATTRS;
char *strcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
size_t strcspn(const char *, const char *) __LIBC_ATTRS;
char *strdup(const char *) __LIBC_ATTRS;
size_t strlen(const char *) __LIBC_ATTRS;
char *strncat(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
int strncmp(const char *, const char *, size_t) __LIBC_ATTRS;
char *strncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
char *strndup(const char *, size_t) __LIBC_ATTRS;
size_t strnlen(const char *, size_t) __LIBC_ATTRS;
size_t strspn(const char *, const char *) __LIBC_ATTRS;
char *strtok(char *__restrict, const char *__restrict) __LIBC_ATTRS;
char *strtok_r(char *__restrict, const char *__restrict,
char **__restrict) __LIBC_ATTRS;
size_t strxfrm(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
}
extern "C++" {
char *strstr(char *, const char *) noexcept __LIBC_ATTRS;
const char *strstr(const char *, const char *) noexcept __LIBC_ATTRS;
char *strpbrk(char *, const char *) noexcept __LIBC_ATTRS;
const char *strpbrk(const char *, const char *) noexcept __LIBC_ATTRS;
char *strrchr(char *, int) noexcept __LIBC_ATTRS;
const char *strrchr(const char *, int) noexcept __LIBC_ATTRS;
char *strchr(char *, int) noexcept __LIBC_ATTRS;
const char *strchr(const char *, int) noexcept __LIBC_ATTRS;
char *strchrnul(char *, int) noexcept __LIBC_ATTRS;
const char *strchrnul(const char *, int) noexcept __LIBC_ATTRS;
char *strcasestr(char *, const char *) noexcept __LIBC_ATTRS;
const char *strcasestr(const char *, const char *) noexcept __LIBC_ATTRS;
void *memrchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
const void *memrchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
void *memchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
const void *memchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
}
#else
#include <llvm-libc-decls/string.h>
#endif
#pragma omp end declare target
#undef __LIBC_ATTRS

34
lib/include/llvm_libc_wrappers/time.h vendored Normal file
View File

@ -0,0 +1,34 @@
//===-- Wrapper for C standard time.h declarations on the GPU -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
#define __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
#error "This file is for GPU offloading compilation only"
#endif
#include_next <time.h>
#if __has_include(<llvm-libc-decls/time.h>)
#if defined(__HIP__) || defined(__CUDA__)
#define __LIBC_ATTRS __attribute__((device))
#endif
#pragma omp begin declare target
_Static_assert(sizeof(clock_t) == sizeof(long), "ABI mismatch!");
#include <llvm-libc-decls/time.h>
#pragma omp end declare target
#endif
#endif // __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__

3750
lib/include/lsxintrin.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -22,7 +22,9 @@ typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \
__min_vector_width__(64)))
/// Clears the MMX state by setting the state of the x87 stack registers
/// to empty.
@ -31,10 +33,10 @@ typedef char __v8qi __attribute__((__vector_size__(8)));
///
/// This intrinsic corresponds to the <c> EMMS </c> instruction.
///
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
_mm_empty(void)
{
__builtin_ia32_emms();
static __inline__ void __attribute__((__always_inline__, __nodebug__,
__target__("mmx,no-evex512")))
_mm_empty(void) {
__builtin_ia32_emms();
}
/// Constructs a 64-bit integer vector, setting the lower 32 bits to the

View File

@ -153,10 +153,163 @@ module _Builtin_intrinsics [system] [extern_c] {
}
}
module _Builtin_stddef_max_align_t [system] [extern_c] {
header "__stddef_max_align_t.h"
// Start -fbuiltin-headers-in-system-modules affected modules
// The following modules all ignore their headers when
// -fbuiltin-headers-in-system-modules is passed, and many of
// those headers join system modules when present.
// e.g. if -fbuiltin-headers-in-system-modules is passed, then
// float.h will not be in the _Builtin_float module (that module
// will be empty). If there is a system module that declares
// `header "float.h"`, then the builtin float.h will join
// that module. The system float.h (if present) will be treated
// as a textual header in the sytem module.
module _Builtin_float [system] {
header "float.h"
export *
}
module _Builtin_inttypes [system] {
header "inttypes.h"
export *
}
module _Builtin_iso646 [system] {
header "iso646.h"
export *
}
module _Builtin_limits [system] {
header "limits.h"
export *
}
module _Builtin_stdalign [system] {
header "stdalign.h"
export *
}
module _Builtin_stdarg [system] {
textual header "stdarg.h"
explicit module __gnuc_va_list {
header "__stdarg___gnuc_va_list.h"
export *
}
explicit module __va_copy {
header "__stdarg___va_copy.h"
export *
}
explicit module va_arg {
header "__stdarg_va_arg.h"
export *
}
explicit module va_copy {
header "__stdarg_va_copy.h"
export *
}
explicit module va_list {
header "__stdarg_va_list.h"
export *
}
}
module _Builtin_stdatomic [system] {
header "stdatomic.h"
export *
}
module _Builtin_stdbool [system] {
header "stdbool.h"
export *
}
module _Builtin_stddef [system] {
textual header "stddef.h"
// __stddef_max_align_t.h is always in this module, even if
// -fbuiltin-headers-in-system-modules is passed.
explicit module max_align_t {
header "__stddef_max_align_t.h"
export *
}
explicit module null {
header "__stddef_null.h"
export *
}
explicit module nullptr_t {
header "__stddef_nullptr_t.h"
export *
}
explicit module offsetof {
header "__stddef_offsetof.h"
export *
}
explicit module ptrdiff_t {
header "__stddef_ptrdiff_t.h"
export *
}
explicit module rsize_t {
header "__stddef_rsize_t.h"
export *
}
explicit module size_t {
header "__stddef_size_t.h"
export *
}
explicit module unreachable {
header "__stddef_unreachable.h"
export *
}
explicit module wchar_t {
header "__stddef_wchar_t.h"
export *
}
}
// wint_t is provided by <wchar.h> and not <stddef.h>. It's here
// for compatibility, but must be explicitly requested. Therefore
// __stddef_wint_t.h is not part of _Builtin_stddef. It is always in
// this module even if -fbuiltin-headers-in-system-modules is passed.
module _Builtin_stddef_wint_t [system] {
header "__stddef_wint_t.h"
export *
}
module _Builtin_stdint [system] {
header "stdint.h"
export *
}
module _Builtin_stdnoreturn [system] {
header "stdnoreturn.h"
export *
}
module _Builtin_tgmath [system] {
header "tgmath.h"
export *
}
module _Builtin_unwind [system] {
header "unwind.h"
export *
}
// End -fbuiltin-headers-in-system-modules affected modules
module opencl_c {
requires opencl
header "opencl-c.h"

View File

@ -45,6 +45,7 @@
#define __opencl_c_ext_fp32_local_atomic_add 1
#define __opencl_c_ext_fp32_global_atomic_min_max 1
#define __opencl_c_ext_fp32_local_atomic_min_max 1
#define __opencl_c_ext_image_raw10_raw12 1
#endif // defined(__SPIR__) || defined(__SPIRV__)
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
@ -477,6 +478,10 @@ typedef enum memory_order
#if __OPENCL_C_VERSION__ >= CL_VERSION_3_0
#define CLK_UNORM_INT_101010_2 0x10E0
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_3_0
#ifdef __opencl_c_ext_image_raw10_raw12
#define CLK_UNSIGNED_INT_RAW10_EXT 0x10E3
#define CLK_UNSIGNED_INT_RAW12_EXT 0x10E4
#endif // __opencl_c_ext_image_raw10_raw12
// Channel order, numbering must be aligned with cl_channel_order in cl.h
//

View File

@ -1,4 +1,4 @@
/*===-- __clang_openmp_device_functions.h - OpenMP math declares ------ c++ -===
/*===-- __clang_openmp_device_functions.h - OpenMP math declares -*- c++ -*-===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.

View File

@ -17,8 +17,9 @@
#include <emmintrin.h>
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("sse3,no-evex512"), __min_vector_width__(128)))
/// Loads data from an unaligned memory location to elements in a 128-bit
/// vector.

Some files were not shown because too many files have changed in this diff Show More