mirror of
https://github.com/ziglang/zig.git
synced 2026-02-09 11:03:30 +00:00
Merge branch 'llvm18'
Upgrades the LLVM, Clang, and LLD dependencies to LLVM 18.x. Related to #16270.
This commit is contained in:
commit
bcb534c295
1
.github/workflows/ci.yaml
vendored
1
.github/workflows/ci.yaml
vendored
@ -4,6 +4,7 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- llvm18
|
||||
concurrency:
|
||||
# Cancels pending runs when a PR gets updated.
|
||||
group: ${{ github.head_ref || github.run_id }}-${{ github.actor }}
|
||||
|
||||
@ -140,9 +140,9 @@ else()
|
||||
set(ZIG_SYSTEM_LIBCXX "stdc++" CACHE STRING "system libcxx name for build.zig")
|
||||
endif()
|
||||
|
||||
find_package(llvm 17)
|
||||
find_package(clang 17)
|
||||
find_package(lld 17)
|
||||
find_package(llvm 18)
|
||||
find_package(clang 18)
|
||||
find_package(lld 18)
|
||||
|
||||
if(ZIG_STATIC_ZLIB)
|
||||
if (MSVC)
|
||||
@ -526,42 +526,54 @@ set(ZIG_STAGE2_SOURCES
|
||||
"${CMAKE_SOURCE_DIR}/lib/std/zig/system/x86.zig"
|
||||
"${CMAKE_SOURCE_DIR}/lib/std/zig/tokenizer.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Air.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Builtin.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Compilation.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Compilation/Config.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/DarwinPosixSpawn.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/InternPool.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Liveness.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Liveness/Verify.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Module.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Package.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Package/Fetch.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Package/Fetch/git.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Package/Manifest.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Package/Module.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/RangeSet.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Sema.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Sema/bitcast.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Sema/comptime_ptr_access.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/Value.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/CodeGen.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/Emit.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/Mir.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/bits.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/abi.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/aarch64/bits.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/arm/CodeGen.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/arm/Emit.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/arm/Mir.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/arm/bits.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/arm/abi.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/arm/bits.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/CodeGen.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/Emit.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/Mir.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/bits.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/abi.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/riscv64/bits.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/CodeGen.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/Emit.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/Mir.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/bits.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/abi.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/sparc64/bits.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/wasm/CodeGen.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/wasm/Emit.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/wasm/Mir.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/wasm/abi.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/x86/bits.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/CodeGen.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/Disassembler.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/Emit.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/Encoding.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/Lower.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/Mir.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/abi.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/arch/x86_64/bits.zig"
|
||||
@ -574,7 +586,17 @@ set(ZIG_STAGE2_SOURCES
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/c.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/c/Type.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/llvm.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/llvm/BitcodeReader.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/llvm/Builder.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/llvm/bindings.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/llvm/bitcode_writer.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/llvm/ir.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/spirv.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/spirv/Assembler.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/spirv/Module.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/spirv/Section.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen/spirv/spec.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/crash_report.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/glibc.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/introspect.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/libcxx.zig"
|
||||
@ -586,7 +608,9 @@ set(ZIG_STAGE2_SOURCES
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Coff/Atom.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Coff/ImportTable.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Coff/Object.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Coff/Relocation.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Coff/lld.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Dwarf.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Elf.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Elf/Archive.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Elf/Atom.zig"
|
||||
@ -599,6 +623,7 @@ set(ZIG_STAGE2_SOURCES
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Elf/eh_frame.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Elf/file.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Elf/gc.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Elf/merge_section.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Elf/relocatable.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Elf/relocation.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Elf/synthetic_sections.zig"
|
||||
@ -617,9 +642,9 @@ set(ZIG_STAGE2_SOURCES
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/UnwindInfo.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/ZigObject.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/bind.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Rebase.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Trie.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/bind.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/eh_frame.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/file.zig"
|
||||
@ -629,15 +654,32 @@ set(ZIG_STAGE2_SOURCES
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/synthetic.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/NvPtx.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Plan9.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Wasm.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/msdos-stub.bin"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/SpirV.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/SpirV/BinaryModule.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/SpirV/deduplicate.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/SpirV/lower_invocation_globals.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/SpirV/prune_unused.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/StringTable.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Wasm.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Wasm/Archive.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Wasm/Atom.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Wasm/Object.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Wasm/Symbol.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Wasm/ZigObject.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Wasm/file.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/Wasm/types.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/aarch64.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/riscv.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/table_section.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/tapi.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/tapi/Tokenizer.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/tapi/parse/test.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/tapi/yaml.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/link/tapi/yaml/test.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/main.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/mingw.zig"
|
||||
"${CMAKE_SOURCE_DIR}/src/musl.zig"
|
||||
@ -685,7 +727,7 @@ if(MSVC)
|
||||
set(EXE_LDFLAGS "${EXE_LDFLAGS} /debug:fastlink")
|
||||
endif()
|
||||
else()
|
||||
set(EXE_CXX_FLAGS "-std=c++17 -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -Werror=type-limits -Wno-missing-braces -Wno-comment")
|
||||
set(EXE_CXX_FLAGS "-std=c++17 -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -Wno-type-limits -Wno-missing-braces -Wno-comment")
|
||||
set(EXE_LDFLAGS " ")
|
||||
if(MINGW)
|
||||
set(EXE_CXX_FLAGS "${EXE_CXX_FLAGS} -Wno-format")
|
||||
@ -866,9 +908,9 @@ target_include_directories(zig2 PUBLIC "${CMAKE_SOURCE_DIR}/stage1")
|
||||
target_link_libraries(zig2 LINK_PUBLIC zigcpp)
|
||||
|
||||
if(MSVC)
|
||||
target_link_libraries(zig2 LINK_PUBLIC ntdll.lib)
|
||||
target_link_libraries(zig2 LINK_PUBLIC ntdll.lib ws2_32.lib)
|
||||
elseif(MINGW)
|
||||
target_link_libraries(zig2 LINK_PUBLIC ntdll)
|
||||
target_link_libraries(zig2 LINK_PUBLIC ntdll ws2_32)
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
|
||||
12
build.zig
12
build.zig
@ -334,6 +334,9 @@ pub fn build(b: *std.Build) !void {
|
||||
}
|
||||
if (target.result.os.tag == .windows) {
|
||||
inline for (.{ exe, check_case_exe }) |artifact| {
|
||||
// LLVM depends on networking as of version 18.
|
||||
artifact.linkSystemLibrary("ws2_32");
|
||||
|
||||
artifact.linkSystemLibrary("version");
|
||||
artifact.linkSystemLibrary("uuid");
|
||||
artifact.linkSystemLibrary("ole32");
|
||||
@ -650,7 +653,7 @@ const exe_cflags = [_][]const u8{
|
||||
"-fvisibility-inlines-hidden",
|
||||
"-fno-exceptions",
|
||||
"-fno-rtti",
|
||||
"-Werror=type-limits",
|
||||
"-Wno-type-limits",
|
||||
"-Wno-missing-braces",
|
||||
"-Wno-comment",
|
||||
};
|
||||
@ -1039,6 +1042,7 @@ const clang_libs = [_][]const u8{
|
||||
"clangAST",
|
||||
"clangParse",
|
||||
"clangSema",
|
||||
"clangAPINotes",
|
||||
"clangBasic",
|
||||
"clangEdit",
|
||||
"clangLex",
|
||||
@ -1068,6 +1072,7 @@ const llvm_libs = [_][]const u8{
|
||||
"LLVMXRay",
|
||||
"LLVMLibDriver",
|
||||
"LLVMDlltoolDriver",
|
||||
"LLVMTextAPIBinaryReader",
|
||||
"LLVMCoverage",
|
||||
"LLVMLineEditor",
|
||||
"LLVMXCoreDisassembler",
|
||||
@ -1169,6 +1174,7 @@ const llvm_libs = [_][]const u8{
|
||||
"LLVMAArch64Desc",
|
||||
"LLVMAArch64Utils",
|
||||
"LLVMAArch64Info",
|
||||
"LLVMOrcDebugging",
|
||||
"LLVMOrcJIT",
|
||||
"LLVMWindowsDriver",
|
||||
"LLVMMCJIT",
|
||||
@ -1188,6 +1194,7 @@ const llvm_libs = [_][]const u8{
|
||||
"LLVMMCDisassembler",
|
||||
"LLVMLTO",
|
||||
"LLVMPasses",
|
||||
"LLVMHipStdPar",
|
||||
"LLVMCFGuard",
|
||||
"LLVMCoroutines",
|
||||
"LLVMipo",
|
||||
@ -1195,10 +1202,13 @@ const llvm_libs = [_][]const u8{
|
||||
"LLVMLinker",
|
||||
"LLVMInstrumentation",
|
||||
"LLVMFrontendOpenMP",
|
||||
"LLVMFrontendOffloading",
|
||||
"LLVMFrontendOpenACC",
|
||||
"LLVMFrontendHLSL",
|
||||
"LLVMFrontendDriver",
|
||||
"LLVMExtensions",
|
||||
"LLVMDWARFLinkerParallel",
|
||||
"LLVMDWARFLinkerClassic",
|
||||
"LLVMDWARFLinker",
|
||||
"LLVMGlobalISel",
|
||||
"LLVMMIRParser",
|
||||
|
||||
@ -8,7 +8,7 @@ set -e
|
||||
ARCH="$(uname -m)"
|
||||
TARGET="$ARCH-linux-musl"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.203+d3bc1cfc4"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
PREFIX="$HOME/deps/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
|
||||
@ -8,7 +8,7 @@ set -e
|
||||
ARCH="$(uname -m)"
|
||||
TARGET="$ARCH-linux-musl"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.203+d3bc1cfc4"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
PREFIX="$HOME/deps/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
|
||||
@ -9,10 +9,16 @@ set -e
|
||||
ZIGDIR="$PWD"
|
||||
TARGET="$ARCH-macos-none"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.467+0345d7866"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
PREFIX="$HOME/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
if [ ! -d "$PREFIX" ]; then
|
||||
cd $HOME
|
||||
curl -L -O "https://ziglang.org/deps/$CACHE_BASENAME.tar.xz"
|
||||
tar xf "$CACHE_BASENAME.tar.xz"
|
||||
fi
|
||||
|
||||
cd $ZIGDIR
|
||||
|
||||
# Make the `zig version` number consistent.
|
||||
|
||||
@ -9,10 +9,16 @@ set -e
|
||||
ZIGDIR="$PWD"
|
||||
TARGET="$ARCH-macos-none"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.467+0345d7866"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
PREFIX="$HOME/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
if [ ! -d "$PREFIX" ]; then
|
||||
cd $HOME
|
||||
curl -L -O "https://ziglang.org/deps/$CACHE_BASENAME.tar.xz"
|
||||
tar xf "$CACHE_BASENAME.tar.xz"
|
||||
fi
|
||||
|
||||
cd $ZIGDIR
|
||||
|
||||
# Make the `zig version` number consistent.
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
$TARGET = "$($Env:ARCH)-windows-gnu"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.12.0-dev.2087+e9a18010b"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
$MCPU = "baseline"
|
||||
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
|
||||
$PREFIX_PATH = "$(Get-Location)\..\$ZIG_LLVM_CLANG_LLD_NAME"
|
||||
|
||||
@ -8,7 +8,7 @@ set -e
|
||||
ARCH="$(uname -m)"
|
||||
TARGET="$ARCH-linux-musl"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.203+d3bc1cfc4"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
PREFIX="$HOME/deps/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
|
||||
@ -8,7 +8,7 @@ set -e
|
||||
ARCH="$(uname -m)"
|
||||
TARGET="$ARCH-linux-musl"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.203+d3bc1cfc4"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
PREFIX="$HOME/deps/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
|
||||
@ -6,18 +6,17 @@ set -e
|
||||
ZIGDIR="$PWD"
|
||||
TARGET="$ARCH-macos-none"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.12.0-dev.467+0345d7866"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
PREFIX="$HOME/$CACHE_BASENAME"
|
||||
JOBS="-j3"
|
||||
|
||||
rm -rf $PREFIX
|
||||
cd $HOME
|
||||
|
||||
curl -L -O "https://ziglang.org/deps/$CACHE_BASENAME.tar.xz"
|
||||
tar xf "$CACHE_BASENAME.tar.xz"
|
||||
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
if [ ! -d "$PREFIX" ]; then
|
||||
cd $HOME
|
||||
curl -L -O "https://ziglang.org/deps/$CACHE_BASENAME.tar.xz"
|
||||
tar xf "$CACHE_BASENAME.tar.xz"
|
||||
fi
|
||||
|
||||
cd $ZIGDIR
|
||||
|
||||
# Make the `zig version` number consistent.
|
||||
|
||||
@ -1,10 +1,20 @@
|
||||
$TARGET = "$($Env:ARCH)-windows-gnu"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.12.0-dev.2073+402fe565a"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
$MCPU = "baseline"
|
||||
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
|
||||
$PREFIX_PATH = "$($Env:USERPROFILE)\$ZIG_LLVM_CLANG_LLD_NAME"
|
||||
$ZIG = "$PREFIX_PATH\bin\zig.exe"
|
||||
$ZIG_LIB_DIR = "$(Get-Location)\lib"
|
||||
|
||||
if (!(Test-Path "$PREFIX_PATH.zip")) {
|
||||
Write-Output "Downloading $ZIG_LLVM_CLANG_LLD_URL"
|
||||
Invoke-WebRequest -Uri "$ZIG_LLVM_CLANG_LLD_URL" -OutFile "$PREFIX_PATH.zip"
|
||||
|
||||
Write-Output "Extracting..."
|
||||
Add-Type -AssemblyName System.IO.Compression.FileSystem ;
|
||||
[System.IO.Compression.ZipFile]::ExtractToDirectory("$PREFIX_PATH.zip", "$PREFIX_PATH\..")
|
||||
}
|
||||
|
||||
function CheckLastExitCode {
|
||||
if (!$?) {
|
||||
exit 1
|
||||
@ -25,6 +35,12 @@ Remove-Item -Path 'build-debug' -Recurse -Force -ErrorAction Ignore
|
||||
New-Item -Path 'build-debug' -ItemType Directory
|
||||
Set-Location -Path 'build-debug'
|
||||
|
||||
# Override the cache directories because they won't actually help other CI runs
|
||||
# which will be testing alternate versions of zig, and ultimately would just
|
||||
# fill up space on the hard drive for no reason.
|
||||
$Env:ZIG_GLOBAL_CACHE_DIR="$(Get-Location)\zig-global-cache"
|
||||
$Env:ZIG_LOCAL_CACHE_DIR="$(Get-Location)\zig-local-cache"
|
||||
|
||||
# CMake gives a syntax error when file paths with backward slashes are used.
|
||||
# Here, we use forward slashes only to work around this.
|
||||
& cmake .. `
|
||||
|
||||
@ -1,10 +1,20 @@
|
||||
$TARGET = "$($Env:ARCH)-windows-gnu"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.12.0-dev.2073+402fe565a"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
$MCPU = "baseline"
|
||||
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
|
||||
$PREFIX_PATH = "$($Env:USERPROFILE)\$ZIG_LLVM_CLANG_LLD_NAME"
|
||||
$ZIG = "$PREFIX_PATH\bin\zig.exe"
|
||||
$ZIG_LIB_DIR = "$(Get-Location)\lib"
|
||||
|
||||
if (!(Test-Path "$PREFIX_PATH.zip")) {
|
||||
Write-Output "Downloading $ZIG_LLVM_CLANG_LLD_URL"
|
||||
Invoke-WebRequest -Uri "$ZIG_LLVM_CLANG_LLD_URL" -OutFile "$PREFIX_PATH.zip"
|
||||
|
||||
Write-Output "Extracting..."
|
||||
Add-Type -AssemblyName System.IO.Compression.FileSystem ;
|
||||
[System.IO.Compression.ZipFile]::ExtractToDirectory("$PREFIX_PATH.zip", "$PREFIX_PATH\..")
|
||||
}
|
||||
|
||||
function CheckLastExitCode {
|
||||
if (!$?) {
|
||||
exit 1
|
||||
@ -25,6 +35,12 @@ Remove-Item -Path 'build-release' -Recurse -Force -ErrorAction Ignore
|
||||
New-Item -Path 'build-release' -ItemType Directory
|
||||
Set-Location -Path 'build-release'
|
||||
|
||||
# Override the cache directories because they won't actually help other CI runs
|
||||
# which will be testing alternate versions of zig, and ultimately would just
|
||||
# fill up space on the hard drive for no reason.
|
||||
$Env:ZIG_GLOBAL_CACHE_DIR="$(Get-Location)\zig-global-cache"
|
||||
$Env:ZIG_LOCAL_CACHE_DIR="$(Get-Location)\zig-local-cache"
|
||||
|
||||
# CMake gives a syntax error when file paths with backward slashes are used.
|
||||
# Here, we use forward slashes only to work around this.
|
||||
& cmake .. `
|
||||
|
||||
@ -17,9 +17,9 @@ find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h
|
||||
if(${LLVM_LINK_MODE} STREQUAL "shared")
|
||||
find_library(CLANG_LIBRARIES
|
||||
NAMES
|
||||
libclang-cpp.so.17
|
||||
clang-cpp-17.0
|
||||
clang-cpp170
|
||||
libclang-cpp.so.18
|
||||
clang-cpp-18.0
|
||||
clang-cpp180
|
||||
clang-cpp
|
||||
NAMES_PER_DIR
|
||||
HINTS "${LLVM_LIBDIRS}"
|
||||
@ -55,6 +55,7 @@ else()
|
||||
FIND_AND_ADD_CLANG_LIB(clangAST)
|
||||
FIND_AND_ADD_CLANG_LIB(clangParse)
|
||||
FIND_AND_ADD_CLANG_LIB(clangSema)
|
||||
FIND_AND_ADD_CLANG_LIB(clangAPINotes)
|
||||
FIND_AND_ADD_CLANG_LIB(clangBasic)
|
||||
FIND_AND_ADD_CLANG_LIB(clangEdit)
|
||||
FIND_AND_ADD_CLANG_LIB(clangLex)
|
||||
|
||||
@ -9,21 +9,21 @@
|
||||
find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h
|
||||
HINTS ${LLVM_INCLUDE_DIRS}
|
||||
PATHS
|
||||
/usr/lib/llvm-17/include
|
||||
/usr/local/llvm170/include
|
||||
/usr/local/llvm17/include
|
||||
/usr/local/opt/llvm@17/include
|
||||
/opt/homebrew/opt/llvm@17/include
|
||||
/usr/lib/llvm-18/include
|
||||
/usr/local/llvm180/include
|
||||
/usr/local/llvm18/include
|
||||
/usr/local/opt/llvm@18/include
|
||||
/opt/homebrew/opt/llvm@18/include
|
||||
/mingw64/include)
|
||||
|
||||
find_library(LLD_LIBRARY NAMES lld-17.0 lld170 lld NAMES_PER_DIR
|
||||
find_library(LLD_LIBRARY NAMES lld-18.0 lld180 lld NAMES_PER_DIR
|
||||
HINTS ${LLVM_LIBDIRS}
|
||||
PATHS
|
||||
/usr/lib/llvm-17/lib
|
||||
/usr/local/llvm170/lib
|
||||
/usr/local/llvm17/lib
|
||||
/usr/local/opt/llvm@17/lib
|
||||
/opt/homebrew/opt/llvm@17/lib
|
||||
/usr/lib/llvm-18/lib
|
||||
/usr/local/llvm180/lib
|
||||
/usr/local/llvm18/lib
|
||||
/usr/local/opt/llvm@18/lib
|
||||
/opt/homebrew/opt/llvm@18/lib
|
||||
)
|
||||
if(EXISTS ${LLD_LIBRARY})
|
||||
set(LLD_LIBRARIES ${LLD_LIBRARY})
|
||||
@ -34,11 +34,11 @@ else()
|
||||
HINTS ${LLVM_LIBDIRS}
|
||||
PATHS
|
||||
${LLD_LIBDIRS}
|
||||
/usr/lib/llvm-17/lib
|
||||
/usr/local/llvm170/lib
|
||||
/usr/local/llvm17/lib
|
||||
/usr/local/opt/llvm@17/lib
|
||||
/opt/homebrew/opt/llvm@17/lib
|
||||
/usr/lib/llvm-18/lib
|
||||
/usr/local/llvm180/lib
|
||||
/usr/local/llvm18/lib
|
||||
/usr/local/opt/llvm@18/lib
|
||||
/opt/homebrew/opt/llvm@18/lib
|
||||
/mingw64/lib
|
||||
/c/msys64/mingw64/lib
|
||||
c:/msys64/mingw64/lib)
|
||||
|
||||
@ -14,12 +14,12 @@ if(ZIG_USE_LLVM_CONFIG)
|
||||
while(1)
|
||||
unset(LLVM_CONFIG_EXE CACHE)
|
||||
find_program(LLVM_CONFIG_EXE
|
||||
NAMES llvm-config-17 llvm-config-17.0 llvm-config170 llvm-config17 llvm-config NAMES_PER_DIR
|
||||
NAMES llvm-config-18 llvm-config-18.0 llvm-config180 llvm-config18 llvm-config NAMES_PER_DIR
|
||||
PATHS
|
||||
"/mingw64/bin"
|
||||
"/c/msys64/mingw64/bin"
|
||||
"c:/msys64/mingw64/bin"
|
||||
"C:/Libraries/llvm-17.0.0/bin")
|
||||
"C:/Libraries/llvm-18.0.0/bin")
|
||||
|
||||
if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND")
|
||||
if (NOT LLVM_CONFIG_ERROR_MESSAGES STREQUAL "")
|
||||
@ -37,9 +37,9 @@ if(ZIG_USE_LLVM_CONFIG)
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
get_filename_component(LLVM_CONFIG_DIR "${LLVM_CONFIG_EXE}" DIRECTORY)
|
||||
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 17 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 18 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 18)
|
||||
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 18 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 19 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 19)
|
||||
# Save the error message, in case this is the last llvm-config we find
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 17.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}")
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 18.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}")
|
||||
|
||||
# Ignore this directory and try the search again
|
||||
list(APPEND CMAKE_IGNORE_PATH "${LLVM_CONFIG_DIR}")
|
||||
@ -63,9 +63,9 @@ if(ZIG_USE_LLVM_CONFIG)
|
||||
if (LLVM_CONFIG_ERROR)
|
||||
# Save the error message, in case this is the last llvm-config we find
|
||||
if (ZIG_SHARED_LLVM)
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 17.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library")
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 18.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library")
|
||||
else()
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 17.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library")
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 18.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library")
|
||||
endif()
|
||||
|
||||
# Ignore this directory and try the search again
|
||||
@ -195,6 +195,7 @@ else()
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMXRay)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMLibDriver)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMDlltoolDriver)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMTextAPIBinaryReader)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMCoverage)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMLineEditor)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMXCoreDisassembler)
|
||||
@ -296,6 +297,7 @@ else()
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMAArch64Desc)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMAArch64Utils)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMAArch64Info)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMOrcDebugging)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMOrcJIT)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMWindowsDriver)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMMCJIT)
|
||||
@ -315,6 +317,7 @@ else()
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMMCDisassembler)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMLTO)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMPasses)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMHipStdPar)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMCFGuard)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMCoroutines)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMipo)
|
||||
@ -322,10 +325,13 @@ else()
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMLinker)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMInstrumentation)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenMP)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMFrontendOffloading)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenACC)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMFrontendHLSL)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMFrontendDriver)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMExtensions)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerParallel)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerClassic)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinker)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMGlobalISel)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMMIRParser)
|
||||
|
||||
2
lib/compiler/aro/aro/Type.zig
vendored
2
lib/compiler/aro/aro/Type.zig
vendored
@ -1116,7 +1116,7 @@ pub fn alignof(ty: Type, comp: *const Compilation) u29 {
|
||||
|
||||
.bit_int => @min(
|
||||
std.math.ceilPowerOfTwoPromote(u16, (ty.data.int.bits + 7) / 8),
|
||||
comp.target.maxIntAlignment(),
|
||||
16, // comp.target.maxIntAlignment(), please use your own logic for this value as it is implementation-defined
|
||||
),
|
||||
|
||||
.float => comp.target.c_type_alignment(.float),
|
||||
|
||||
4
lib/include/__clang_cuda_device_functions.h
vendored
4
lib/include/__clang_cuda_device_functions.h
vendored
@ -502,8 +502,8 @@ __DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }
|
||||
__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }
|
||||
__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }
|
||||
__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }
|
||||
__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }
|
||||
__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }
|
||||
__DEVICE__ int __popc(unsigned int __a) { return __nv_popc(__a); }
|
||||
__DEVICE__ int __popcll(unsigned long long __a) { return __nv_popcll(__a); }
|
||||
__DEVICE__ float __powf(float __a, float __b) {
|
||||
return __nv_fast_powf(__a, __b);
|
||||
}
|
||||
|
||||
@ -285,8 +285,8 @@ __DEVICE__ double __nv_normcdfinv(double __a);
|
||||
__DEVICE__ float __nv_normcdfinvf(float __a);
|
||||
__DEVICE__ float __nv_normf(int __a, const float *__b);
|
||||
__DEVICE__ double __nv_norm(int __a, const double *__b);
|
||||
__DEVICE__ int __nv_popc(int __a);
|
||||
__DEVICE__ int __nv_popcll(long long __a);
|
||||
__DEVICE__ int __nv_popc(unsigned int __a);
|
||||
__DEVICE__ int __nv_popcll(unsigned long long __a);
|
||||
__DEVICE__ double __nv_pow(double __a, double __b);
|
||||
__DEVICE__ float __nv_powf(float __a, float __b);
|
||||
__DEVICE__ double __nv_powi(double __a, int __b);
|
||||
|
||||
6
lib/include/__clang_cuda_math.h
vendored
6
lib/include/__clang_cuda_math.h
vendored
@ -36,7 +36,7 @@
|
||||
// because the OpenMP overlay requires constexpr functions here but prior to
|
||||
// c++14 void return functions could not be constexpr.
|
||||
#pragma push_macro("__DEVICE_VOID__")
|
||||
#ifdef __OPENMP_NVPTX__ && defined(__cplusplus) && __cplusplus < 201402L
|
||||
#if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L
|
||||
#define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
|
||||
#else
|
||||
#define __DEVICE_VOID__ __DEVICE__
|
||||
@ -45,9 +45,9 @@
|
||||
// libdevice provides fast low precision and slow full-precision implementations
|
||||
// for some functions. Which one gets selected depends on
|
||||
// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if
|
||||
// -ffast-math or -fcuda-approx-transcendentals are in effect.
|
||||
// -ffast-math or -fgpu-approx-transcendentals are in effect.
|
||||
#pragma push_macro("__FAST_OR_SLOW")
|
||||
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
|
||||
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
|
||||
#define __FAST_OR_SLOW(fast, slow) fast
|
||||
#else
|
||||
#define __FAST_OR_SLOW(fast, slow) slow
|
||||
|
||||
4
lib/include/__clang_cuda_runtime_wrapper.h
vendored
4
lib/include/__clang_cuda_runtime_wrapper.h
vendored
@ -196,12 +196,12 @@ inline __host__ double __signbitd(double x) {
|
||||
|
||||
// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we
|
||||
// get the slow-but-accurate or fast-but-inaccurate versions of functions like
|
||||
// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals.
|
||||
// sin and exp. This is controlled in clang by -fgpu-approx-transcendentals.
|
||||
//
|
||||
// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.
|
||||
// slow divides), so we need to scope our define carefully here.
|
||||
#pragma push_macro("__USE_FAST_MATH__")
|
||||
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
|
||||
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
|
||||
#define __USE_FAST_MATH__ 1
|
||||
#endif
|
||||
|
||||
|
||||
363
lib/include/__clang_hip_math.h
vendored
363
lib/include/__clang_hip_math.h
vendored
@ -14,9 +14,6 @@
|
||||
#endif
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#if defined(__cplusplus)
|
||||
#include <algorithm>
|
||||
#endif
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
@ -32,6 +29,17 @@
|
||||
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
// Device library provides fast low precision and slow full-precision
|
||||
// implementations for some functions. Which one gets selected depends on
|
||||
// __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if
|
||||
// -ffast-math or -fgpu-approx-transcendentals are in effect.
|
||||
#pragma push_macro("__FAST_OR_SLOW")
|
||||
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
|
||||
#define __FAST_OR_SLOW(fast, slow) fast
|
||||
#else
|
||||
#define __FAST_OR_SLOW(fast, slow) slow
|
||||
#endif
|
||||
|
||||
// A few functions return bool type starting only in C++11.
|
||||
#pragma push_macro("__RETURN_TYPE")
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
@ -139,21 +147,180 @@ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
|
||||
}
|
||||
|
||||
// BEGIN FLOAT
|
||||
|
||||
// BEGIN INTRINSICS
|
||||
|
||||
__DEVICE__
|
||||
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __exp10f(float __x) {
|
||||
const float __log2_10 = 0x1.a934f0p+1f;
|
||||
return __builtin_amdgcn_exp2f(__log2_10 * __x);
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
float __expf(float __x) {
|
||||
const float __log2_e = 0x1.715476p+0;
|
||||
return __builtin_amdgcn_exp2f(__log2_e * __x);
|
||||
}
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fadd_rn(float __x, float __y) { return __x + __y; }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fdiv_rn(float __x, float __y) { return __x / __y; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __fdividef(float __x, float __y) { return __x / __y; }
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fmaf_rd(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtn_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_rn(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rte_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_ru(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtp_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_rz(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtz_f32(__x, __y, __z);
|
||||
}
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fmaf_rn(float __x, float __y, float __z) {
|
||||
return __builtin_fmaf(__x, __y, __z);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fmul_rn(float __x, float __y) { return __x * __y; }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __frcp_rn(float __x) { return 1.0f / __x; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fsub_rn(float __x, float __y) { return __x - __y; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __log10f(float __x) { return __builtin_log10f(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __log2f(float __x) { return __builtin_amdgcn_logf(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __logf(float __x) { return __builtin_logf(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
|
||||
|
||||
__DEVICE__
|
||||
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
|
||||
|
||||
__DEVICE__
|
||||
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
|
||||
*__sinptr = __ocml_native_sin_f32(__x);
|
||||
*__cosptr = __ocml_native_cos_f32(__x);
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __tanf(float __x) {
|
||||
return __sinf(__x) * __builtin_amdgcn_rcpf(__cosf(__x));
|
||||
}
|
||||
// END INTRINSICS
|
||||
|
||||
#if defined(__cplusplus)
|
||||
__DEVICE__
|
||||
int abs(int __x) {
|
||||
int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
|
||||
return (__x ^ __sgn) - __sgn;
|
||||
return __builtin_abs(__x);
|
||||
}
|
||||
__DEVICE__
|
||||
long labs(long __x) {
|
||||
long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
|
||||
return (__x ^ __sgn) - __sgn;
|
||||
return __builtin_labs(__x);
|
||||
}
|
||||
__DEVICE__
|
||||
long long llabs(long long __x) {
|
||||
long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
|
||||
return (__x ^ __sgn) - __sgn;
|
||||
return __builtin_llabs(__x);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -188,7 +355,7 @@ __DEVICE__
|
||||
float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); }
|
||||
|
||||
__DEVICE__
|
||||
float cosf(float __x) { return __ocml_cos_f32(__x); }
|
||||
float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float coshf(float __x) { return __ocml_cosh_f32(__x); }
|
||||
@ -321,13 +488,13 @@ __DEVICE__
|
||||
float log1pf(float __x) { return __ocml_log1p_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float log2f(float __x) { return __builtin_log2f(__x); }
|
||||
float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float logbf(float __x) { return __ocml_logb_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float logf(float __x) { return __builtin_logf(__x); }
|
||||
float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); }
|
||||
|
||||
__DEVICE__
|
||||
long int lrintf(float __x) { return __builtin_rintf(__x); }
|
||||
@ -401,7 +568,7 @@ float normf(int __dim,
|
||||
++__a;
|
||||
}
|
||||
|
||||
return __ocml_sqrt_f32(__r);
|
||||
return __builtin_sqrtf(__r);
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
@ -483,9 +650,13 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
#ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__
|
||||
__sincosf(__x, __sinptr, __cosptr);
|
||||
#else
|
||||
*__sinptr =
|
||||
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
|
||||
*__cosptr = __tmp;
|
||||
#endif
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
@ -500,7 +671,7 @@ void sincospif(float __x, float *__sinptr, float *__cosptr) {
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
float sinf(float __x) { return __ocml_sin_f32(__x); }
|
||||
float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float sinhf(float __x) { return __ocml_sinh_f32(__x); }
|
||||
@ -509,7 +680,7 @@ __DEVICE__
|
||||
float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
|
||||
float sqrtf(float __x) { return __builtin_sqrtf(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float tanf(float __x) { return __ocml_tan_f32(__x); }
|
||||
@ -551,158 +722,7 @@ float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
|
||||
return __x1;
|
||||
}
|
||||
|
||||
// BEGIN INTRINSICS
|
||||
|
||||
__DEVICE__
|
||||
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __expf(float __x) { return __ocml_native_exp_f32(__x); }
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fadd_rn(float __x, float __y) { return __x + __y; }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fdiv_rn(float __x, float __y) { return __x / __y; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __fdividef(float __x, float __y) { return __x / __y; }
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fmaf_rd(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtn_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_rn(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rte_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_ru(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtp_f32(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__
|
||||
float __fmaf_rz(float __x, float __y, float __z) {
|
||||
return __ocml_fma_rtz_f32(__x, __y, __z);
|
||||
}
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fmaf_rn(float __x, float __y, float __z) {
|
||||
return __builtin_fmaf(__x, __y, __z);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fmul_rn(float __x, float __y) { return __x * __y; }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
|
||||
__DEVICE__
|
||||
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __frcp_rn(float __x) { return 1.0f / __x; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
|
||||
__DEVICE__
|
||||
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
|
||||
__DEVICE__
|
||||
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
|
||||
#else
|
||||
__DEVICE__
|
||||
float __fsub_rn(float __x, float __y) { return __x - __y; }
|
||||
#endif
|
||||
|
||||
__DEVICE__
|
||||
float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __logf(float __x) { return __ocml_native_log_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
|
||||
|
||||
__DEVICE__
|
||||
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
|
||||
|
||||
__DEVICE__
|
||||
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
|
||||
*__sinptr = __ocml_native_sin_f32(__x);
|
||||
*__cosptr = __ocml_native_cos_f32(__x);
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
|
||||
|
||||
__DEVICE__
|
||||
float __tanf(float __x) { return __ocml_tan_f32(__x); }
|
||||
// END INTRINSICS
|
||||
// END FLOAT
|
||||
|
||||
// BEGIN DOUBLE
|
||||
@ -941,7 +961,7 @@ double norm(int __dim,
|
||||
++__a;
|
||||
}
|
||||
|
||||
return __ocml_sqrt_f64(__r);
|
||||
return __builtin_sqrt(__r);
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
@ -1064,7 +1084,7 @@ __DEVICE__
|
||||
double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
|
||||
|
||||
__DEVICE__
|
||||
double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
|
||||
double sqrt(double __x) { return __builtin_sqrt(__x); }
|
||||
|
||||
__DEVICE__
|
||||
double tan(double __x) { return __ocml_tan_f64(__x); }
|
||||
@ -1198,7 +1218,7 @@ __DEVICE__
|
||||
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
|
||||
#else
|
||||
__DEVICE__
|
||||
double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
|
||||
double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); }
|
||||
#endif
|
||||
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
@ -1288,16 +1308,17 @@ double min(double __x, double __y) { return __builtin_fmin(__x, __y); }
|
||||
|
||||
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
|
||||
__host__ inline static int min(int __arg1, int __arg2) {
|
||||
return std::min(__arg1, __arg2);
|
||||
return __arg1 < __arg2 ? __arg1 : __arg2;
|
||||
}
|
||||
|
||||
__host__ inline static int max(int __arg1, int __arg2) {
|
||||
return std::max(__arg1, __arg2);
|
||||
return __arg1 > __arg2 ? __arg1 : __arg2;
|
||||
}
|
||||
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
|
||||
#endif
|
||||
|
||||
#pragma pop_macro("__DEVICE__")
|
||||
#pragma pop_macro("__RETURN_TYPE")
|
||||
#pragma pop_macro("__FAST_OR_SLOW")
|
||||
|
||||
#endif // __CLANG_HIP_MATH_H__
|
||||
|
||||
114
lib/include/__clang_hip_runtime_wrapper.h
vendored
114
lib/include/__clang_hip_runtime_wrapper.h
vendored
@ -46,6 +46,67 @@ extern "C" {
|
||||
}
|
||||
#endif //__cplusplus
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#if __has_include("hip/hip_version.h")
|
||||
#include "hip/hip_version.h"
|
||||
#endif // __has_include("hip/hip_version.h")
|
||||
#endif // __HIPCC_RTC__
|
||||
|
||||
typedef __SIZE_TYPE__ __hip_size_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif //__cplusplus
|
||||
|
||||
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
|
||||
__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
|
||||
__device__ void __ockl_dm_dealloc(unsigned long long __addr);
|
||||
#if __has_feature(address_sanitizer)
|
||||
__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
|
||||
unsigned long long __pc);
|
||||
__device__ void __asan_free_impl(unsigned long long __addr,
|
||||
unsigned long long __pc);
|
||||
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
return (void *)__asan_malloc_impl(__size, __pc);
|
||||
}
|
||||
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
__asan_free_impl((unsigned long long)__ptr, __pc);
|
||||
}
|
||||
#else // __has_feature(address_sanitizer)
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return (void *) __ockl_dm_alloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__ockl_dm_dealloc((unsigned long long)__ptr);
|
||||
}
|
||||
#endif // __has_feature(address_sanitizer)
|
||||
#else // HIP version check
|
||||
#if __HIP_ENABLE_DEVICE_MALLOC__
|
||||
__device__ void *__hip_malloc(__hip_size_t __size);
|
||||
__device__ void *__hip_free(void *__ptr);
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return __hip_malloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__hip_free(__ptr);
|
||||
}
|
||||
#else // __HIP_ENABLE_DEVICE_MALLOC__
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
__builtin_trap();
|
||||
return (void *)0;
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__builtin_trap();
|
||||
}
|
||||
#endif // __HIP_ENABLE_DEVICE_MALLOC__
|
||||
#endif // HIP version check
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif //__cplusplus
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
@ -71,59 +132,6 @@ typedef __SIZE_TYPE__ size_t;
|
||||
#define INT_MAX __INTMAX_MAX__
|
||||
#endif // __HIPCC_RTC__
|
||||
|
||||
typedef __SIZE_TYPE__ __hip_size_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif //__cplusplus
|
||||
|
||||
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
|
||||
extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
|
||||
extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
|
||||
#if __has_feature(address_sanitizer)
|
||||
extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc);
|
||||
extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc);
|
||||
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
return (void *)__asan_malloc_impl(__size, __pc);
|
||||
}
|
||||
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
__asan_free_impl((unsigned long long)__ptr, __pc);
|
||||
}
|
||||
#else
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return (void *) __ockl_dm_alloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__ockl_dm_dealloc((unsigned long long)__ptr);
|
||||
}
|
||||
#endif // __has_feature(address_sanitizer)
|
||||
#else // HIP version check
|
||||
#if __HIP_ENABLE_DEVICE_MALLOC__
|
||||
__device__ void *__hip_malloc(__hip_size_t __size);
|
||||
__device__ void *__hip_free(void *__ptr);
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return __hip_malloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__hip_free(__ptr);
|
||||
}
|
||||
#else
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
__builtin_trap();
|
||||
return (void *)0;
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__builtin_trap();
|
||||
}
|
||||
#endif
|
||||
#endif // HIP version check
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif //__cplusplus
|
||||
|
||||
#include <__clang_hip_libdevice_declares.h>
|
||||
#include <__clang_hip_math.h>
|
||||
#include <__clang_hip_stdlib.h>
|
||||
|
||||
13
lib/include/__stdarg___gnuc_va_list.h
vendored
Normal file
13
lib/include/__stdarg___gnuc_va_list.h
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
/*===---- __stdarg___gnuc_va_list.h - Definition of __gnuc_va_list ---------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __GNUC_VA_LIST
|
||||
#define __GNUC_VA_LIST
|
||||
typedef __builtin_va_list __gnuc_va_list;
|
||||
#endif
|
||||
12
lib/include/__stdarg___va_copy.h
vendored
Normal file
12
lib/include/__stdarg___va_copy.h
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
/*===---- __stdarg___va_copy.h - Definition of __va_copy -------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __va_copy
|
||||
#define __va_copy(d, s) __builtin_va_copy(d, s)
|
||||
#endif
|
||||
22
lib/include/__stdarg_va_arg.h
vendored
Normal file
22
lib/include/__stdarg_va_arg.h
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
/*===---- __stdarg_va_arg.h - Definitions of va_start, va_arg, va_end-------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef va_arg
|
||||
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
/* C23 does not require the second parameter for va_start. */
|
||||
#define va_start(ap, ...) __builtin_va_start(ap, 0)
|
||||
#else
|
||||
/* Versions before C23 do require the second parameter. */
|
||||
#define va_start(ap, param) __builtin_va_start(ap, param)
|
||||
#endif
|
||||
#define va_end(ap) __builtin_va_end(ap)
|
||||
#define va_arg(ap, type) __builtin_va_arg(ap, type)
|
||||
|
||||
#endif
|
||||
12
lib/include/__stdarg_va_copy.h
vendored
Normal file
12
lib/include/__stdarg_va_copy.h
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
/*===---- __stdarg_va_copy.h - Definition of va_copy------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef va_copy
|
||||
#define va_copy(dest, src) __builtin_va_copy(dest, src)
|
||||
#endif
|
||||
13
lib/include/__stdarg_va_list.h
vendored
Normal file
13
lib/include/__stdarg_va_list.h
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
/*===---- __stdarg_va_list.h - Definition of va_list -----------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef _VA_LIST
|
||||
#define _VA_LIST
|
||||
typedef __builtin_va_list va_list;
|
||||
#endif
|
||||
2
lib/include/__stddef_max_align_t.h
vendored
2
lib/include/__stddef_max_align_t.h
vendored
@ -1,4 +1,4 @@
|
||||
/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===
|
||||
/*===---- __stddef_max_align_t.h - Definition of max_align_t ---------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
|
||||
29
lib/include/__stddef_null.h
vendored
Normal file
29
lib/include/__stddef_null.h
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
/*===---- __stddef_null.h - Definition of NULL -----------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#if !defined(NULL) || !__building_module(_Builtin_stddef)
|
||||
|
||||
/* linux/stddef.h will define NULL to 0. glibc (and other) headers then define
|
||||
* __need_NULL and rely on stddef.h to redefine NULL to the correct value again.
|
||||
* Modules don't support redefining macros like that, but support that pattern
|
||||
* in the non-modules case.
|
||||
*/
|
||||
#undef NULL
|
||||
|
||||
#ifdef __cplusplus
|
||||
#if !defined(__MINGW32__) && !defined(_MSC_VER)
|
||||
#define NULL __null
|
||||
#else
|
||||
#define NULL 0
|
||||
#endif
|
||||
#else
|
||||
#define NULL ((void*)0)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
29
lib/include/__stddef_nullptr_t.h
vendored
Normal file
29
lib/include/__stddef_nullptr_t.h
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
/*===---- __stddef_nullptr_t.h - Definition of nullptr_t -------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(_NULLPTR_T) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define _NULLPTR_T
|
||||
|
||||
#ifdef __cplusplus
|
||||
#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
|
||||
namespace std {
|
||||
typedef decltype(nullptr) nullptr_t;
|
||||
}
|
||||
using ::std::nullptr_t;
|
||||
#endif
|
||||
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
typedef typeof(nullptr) nullptr_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
17
lib/include/__stddef_offsetof.h
vendored
Normal file
17
lib/include/__stddef_offsetof.h
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
/*===---- __stddef_offsetof.h - Definition of offsetof ---------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(offsetof) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define offsetof(t, d) __builtin_offsetof(t, d)
|
||||
#endif
|
||||
20
lib/include/__stddef_ptrdiff_t.h
vendored
Normal file
20
lib/include/__stddef_ptrdiff_t.h
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
/*===---- __stddef_ptrdiff_t.h - Definition of ptrdiff_t -------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(_PTRDIFF_T) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define _PTRDIFF_T
|
||||
|
||||
typedef __PTRDIFF_TYPE__ ptrdiff_t;
|
||||
|
||||
#endif
|
||||
20
lib/include/__stddef_rsize_t.h
vendored
Normal file
20
lib/include/__stddef_rsize_t.h
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
/*===---- __stddef_rsize_t.h - Definition of rsize_t -----------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(_RSIZE_T) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define _RSIZE_T
|
||||
|
||||
typedef __SIZE_TYPE__ rsize_t;
|
||||
|
||||
#endif
|
||||
20
lib/include/__stddef_size_t.h
vendored
Normal file
20
lib/include/__stddef_size_t.h
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
/*===---- __stddef_size_t.h - Definition of size_t -------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(_SIZE_T) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define _SIZE_T
|
||||
|
||||
typedef __SIZE_TYPE__ size_t;
|
||||
|
||||
#endif
|
||||
21
lib/include/__stddef_unreachable.h
vendored
Normal file
21
lib/include/__stddef_unreachable.h
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
/*===---- __stddef_unreachable.h - Definition of unreachable ---------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(unreachable) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define unreachable() __builtin_unreachable()
|
||||
#endif
|
||||
|
||||
#endif
|
||||
28
lib/include/__stddef_wchar_t.h
vendored
Normal file
28
lib/include/__stddef_wchar_t.h
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
/*===---- __stddef_wchar_t.h - Definition of wchar_t -----------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED)
|
||||
|
||||
/*
|
||||
* When -fbuiltin-headers-in-system-modules is set this is a non-modular header
|
||||
* and needs to behave as if it was textual.
|
||||
*/
|
||||
#if !defined(_WCHAR_T) || \
|
||||
(__has_feature(modules) && !__building_module(_Builtin_stddef))
|
||||
#define _WCHAR_T
|
||||
|
||||
#ifdef _MSC_EXTENSIONS
|
||||
#define _WCHAR_T_DEFINED
|
||||
#endif
|
||||
|
||||
typedef __WCHAR_TYPE__ wchar_t;
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
15
lib/include/__stddef_wint_t.h
vendored
Normal file
15
lib/include/__stddef_wint_t.h
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
/*===---- __stddef_wint_t.h - Definition of wint_t -------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef _WINT_T
|
||||
#define _WINT_T
|
||||
|
||||
typedef __WINT_TYPE__ wint_t;
|
||||
|
||||
#endif
|
||||
160
lib/include/adcintrin.h
vendored
Normal file
160
lib/include/adcintrin.h
vendored
Normal file
@ -0,0 +1,160 @@
|
||||
/*===---- adcintrin.h - ADC intrinsics -------------------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __ADCINTRIN_H
|
||||
#define __ADCINTRIN_H
|
||||
|
||||
#if !defined(__i386__) && !defined(__x86_64__)
|
||||
#error "This header is only meant to be used on x86 and x64 architecture"
|
||||
#endif
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
/* Use C++ inline semantics in C++, GNU inline for C mode. */
|
||||
#if defined(__cplusplus)
|
||||
#define __INLINE __inline
|
||||
#else
|
||||
#define __INLINE static __inline
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
|
||||
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
|
||||
/// at \a __p, and returns the 8-bit carry-out (carry flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store32(__p, __x + __y + temp)
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ADC instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// A 32-bit unsigned addend.
|
||||
/// \param __y
|
||||
/// A 32-bit unsigned addend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
|
||||
unsigned int __x,
|
||||
unsigned int __y,
|
||||
unsigned int *__p) {
|
||||
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
|
||||
/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
|
||||
/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
|
||||
/// and returns the 8-bit carry-out (carry or overflow flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store32(__p, __x - (__y + temp))
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c SBB instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// The 32-bit unsigned minuend.
|
||||
/// \param __y
|
||||
/// The 32-bit unsigned subtrahend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the difference.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
|
||||
unsigned int __x,
|
||||
unsigned int __y,
|
||||
unsigned int *__p) {
|
||||
return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
|
||||
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
|
||||
/// at \a __p, and returns the 8-bit carry-out (carry flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store64(__p, __x + __y + temp)
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ADC instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// A 64-bit unsigned addend.
|
||||
/// \param __y
|
||||
/// A 64-bit unsigned addend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS
|
||||
_addcarry_u64(unsigned char __cf, unsigned long long __x,
|
||||
unsigned long long __y, unsigned long long *__p) {
|
||||
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
|
||||
/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
|
||||
/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
|
||||
/// and returns the 8-bit carry-out (carry or overflow flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store64(__p, __x - (__y + temp))
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c SBB instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// The 64-bit unsigned minuend.
|
||||
/// \param __y
|
||||
/// The 64-bit unsigned subtrahend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the difference.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS
|
||||
_subborrow_u64(unsigned char __cf, unsigned long long __x,
|
||||
unsigned long long __y, unsigned long long *__p) {
|
||||
return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef __INLINE
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* __ADCINTRIN_H */
|
||||
143
lib/include/adxintrin.h
vendored
143
lib/include/adxintrin.h
vendored
@ -15,7 +15,8 @@
|
||||
#define __ADXINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
|
||||
|
||||
/* Use C++ inline semantics in C++, GNU inline for C mode. */
|
||||
#if defined(__cplusplus)
|
||||
@ -53,10 +54,10 @@ extern "C" {
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
|
||||
_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
|
||||
unsigned int *__p) {
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarryx_u32(unsigned char __cf,
|
||||
unsigned int __x,
|
||||
unsigned int __y,
|
||||
unsigned int *__p) {
|
||||
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
@ -84,137 +85,10 @@ __INLINE unsigned char
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("adx")))
|
||||
_addcarryx_u64(unsigned char __cf, unsigned long long __x,
|
||||
unsigned long long __y, unsigned long long *__p) {
|
||||
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Intrinsics that are also available if __ADX__ is undefined. */
|
||||
|
||||
/// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated
|
||||
/// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory
|
||||
/// at \a __p, and returns the 8-bit carry-out (carry flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store32(__p, __x + __y + temp)
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ADC instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// A 32-bit unsigned addend.
|
||||
/// \param __y
|
||||
/// A 32-bit unsigned addend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf,
|
||||
unsigned int __x,
|
||||
unsigned int __y,
|
||||
unsigned int *__p) {
|
||||
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated
|
||||
/// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory
|
||||
/// at \a __p, and returns the 8-bit carry-out (carry flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store64(__p, __x + __y + temp)
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ADC instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// A 64-bit unsigned addend.
|
||||
/// \param __y
|
||||
/// A 64-bit unsigned addend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the sum.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS
|
||||
_addcarry_u64(unsigned char __cf, unsigned long long __x,
|
||||
unsigned long long __y, unsigned long long *__p) {
|
||||
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry
|
||||
/// flag \a __cf, and subtracts the result from unsigned 32-bit integer
|
||||
/// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p,
|
||||
/// and returns the 8-bit carry-out (carry or overflow flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store32(__p, __x - (__y + temp))
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c SBB instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// The 32-bit unsigned minuend.
|
||||
/// \param __y
|
||||
/// The 32-bit unsigned subtrahend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the difference.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf,
|
||||
unsigned int __x,
|
||||
unsigned int __y,
|
||||
unsigned int *__p) {
|
||||
return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry
|
||||
/// flag \a __cf, and subtracts the result from unsigned 64-bit integer
|
||||
/// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p,
|
||||
/// and returns the 8-bit carry-out (carry or overflow flag).
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// temp := (__cf == 0) ? 0 : 1
|
||||
/// Store64(__p, __x - (__y + temp))
|
||||
/// result := CF
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ADC instruction.
|
||||
///
|
||||
/// \param __cf
|
||||
/// The 8-bit unsigned carry flag; any non-zero value indicates carry.
|
||||
/// \param __x
|
||||
/// The 64-bit unsigned minuend.
|
||||
/// \param __y
|
||||
/// The 64-bit unsigned subtrahend.
|
||||
/// \param __p
|
||||
/// Pointer to memory for storing the difference.
|
||||
/// \returns The 8-bit unsigned carry-out value.
|
||||
__INLINE unsigned char __DEFAULT_FN_ATTRS
|
||||
_subborrow_u64(unsigned char __cf, unsigned long long __x,
|
||||
_addcarryx_u64(unsigned char __cf, unsigned long long __x,
|
||||
unsigned long long __y, unsigned long long *__p) {
|
||||
return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
|
||||
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -222,6 +96,7 @@ _subborrow_u64(unsigned char __cf, unsigned long long __x,
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef __INLINE
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* __ADXINTRIN_H */
|
||||
|
||||
43
lib/include/altivec.h
vendored
43
lib/include/altivec.h
vendored
@ -14647,67 +14647,86 @@ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,
|
||||
|
||||
static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a,
|
||||
int __b) {
|
||||
vector signed char __res = (vector signed char)(0);
|
||||
__res[__b & 0x7] = __a;
|
||||
const vector signed char __zero = (vector signed char)0;
|
||||
vector signed char __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1);
|
||||
__res[__b & 0xf] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned char __ATTRS_o_ai
|
||||
vec_promote(unsigned char __a, int __b) {
|
||||
vector unsigned char __res = (vector unsigned char)(0);
|
||||
__res[__b & 0x7] = __a;
|
||||
const vector unsigned char __zero = (vector unsigned char)(0);
|
||||
vector unsigned char __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1);
|
||||
__res[__b & 0xf] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) {
|
||||
vector short __res = (vector short)(0);
|
||||
const vector short __zero = (vector short)(0);
|
||||
vector short __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
|
||||
__res[__b & 0x7] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned short __ATTRS_o_ai
|
||||
vec_promote(unsigned short __a, int __b) {
|
||||
vector unsigned short __res = (vector unsigned short)(0);
|
||||
const vector unsigned short __zero = (vector unsigned short)(0);
|
||||
vector unsigned short __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
|
||||
__res[__b & 0x7] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) {
|
||||
vector int __res = (vector int)(0);
|
||||
const vector int __zero = (vector int)(0);
|
||||
vector int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
|
||||
__res[__b & 0x3] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a,
|
||||
int __b) {
|
||||
vector unsigned int __res = (vector unsigned int)(0);
|
||||
const vector unsigned int __zero = (vector unsigned int)(0);
|
||||
vector unsigned int __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
|
||||
__res[__b & 0x3] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
|
||||
vector float __res = (vector float)(0);
|
||||
const vector float __zero = (vector float)(0);
|
||||
vector float __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
|
||||
__res[__b & 0x3] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
#ifdef __VSX__
|
||||
static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) {
|
||||
vector double __res = (vector double)(0);
|
||||
const vector double __zero = (vector double)(0);
|
||||
vector double __res = __builtin_shufflevector(__zero, __zero, -1, -1);
|
||||
__res[__b & 0x1] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector signed long long __ATTRS_o_ai
|
||||
vec_promote(signed long long __a, int __b) {
|
||||
vector signed long long __res = (vector signed long long)(0);
|
||||
const vector signed long long __zero = (vector signed long long)(0);
|
||||
vector signed long long __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1);
|
||||
__res[__b & 0x1] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned long long __ATTRS_o_ai
|
||||
vec_promote(unsigned long long __a, int __b) {
|
||||
vector unsigned long long __res = (vector unsigned long long)(0);
|
||||
const vector unsigned long long __zero = (vector unsigned long long)(0);
|
||||
vector unsigned long long __res =
|
||||
__builtin_shufflevector(__zero, __zero, -1, -1);
|
||||
__res[__b & 0x1] = __a;
|
||||
return __res;
|
||||
}
|
||||
|
||||
8
lib/include/ammintrin.h
vendored
8
lib/include/ammintrin.h
vendored
@ -155,9 +155,9 @@ _mm_insert_si64(__m128i __x, __m128i __y)
|
||||
/// \param __a
|
||||
/// The 64-bit double-precision floating-point register value to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_stream_sd(double *__p, __m128d __a)
|
||||
_mm_stream_sd(void *__p, __m128d __a)
|
||||
{
|
||||
__builtin_ia32_movntsd(__p, (__v2df)__a);
|
||||
__builtin_ia32_movntsd((double *)__p, (__v2df)__a);
|
||||
}
|
||||
|
||||
/// Stores a 32-bit single-precision floating-point value in a 32-bit
|
||||
@ -173,9 +173,9 @@ _mm_stream_sd(double *__p, __m128d __a)
|
||||
/// \param __a
|
||||
/// The 32-bit single-precision floating-point register value to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_stream_ss(float *__p, __m128 __a)
|
||||
_mm_stream_ss(void *__p, __m128 __a)
|
||||
{
|
||||
__builtin_ia32_movntss(__p, (__v4sf)__a);
|
||||
__builtin_ia32_movntss((float *)__p, (__v4sf)__a);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
142
lib/include/arm_acle.h
vendored
142
lib/include/arm_acle.h
vendored
@ -4,6 +4,13 @@
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
* The Arm C Language Extensions specifications can be found in the following
|
||||
* link: https://github.com/ARM-software/acle/releases
|
||||
*
|
||||
* The ACLE section numbers are subject to change. When consulting the
|
||||
* specifications, it is recommended to search using section titles if
|
||||
* the section numbers look outdated.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
@ -20,8 +27,8 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
|
||||
/* 8.3 Memory barriers */
|
||||
/* 7 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
|
||||
/* 7.3 Memory barriers */
|
||||
#if !__has_builtin(__dmb)
|
||||
#define __dmb(i) __builtin_arm_dmb(i)
|
||||
#endif
|
||||
@ -32,7 +39,7 @@ extern "C" {
|
||||
#define __isb(i) __builtin_arm_isb(i)
|
||||
#endif
|
||||
|
||||
/* 8.4 Hints */
|
||||
/* 7.4 Hints */
|
||||
|
||||
#if !__has_builtin(__wfi)
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
|
||||
@ -68,7 +75,7 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v
|
||||
#define __dbg(t) __builtin_arm_dbg(t)
|
||||
#endif
|
||||
|
||||
/* 8.5 Swap */
|
||||
/* 7.5 Swap */
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__swp(uint32_t __x, volatile uint32_t *__p) {
|
||||
uint32_t v;
|
||||
@ -78,8 +85,8 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
|
||||
return v;
|
||||
}
|
||||
|
||||
/* 8.6 Memory prefetch intrinsics */
|
||||
/* 8.6.1 Data prefetch */
|
||||
/* 7.6 Memory prefetch intrinsics */
|
||||
/* 7.6.1 Data prefetch */
|
||||
#define __pld(addr) __pldx(0, 0, 0, addr)
|
||||
|
||||
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
|
||||
@ -90,7 +97,7 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
|
||||
__builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
|
||||
#endif
|
||||
|
||||
/* 8.6.2 Instruction prefetch */
|
||||
/* 7.6.2 Instruction prefetch */
|
||||
#define __pli(addr) __plix(0, 0, addr)
|
||||
|
||||
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
|
||||
@ -101,15 +108,15 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
|
||||
__builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
|
||||
#endif
|
||||
|
||||
/* 8.7 NOP */
|
||||
/* 7.7 NOP */
|
||||
#if !defined(_MSC_VER) || !defined(__aarch64__)
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
|
||||
__builtin_arm_nop();
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9 DATA-PROCESSING INTRINSICS */
|
||||
/* 9.2 Miscellaneous data-processing intrinsics */
|
||||
/* 8 DATA-PROCESSING INTRINSICS */
|
||||
/* 8.2 Miscellaneous data-processing intrinsics */
|
||||
/* ROR */
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__ror(uint32_t __x, uint32_t __y) {
|
||||
@ -248,9 +255,7 @@ __rbitl(unsigned long __t) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* 9.3 16-bit multiplications
|
||||
*/
|
||||
/* 8.3 16-bit multiplications */
|
||||
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
|
||||
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
|
||||
__smulbb(int32_t __a, int32_t __b) {
|
||||
@ -279,18 +284,18 @@ __smulwt(int32_t __a, int32_t __b) {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 9.4 Saturating intrinsics
|
||||
* 8.4 Saturating intrinsics
|
||||
*
|
||||
* FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag
|
||||
* intrinsics are implemented and the flag is enabled.
|
||||
*/
|
||||
/* 9.4.1 Width-specified saturation intrinsics */
|
||||
/* 8.4.1 Width-specified saturation intrinsics */
|
||||
#if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT
|
||||
#define __ssat(x, y) __builtin_arm_ssat(x, y)
|
||||
#define __usat(x, y) __builtin_arm_usat(x, y)
|
||||
#endif
|
||||
|
||||
/* 9.4.2 Saturating addition and subtraction intrinsics */
|
||||
/* 8.4.2 Saturating addition and subtraction intrinsics */
|
||||
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
|
||||
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__qadd(int32_t __t, int32_t __v) {
|
||||
@ -308,7 +313,7 @@ __qdbl(int32_t __t) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.4.3 Accumultating multiplications */
|
||||
/* 8.4.3 Accumulating multiplications */
|
||||
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
|
||||
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
|
||||
@ -337,13 +342,13 @@ __smlawt(int32_t __a, int32_t __b, int32_t __c) {
|
||||
#endif
|
||||
|
||||
|
||||
/* 9.5.4 Parallel 16-bit saturation */
|
||||
/* 8.5.4 Parallel 16-bit saturation */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
|
||||
#define __usat16(x, y) __builtin_arm_usat16(x, y)
|
||||
#endif
|
||||
|
||||
/* 9.5.5 Packing and unpacking */
|
||||
/* 8.5.5 Packing and unpacking */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
typedef int32_t int8x4_t;
|
||||
typedef int32_t int16x2_t;
|
||||
@ -368,7 +373,7 @@ __uxtb16(int8x4_t __a) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.5.6 Parallel selection */
|
||||
/* 8.5.6 Parallel selection */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
|
||||
__sel(uint8x4_t __a, uint8x4_t __b) {
|
||||
@ -376,7 +381,7 @@ __sel(uint8x4_t __a, uint8x4_t __b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.5.7 Parallel 8-bit addition and subtraction */
|
||||
/* 8.5.7 Parallel 8-bit addition and subtraction */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
|
||||
__qadd8(int8x4_t __a, int8x4_t __b) {
|
||||
@ -428,7 +433,7 @@ __usub8(uint8x4_t __a, uint8x4_t __b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.5.8 Sum of 8-bit absolute differences */
|
||||
/* 8.5.8 Sum of 8-bit absolute differences */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__usad8(uint8x4_t __a, uint8x4_t __b) {
|
||||
@ -440,7 +445,7 @@ __usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.5.9 Parallel 16-bit addition and subtraction */
|
||||
/* 8.5.9 Parallel 16-bit addition and subtraction */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
|
||||
__qadd16(int16x2_t __a, int16x2_t __b) {
|
||||
@ -540,7 +545,7 @@ __usub16(uint16x2_t __a, uint16x2_t __b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.5.10 Parallel 16-bit multiplications */
|
||||
/* 8.5.10 Parallel 16-bit multiplications */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
|
||||
@ -592,7 +597,22 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 9.7 CRC32 intrinsics */
|
||||
/* 8.6 Floating-point data-processing intrinsics */
|
||||
#if (defined(__ARM_FEATURE_DIRECTED_ROUNDING) && \
|
||||
(__ARM_FEATURE_DIRECTED_ROUNDING)) && \
|
||||
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
|
||||
static __inline__ double __attribute__((__always_inline__, __nodebug__))
|
||||
__rintn(double __a) {
|
||||
return __builtin_roundeven(__a);
|
||||
}
|
||||
|
||||
static __inline__ float __attribute__((__always_inline__, __nodebug__))
|
||||
__rintnf(float __a) {
|
||||
return __builtin_roundevenf(__a);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 8.8 CRC32 intrinsics */
|
||||
#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \
|
||||
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
|
||||
@ -636,6 +656,7 @@ __crc32cd(uint32_t __a, uint64_t __b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 8.6 Floating-point data-processing intrinsics */
|
||||
/* Armv8.3-A Javascript conversion intrinsic */
|
||||
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
|
||||
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
|
||||
@ -687,7 +708,7 @@ __rint64x(double __a) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Armv8.7-A load/store 64-byte intrinsics */
|
||||
/* 8.9 Armv8.7-A load/store 64-byte intrinsics */
|
||||
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
|
||||
typedef struct {
|
||||
uint64_t val[8];
|
||||
@ -713,7 +734,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 10.1 Special register intrinsics */
|
||||
/* 11.1 Special register intrinsics */
|
||||
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
|
||||
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
|
||||
#define __arm_rsr128(sysreg) __builtin_arm_rsr128(sysreg)
|
||||
@ -727,7 +748,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
|
||||
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
|
||||
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))
|
||||
|
||||
/* Memory Tagging Extensions (MTE) Intrinsics */
|
||||
/* 10.3 Memory Tagging Extensions (MTE) Intrinsics */
|
||||
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
|
||||
#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
|
||||
#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
|
||||
@ -736,12 +757,71 @@ __arm_st64bv0(void *__addr, data512_t __value) {
|
||||
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
|
||||
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
|
||||
|
||||
/* Memory Operations Intrinsics */
|
||||
/* 18 Memory Operations Intrinsics */
|
||||
#define __arm_mops_memset_tag(__tagged_address, __value, __size) \
|
||||
__builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
|
||||
#endif
|
||||
|
||||
/* Transactional Memory Extension (TME) Intrinsics */
|
||||
/* 11.3 Coprocessor Intrinsics */
|
||||
#if defined(__ARM_FEATURE_COPROC)
|
||||
|
||||
#if (__ARM_FEATURE_COPROC & 0x1)
|
||||
|
||||
#if (__ARM_ARCH < 8)
|
||||
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
|
||||
__builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
|
||||
#endif /* __ARM_ARCH < 8 */
|
||||
|
||||
#define __arm_ldc(coproc, CRd, p) __builtin_arm_ldc(coproc, CRd, p)
|
||||
#define __arm_stc(coproc, CRd, p) __builtin_arm_stc(coproc, CRd, p)
|
||||
|
||||
#define __arm_mcr(coproc, opc1, value, CRn, CRm, opc2) \
|
||||
__builtin_arm_mcr(coproc, opc1, value, CRn, CRm, opc2)
|
||||
#define __arm_mrc(coproc, opc1, CRn, CRm, opc2) \
|
||||
__builtin_arm_mrc(coproc, opc1, CRn, CRm, opc2)
|
||||
|
||||
#if (__ARM_ARCH != 4) && (__ARM_ARCH < 8)
|
||||
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
|
||||
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
|
||||
#endif /* (__ARM_ARCH != 4) && (__ARM_ARCH < 8) */
|
||||
|
||||
#if (__ARM_ARCH_8M_MAIN__) || (__ARM_ARCH_8_1M_MAIN__)
|
||||
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \
|
||||
__builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
|
||||
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
|
||||
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
|
||||
#endif /* __ARM_ARCH_8M_MAIN__ || __ARM_ARCH_8_1M_MAIN__ */
|
||||
|
||||
#endif /* __ARM_FEATURE_COPROC & 0x1 */
|
||||
|
||||
#if (__ARM_FEATURE_COPROC & 0x2)
|
||||
#define __arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) \
|
||||
__builtin_arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2)
|
||||
#define __arm_ldc2(coproc, CRd, p) __builtin_arm_ldc2(coproc, CRd, p)
|
||||
#define __arm_stc2(coproc, CRd, p) __builtin_arm_stc2(coproc, CRd, p)
|
||||
#define __arm_ldc2l(coproc, CRd, p) __builtin_arm_ldc2l(coproc, CRd, p)
|
||||
#define __arm_stc2l(coproc, CRd, p) __builtin_arm_stc2l(coproc, CRd, p)
|
||||
#define __arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) \
|
||||
__builtin_arm_mcr2(coproc, opc1, value, CRn, CRm, opc2)
|
||||
#define __arm_mrc2(coproc, opc1, CRn, CRm, opc2) \
|
||||
__builtin_arm_mrc2(coproc, opc1, CRn, CRm, opc2)
|
||||
#endif
|
||||
|
||||
#if (__ARM_FEATURE_COPROC & 0x4)
|
||||
#define __arm_mcrr(coproc, opc1, value, CRm) \
|
||||
__builtin_arm_mcrr(coproc, opc1, value, CRm)
|
||||
#define __arm_mrrc(coproc, opc1, CRm) __builtin_arm_mrrc(coproc, opc1, CRm)
|
||||
#endif
|
||||
|
||||
#if (__ARM_FEATURE_COPROC & 0x8)
|
||||
#define __arm_mcrr2(coproc, opc1, value, CRm) \
|
||||
__builtin_arm_mcrr2(coproc, opc1, value, CRm)
|
||||
#define __arm_mrrc2(coproc, opc1, CRm) __builtin_arm_mrrc2(coproc, opc1, CRm)
|
||||
#endif
|
||||
|
||||
#endif // __ARM_FEATURE_COPROC
|
||||
|
||||
/* 17 Transactional Memory Extension (TME) Intrinsics */
|
||||
#if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME
|
||||
|
||||
#define _TMFAILURE_REASON 0x00007fffu
|
||||
@ -763,7 +843,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
|
||||
|
||||
#endif /* __ARM_FEATURE_TME */
|
||||
|
||||
/* Armv8.5-A Random number generation intrinsics */
|
||||
/* 8.7 Armv8.5-A Random number generation intrinsics */
|
||||
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
|
||||
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
|
||||
__rndr(uint64_t *__p) {
|
||||
|
||||
412
lib/include/arm_neon.h
vendored
412
lib/include/arm_neon.h
vendored
@ -35,12 +35,7 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include <arm_bf16.h>
|
||||
typedef float float32_t;
|
||||
typedef __fp16 float16_t;
|
||||
#ifdef __aarch64__
|
||||
typedef double float64_t;
|
||||
#endif
|
||||
|
||||
#include <arm_vector_types.h>
|
||||
#ifdef __aarch64__
|
||||
typedef uint8_t poly8_t;
|
||||
typedef uint16_t poly16_t;
|
||||
@ -51,30 +46,6 @@ typedef int8_t poly8_t;
|
||||
typedef int16_t poly16_t;
|
||||
typedef int64_t poly64_t;
|
||||
#endif
|
||||
typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
|
||||
typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
|
||||
typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
|
||||
typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
|
||||
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
|
||||
typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
|
||||
typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
|
||||
typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
|
||||
#ifdef __aarch64__
|
||||
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
|
||||
#endif
|
||||
typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
|
||||
typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
|
||||
typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
|
||||
@ -82,96 +53,6 @@ typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
|
||||
typedef __attribute__((neon_polyvector_type(1))) poly64_t poly64x1_t;
|
||||
typedef __attribute__((neon_polyvector_type(2))) poly64_t poly64x2_t;
|
||||
|
||||
typedef struct int8x8x2_t {
|
||||
int8x8_t val[2];
|
||||
} int8x8x2_t;
|
||||
|
||||
typedef struct int8x16x2_t {
|
||||
int8x16_t val[2];
|
||||
} int8x16x2_t;
|
||||
|
||||
typedef struct int16x4x2_t {
|
||||
int16x4_t val[2];
|
||||
} int16x4x2_t;
|
||||
|
||||
typedef struct int16x8x2_t {
|
||||
int16x8_t val[2];
|
||||
} int16x8x2_t;
|
||||
|
||||
typedef struct int32x2x2_t {
|
||||
int32x2_t val[2];
|
||||
} int32x2x2_t;
|
||||
|
||||
typedef struct int32x4x2_t {
|
||||
int32x4_t val[2];
|
||||
} int32x4x2_t;
|
||||
|
||||
typedef struct int64x1x2_t {
|
||||
int64x1_t val[2];
|
||||
} int64x1x2_t;
|
||||
|
||||
typedef struct int64x2x2_t {
|
||||
int64x2_t val[2];
|
||||
} int64x2x2_t;
|
||||
|
||||
typedef struct uint8x8x2_t {
|
||||
uint8x8_t val[2];
|
||||
} uint8x8x2_t;
|
||||
|
||||
typedef struct uint8x16x2_t {
|
||||
uint8x16_t val[2];
|
||||
} uint8x16x2_t;
|
||||
|
||||
typedef struct uint16x4x2_t {
|
||||
uint16x4_t val[2];
|
||||
} uint16x4x2_t;
|
||||
|
||||
typedef struct uint16x8x2_t {
|
||||
uint16x8_t val[2];
|
||||
} uint16x8x2_t;
|
||||
|
||||
typedef struct uint32x2x2_t {
|
||||
uint32x2_t val[2];
|
||||
} uint32x2x2_t;
|
||||
|
||||
typedef struct uint32x4x2_t {
|
||||
uint32x4_t val[2];
|
||||
} uint32x4x2_t;
|
||||
|
||||
typedef struct uint64x1x2_t {
|
||||
uint64x1_t val[2];
|
||||
} uint64x1x2_t;
|
||||
|
||||
typedef struct uint64x2x2_t {
|
||||
uint64x2_t val[2];
|
||||
} uint64x2x2_t;
|
||||
|
||||
typedef struct float16x4x2_t {
|
||||
float16x4_t val[2];
|
||||
} float16x4x2_t;
|
||||
|
||||
typedef struct float16x8x2_t {
|
||||
float16x8_t val[2];
|
||||
} float16x8x2_t;
|
||||
|
||||
typedef struct float32x2x2_t {
|
||||
float32x2_t val[2];
|
||||
} float32x2x2_t;
|
||||
|
||||
typedef struct float32x4x2_t {
|
||||
float32x4_t val[2];
|
||||
} float32x4x2_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x2_t {
|
||||
float64x1_t val[2];
|
||||
} float64x1x2_t;
|
||||
|
||||
typedef struct float64x2x2_t {
|
||||
float64x2_t val[2];
|
||||
} float64x2x2_t;
|
||||
|
||||
#endif
|
||||
typedef struct poly8x8x2_t {
|
||||
poly8x8_t val[2];
|
||||
} poly8x8x2_t;
|
||||
@ -196,96 +77,6 @@ typedef struct poly64x2x2_t {
|
||||
poly64x2_t val[2];
|
||||
} poly64x2x2_t;
|
||||
|
||||
typedef struct int8x8x3_t {
|
||||
int8x8_t val[3];
|
||||
} int8x8x3_t;
|
||||
|
||||
typedef struct int8x16x3_t {
|
||||
int8x16_t val[3];
|
||||
} int8x16x3_t;
|
||||
|
||||
typedef struct int16x4x3_t {
|
||||
int16x4_t val[3];
|
||||
} int16x4x3_t;
|
||||
|
||||
typedef struct int16x8x3_t {
|
||||
int16x8_t val[3];
|
||||
} int16x8x3_t;
|
||||
|
||||
typedef struct int32x2x3_t {
|
||||
int32x2_t val[3];
|
||||
} int32x2x3_t;
|
||||
|
||||
typedef struct int32x4x3_t {
|
||||
int32x4_t val[3];
|
||||
} int32x4x3_t;
|
||||
|
||||
typedef struct int64x1x3_t {
|
||||
int64x1_t val[3];
|
||||
} int64x1x3_t;
|
||||
|
||||
typedef struct int64x2x3_t {
|
||||
int64x2_t val[3];
|
||||
} int64x2x3_t;
|
||||
|
||||
typedef struct uint8x8x3_t {
|
||||
uint8x8_t val[3];
|
||||
} uint8x8x3_t;
|
||||
|
||||
typedef struct uint8x16x3_t {
|
||||
uint8x16_t val[3];
|
||||
} uint8x16x3_t;
|
||||
|
||||
typedef struct uint16x4x3_t {
|
||||
uint16x4_t val[3];
|
||||
} uint16x4x3_t;
|
||||
|
||||
typedef struct uint16x8x3_t {
|
||||
uint16x8_t val[3];
|
||||
} uint16x8x3_t;
|
||||
|
||||
typedef struct uint32x2x3_t {
|
||||
uint32x2_t val[3];
|
||||
} uint32x2x3_t;
|
||||
|
||||
typedef struct uint32x4x3_t {
|
||||
uint32x4_t val[3];
|
||||
} uint32x4x3_t;
|
||||
|
||||
typedef struct uint64x1x3_t {
|
||||
uint64x1_t val[3];
|
||||
} uint64x1x3_t;
|
||||
|
||||
typedef struct uint64x2x3_t {
|
||||
uint64x2_t val[3];
|
||||
} uint64x2x3_t;
|
||||
|
||||
typedef struct float16x4x3_t {
|
||||
float16x4_t val[3];
|
||||
} float16x4x3_t;
|
||||
|
||||
typedef struct float16x8x3_t {
|
||||
float16x8_t val[3];
|
||||
} float16x8x3_t;
|
||||
|
||||
typedef struct float32x2x3_t {
|
||||
float32x2_t val[3];
|
||||
} float32x2x3_t;
|
||||
|
||||
typedef struct float32x4x3_t {
|
||||
float32x4_t val[3];
|
||||
} float32x4x3_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x3_t {
|
||||
float64x1_t val[3];
|
||||
} float64x1x3_t;
|
||||
|
||||
typedef struct float64x2x3_t {
|
||||
float64x2_t val[3];
|
||||
} float64x2x3_t;
|
||||
|
||||
#endif
|
||||
typedef struct poly8x8x3_t {
|
||||
poly8x8_t val[3];
|
||||
} poly8x8x3_t;
|
||||
@ -310,96 +101,6 @@ typedef struct poly64x2x3_t {
|
||||
poly64x2_t val[3];
|
||||
} poly64x2x3_t;
|
||||
|
||||
typedef struct int8x8x4_t {
|
||||
int8x8_t val[4];
|
||||
} int8x8x4_t;
|
||||
|
||||
typedef struct int8x16x4_t {
|
||||
int8x16_t val[4];
|
||||
} int8x16x4_t;
|
||||
|
||||
typedef struct int16x4x4_t {
|
||||
int16x4_t val[4];
|
||||
} int16x4x4_t;
|
||||
|
||||
typedef struct int16x8x4_t {
|
||||
int16x8_t val[4];
|
||||
} int16x8x4_t;
|
||||
|
||||
typedef struct int32x2x4_t {
|
||||
int32x2_t val[4];
|
||||
} int32x2x4_t;
|
||||
|
||||
typedef struct int32x4x4_t {
|
||||
int32x4_t val[4];
|
||||
} int32x4x4_t;
|
||||
|
||||
typedef struct int64x1x4_t {
|
||||
int64x1_t val[4];
|
||||
} int64x1x4_t;
|
||||
|
||||
typedef struct int64x2x4_t {
|
||||
int64x2_t val[4];
|
||||
} int64x2x4_t;
|
||||
|
||||
typedef struct uint8x8x4_t {
|
||||
uint8x8_t val[4];
|
||||
} uint8x8x4_t;
|
||||
|
||||
typedef struct uint8x16x4_t {
|
||||
uint8x16_t val[4];
|
||||
} uint8x16x4_t;
|
||||
|
||||
typedef struct uint16x4x4_t {
|
||||
uint16x4_t val[4];
|
||||
} uint16x4x4_t;
|
||||
|
||||
typedef struct uint16x8x4_t {
|
||||
uint16x8_t val[4];
|
||||
} uint16x8x4_t;
|
||||
|
||||
typedef struct uint32x2x4_t {
|
||||
uint32x2_t val[4];
|
||||
} uint32x2x4_t;
|
||||
|
||||
typedef struct uint32x4x4_t {
|
||||
uint32x4_t val[4];
|
||||
} uint32x4x4_t;
|
||||
|
||||
typedef struct uint64x1x4_t {
|
||||
uint64x1_t val[4];
|
||||
} uint64x1x4_t;
|
||||
|
||||
typedef struct uint64x2x4_t {
|
||||
uint64x2_t val[4];
|
||||
} uint64x2x4_t;
|
||||
|
||||
typedef struct float16x4x4_t {
|
||||
float16x4_t val[4];
|
||||
} float16x4x4_t;
|
||||
|
||||
typedef struct float16x8x4_t {
|
||||
float16x8_t val[4];
|
||||
} float16x8x4_t;
|
||||
|
||||
typedef struct float32x2x4_t {
|
||||
float32x2_t val[4];
|
||||
} float32x2x4_t;
|
||||
|
||||
typedef struct float32x4x4_t {
|
||||
float32x4_t val[4];
|
||||
} float32x4x4_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x4_t {
|
||||
float64x1_t val[4];
|
||||
} float64x1x4_t;
|
||||
|
||||
typedef struct float64x2x4_t {
|
||||
float64x2_t val[4];
|
||||
} float64x2x4_t;
|
||||
|
||||
#endif
|
||||
typedef struct poly8x8x4_t {
|
||||
poly8x8_t val[4];
|
||||
} poly8x8x4_t;
|
||||
@ -424,33 +125,6 @@ typedef struct poly64x2x4_t {
|
||||
poly64x2_t val[4];
|
||||
} poly64x2x4_t;
|
||||
|
||||
typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
|
||||
|
||||
typedef struct bfloat16x4x2_t {
|
||||
bfloat16x4_t val[2];
|
||||
} bfloat16x4x2_t;
|
||||
|
||||
typedef struct bfloat16x8x2_t {
|
||||
bfloat16x8_t val[2];
|
||||
} bfloat16x8x2_t;
|
||||
|
||||
typedef struct bfloat16x4x3_t {
|
||||
bfloat16x4_t val[3];
|
||||
} bfloat16x4x3_t;
|
||||
|
||||
typedef struct bfloat16x8x3_t {
|
||||
bfloat16x8_t val[3];
|
||||
} bfloat16x8x3_t;
|
||||
|
||||
typedef struct bfloat16x4x4_t {
|
||||
bfloat16x4_t val[4];
|
||||
} bfloat16x4x4_t;
|
||||
|
||||
typedef struct bfloat16x8x4_t {
|
||||
bfloat16x8_t val[4];
|
||||
} bfloat16x8x4_t;
|
||||
|
||||
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
@ -66600,6 +66274,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32x_f32(float32x2_t __p0)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__p0, 42);
|
||||
return __ret;
|
||||
}
|
||||
#else
|
||||
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__rev0, 42);
|
||||
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
|
||||
return __ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32x_f64(float64x1_t __p0) {
|
||||
float64x1_t __ret;
|
||||
__ret = (float64x1_t) __builtin_neon_vrnd32x_f64((int8x8_t)__p0, 10);
|
||||
return __ret;
|
||||
}
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd32zq_f32(float32x4_t __p0) {
|
||||
float32x4_t __ret;
|
||||
@ -66632,6 +66327,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd32z_f32(float32x2_t __p0)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__p0, 42);
|
||||
return __ret;
|
||||
}
|
||||
#else
|
||||
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__rev0, 42);
|
||||
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
|
||||
return __ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd32z_f64(float64x1_t __p0) {
|
||||
float64x1_t __ret;
|
||||
__ret = (float64x1_t) __builtin_neon_vrnd32z_f64((int8x8_t)__p0, 10);
|
||||
return __ret;
|
||||
}
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd64xq_f32(float32x4_t __p0) {
|
||||
float32x4_t __ret;
|
||||
@ -66664,6 +66380,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64x_f32(float32x2_t __p0)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__p0, 42);
|
||||
return __ret;
|
||||
}
|
||||
#else
|
||||
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__rev0, 42);
|
||||
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
|
||||
return __ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64x_f64(float64x1_t __p0) {
|
||||
float64x1_t __ret;
|
||||
__ret = (float64x1_t) __builtin_neon_vrnd64x_f64((int8x8_t)__p0, 10);
|
||||
return __ret;
|
||||
}
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
__ai __attribute__((target("v8.5a"))) float32x4_t vrnd64zq_f32(float32x4_t __p0) {
|
||||
float32x4_t __ret;
|
||||
@ -66696,6 +66433,27 @@ __ai __attribute__((target("v8.5a"))) float32x2_t vrnd64z_f32(float32x2_t __p0)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__p0, 42);
|
||||
return __ret;
|
||||
}
|
||||
#else
|
||||
__ai __attribute__((target("v8.5a"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) {
|
||||
float64x2_t __ret;
|
||||
float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
|
||||
__ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__rev0, 42);
|
||||
__ret = __builtin_shufflevector(__ret, __ret, 1, 0);
|
||||
return __ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
__ai __attribute__((target("v8.5a"))) float64x1_t vrnd64z_f64(float64x1_t __p0) {
|
||||
float64x1_t __ret;
|
||||
__ret = (float64x1_t) __builtin_neon_vrnd64z_f64((int8x8_t)__p0, 10);
|
||||
return __ret;
|
||||
}
|
||||
#endif
|
||||
#if defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
|
||||
2412
lib/include/arm_sme.h
vendored
Normal file
2412
lib/include/arm_sme.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
642
lib/include/arm_sme_draft_spec_subject_to_change.h
vendored
642
lib/include/arm_sme_draft_spec_subject_to_change.h
vendored
@ -1,642 +0,0 @@
|
||||
/*===---- arm_sme_draft_spec_subject_to_change.h - ARM SME intrinsics ------===
|
||||
*
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __ARM_SME_H
|
||||
#define __ARM_SME_H
|
||||
|
||||
#if !defined(__LITTLE_ENDIAN__)
|
||||
#error "Big endian is currently not supported for arm_sme_draft_spec_subject_to_change.h"
|
||||
#endif
|
||||
#include <arm_sve.h>
|
||||
|
||||
/* Function attributes */
|
||||
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
#define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__))
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsb), arm_streaming_compatible, arm_preserves_za))
|
||||
uint64_t svcntsb(void);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsd), arm_streaming_compatible, arm_preserves_za))
|
||||
uint64_t svcntsd(void);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsh), arm_streaming_compatible, arm_preserves_za))
|
||||
uint64_t svcntsh(void);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsw), arm_streaming_compatible, arm_preserves_za))
|
||||
uint64_t svcntsw(void);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za128), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za16), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za32), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za64), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za8), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za128), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za16), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za32), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za64), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za8), arm_streaming, arm_shared_za))
|
||||
void svld1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za128), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za16), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za32), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za64), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za8), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za128), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za16), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za32), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za64), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za8), arm_streaming, arm_shared_za))
|
||||
void svld1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_hor_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_hor_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_hor_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_hor_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_hor_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_hor_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_hor_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_hor_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_hor_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_hor_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_hor_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_hor_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_hor_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_hor_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_hor_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_hor_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_hor_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_hor_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_hor_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_hor_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_hor_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_hor_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_hor_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_hor_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_ver_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_ver_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_ver_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_ver_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_ver_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_ver_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_ver_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_ver_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_ver_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_ver_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_ver_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_ver_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_ver_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_ver_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_ver_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_ver_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_ver_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_ver_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_ver_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_ver_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_ver_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_ver_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_ver_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_ver_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za128), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za16), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za32), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za64), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za8), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za128), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za16), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za32), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za64), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za8), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
void svst1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svsumopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svsumops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svusmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svusmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za), arm_streaming_compatible, arm_shared_za))
|
||||
void svzero_mask_za(uint64_t);
|
||||
/* svzero_za: alias for the __builtin_sme_svzero_za builtin; takes no arguments
 * and returns nothing.  The parameter list is spelled (void) rather than ()
 * because in C (before C23) an empty list declares a function without a
 * prototype: argument checking is lost and -Wstrict-prototypes fires.  In C++
 * both spellings mean the same thing, so existing callers are unaffected. */
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za), arm_streaming_compatible, arm_shared_za))
|
||||
void svzero_za(void);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_hor_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_hor_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_hor_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_hor_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_hor_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_hor_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_hor_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_hor_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_hor_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_hor_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_hor_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_hor_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_hor_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_hor_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_hor_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_hor_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_hor_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_hor_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_hor_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_hor_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_hor_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_hor_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_hor_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_hor_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_ver_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_ver_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_ver_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_ver_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_ver_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_ver_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_ver_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_ver_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_ver_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_ver_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_ver_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_ver_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint16_t svread_ver_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svbfloat16_t svread_ver_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat16_t svread_ver_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint16_t svread_ver_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint32_t svread_ver_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat32_t svread_ver_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint32_t svread_ver_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint64_t svread_ver_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svfloat64_t svread_ver_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint64_t svread_ver_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svuint8_t svread_ver_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za))
|
||||
svint8_t svread_ver_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svsumopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za))
|
||||
void svsumops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svusmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za))
|
||||
void svusmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za))
|
||||
void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svsumopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svsumops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svusmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svusmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za))
|
||||
void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svsumopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za))
|
||||
void svsumops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svusmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za))
|
||||
void svusmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_vnum_za), arm_streaming_compatible, arm_shared_za))
|
||||
void svldr_vnum_za(uint32_t, uint64_t, void const *);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_vnum_za), arm_streaming_compatible, arm_shared_za, arm_preserves_za))
|
||||
void svstr_vnum_za(uint32_t, uint64_t, void *);
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#undef __ai
|
||||
|
||||
#endif /* __ARM_SME_H */
|
||||
7829
lib/include/arm_sve.h
vendored
7829
lib/include/arm_sve.h
vendored
File diff suppressed because it is too large
Load Diff
345
lib/include/arm_vector_types.h
vendored
Normal file
345
lib/include/arm_vector_types.h
vendored
Normal file
@ -0,0 +1,345 @@
|
||||
/*===---- arm_vector_types - ARM vector type ------===
|
||||
*
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)
|
||||
#error "This file should not be used standalone. Please include arm_neon.h or arm_sve.h instead"
|
||||
|
||||
#endif
|
||||
#ifndef __ARM_NEON_TYPES_H
|
||||
#define __ARM_NEON_TYPES_H
|
||||
typedef float float32_t;
|
||||
typedef __fp16 float16_t;
|
||||
#ifdef __aarch64__
|
||||
typedef double float64_t;
|
||||
#endif
|
||||
|
||||
typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
|
||||
typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
|
||||
typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
|
||||
typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
|
||||
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
|
||||
typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
|
||||
typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
|
||||
typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
|
||||
#ifdef __aarch64__
|
||||
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
|
||||
#endif
|
||||
|
||||
typedef struct int8x8x2_t {
|
||||
int8x8_t val[2];
|
||||
} int8x8x2_t;
|
||||
|
||||
typedef struct int8x16x2_t {
|
||||
int8x16_t val[2];
|
||||
} int8x16x2_t;
|
||||
|
||||
typedef struct int16x4x2_t {
|
||||
int16x4_t val[2];
|
||||
} int16x4x2_t;
|
||||
|
||||
typedef struct int16x8x2_t {
|
||||
int16x8_t val[2];
|
||||
} int16x8x2_t;
|
||||
|
||||
typedef struct int32x2x2_t {
|
||||
int32x2_t val[2];
|
||||
} int32x2x2_t;
|
||||
|
||||
typedef struct int32x4x2_t {
|
||||
int32x4_t val[2];
|
||||
} int32x4x2_t;
|
||||
|
||||
typedef struct int64x1x2_t {
|
||||
int64x1_t val[2];
|
||||
} int64x1x2_t;
|
||||
|
||||
typedef struct int64x2x2_t {
|
||||
int64x2_t val[2];
|
||||
} int64x2x2_t;
|
||||
|
||||
typedef struct uint8x8x2_t {
|
||||
uint8x8_t val[2];
|
||||
} uint8x8x2_t;
|
||||
|
||||
typedef struct uint8x16x2_t {
|
||||
uint8x16_t val[2];
|
||||
} uint8x16x2_t;
|
||||
|
||||
typedef struct uint16x4x2_t {
|
||||
uint16x4_t val[2];
|
||||
} uint16x4x2_t;
|
||||
|
||||
typedef struct uint16x8x2_t {
|
||||
uint16x8_t val[2];
|
||||
} uint16x8x2_t;
|
||||
|
||||
typedef struct uint32x2x2_t {
|
||||
uint32x2_t val[2];
|
||||
} uint32x2x2_t;
|
||||
|
||||
typedef struct uint32x4x2_t {
|
||||
uint32x4_t val[2];
|
||||
} uint32x4x2_t;
|
||||
|
||||
typedef struct uint64x1x2_t {
|
||||
uint64x1_t val[2];
|
||||
} uint64x1x2_t;
|
||||
|
||||
typedef struct uint64x2x2_t {
|
||||
uint64x2_t val[2];
|
||||
} uint64x2x2_t;
|
||||
|
||||
typedef struct float16x4x2_t {
|
||||
float16x4_t val[2];
|
||||
} float16x4x2_t;
|
||||
|
||||
typedef struct float16x8x2_t {
|
||||
float16x8_t val[2];
|
||||
} float16x8x2_t;
|
||||
|
||||
typedef struct float32x2x2_t {
|
||||
float32x2_t val[2];
|
||||
} float32x2x2_t;
|
||||
|
||||
typedef struct float32x4x2_t {
|
||||
float32x4_t val[2];
|
||||
} float32x4x2_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x2_t {
|
||||
float64x1_t val[2];
|
||||
} float64x1x2_t;
|
||||
|
||||
typedef struct float64x2x2_t {
|
||||
float64x2_t val[2];
|
||||
} float64x2x2_t;
|
||||
|
||||
#endif
|
||||
typedef struct int8x8x3_t {
|
||||
int8x8_t val[3];
|
||||
} int8x8x3_t;
|
||||
|
||||
typedef struct int8x16x3_t {
|
||||
int8x16_t val[3];
|
||||
} int8x16x3_t;
|
||||
|
||||
typedef struct int16x4x3_t {
|
||||
int16x4_t val[3];
|
||||
} int16x4x3_t;
|
||||
|
||||
typedef struct int16x8x3_t {
|
||||
int16x8_t val[3];
|
||||
} int16x8x3_t;
|
||||
|
||||
typedef struct int32x2x3_t {
|
||||
int32x2_t val[3];
|
||||
} int32x2x3_t;
|
||||
|
||||
typedef struct int32x4x3_t {
|
||||
int32x4_t val[3];
|
||||
} int32x4x3_t;
|
||||
|
||||
typedef struct int64x1x3_t {
|
||||
int64x1_t val[3];
|
||||
} int64x1x3_t;
|
||||
|
||||
typedef struct int64x2x3_t {
|
||||
int64x2_t val[3];
|
||||
} int64x2x3_t;
|
||||
|
||||
typedef struct uint8x8x3_t {
|
||||
uint8x8_t val[3];
|
||||
} uint8x8x3_t;
|
||||
|
||||
typedef struct uint8x16x3_t {
|
||||
uint8x16_t val[3];
|
||||
} uint8x16x3_t;
|
||||
|
||||
typedef struct uint16x4x3_t {
|
||||
uint16x4_t val[3];
|
||||
} uint16x4x3_t;
|
||||
|
||||
typedef struct uint16x8x3_t {
|
||||
uint16x8_t val[3];
|
||||
} uint16x8x3_t;
|
||||
|
||||
typedef struct uint32x2x3_t {
|
||||
uint32x2_t val[3];
|
||||
} uint32x2x3_t;
|
||||
|
||||
typedef struct uint32x4x3_t {
|
||||
uint32x4_t val[3];
|
||||
} uint32x4x3_t;
|
||||
|
||||
typedef struct uint64x1x3_t {
|
||||
uint64x1_t val[3];
|
||||
} uint64x1x3_t;
|
||||
|
||||
typedef struct uint64x2x3_t {
|
||||
uint64x2_t val[3];
|
||||
} uint64x2x3_t;
|
||||
|
||||
typedef struct float16x4x3_t {
|
||||
float16x4_t val[3];
|
||||
} float16x4x3_t;
|
||||
|
||||
typedef struct float16x8x3_t {
|
||||
float16x8_t val[3];
|
||||
} float16x8x3_t;
|
||||
|
||||
typedef struct float32x2x3_t {
|
||||
float32x2_t val[3];
|
||||
} float32x2x3_t;
|
||||
|
||||
typedef struct float32x4x3_t {
|
||||
float32x4_t val[3];
|
||||
} float32x4x3_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x3_t {
|
||||
float64x1_t val[3];
|
||||
} float64x1x3_t;
|
||||
|
||||
typedef struct float64x2x3_t {
|
||||
float64x2_t val[3];
|
||||
} float64x2x3_t;
|
||||
|
||||
#endif
|
||||
typedef struct int8x8x4_t {
|
||||
int8x8_t val[4];
|
||||
} int8x8x4_t;
|
||||
|
||||
typedef struct int8x16x4_t {
|
||||
int8x16_t val[4];
|
||||
} int8x16x4_t;
|
||||
|
||||
typedef struct int16x4x4_t {
|
||||
int16x4_t val[4];
|
||||
} int16x4x4_t;
|
||||
|
||||
typedef struct int16x8x4_t {
|
||||
int16x8_t val[4];
|
||||
} int16x8x4_t;
|
||||
|
||||
typedef struct int32x2x4_t {
|
||||
int32x2_t val[4];
|
||||
} int32x2x4_t;
|
||||
|
||||
typedef struct int32x4x4_t {
|
||||
int32x4_t val[4];
|
||||
} int32x4x4_t;
|
||||
|
||||
typedef struct int64x1x4_t {
|
||||
int64x1_t val[4];
|
||||
} int64x1x4_t;
|
||||
|
||||
typedef struct int64x2x4_t {
|
||||
int64x2_t val[4];
|
||||
} int64x2x4_t;
|
||||
|
||||
typedef struct uint8x8x4_t {
|
||||
uint8x8_t val[4];
|
||||
} uint8x8x4_t;
|
||||
|
||||
typedef struct uint8x16x4_t {
|
||||
uint8x16_t val[4];
|
||||
} uint8x16x4_t;
|
||||
|
||||
typedef struct uint16x4x4_t {
|
||||
uint16x4_t val[4];
|
||||
} uint16x4x4_t;
|
||||
|
||||
typedef struct uint16x8x4_t {
|
||||
uint16x8_t val[4];
|
||||
} uint16x8x4_t;
|
||||
|
||||
typedef struct uint32x2x4_t {
|
||||
uint32x2_t val[4];
|
||||
} uint32x2x4_t;
|
||||
|
||||
typedef struct uint32x4x4_t {
|
||||
uint32x4_t val[4];
|
||||
} uint32x4x4_t;
|
||||
|
||||
typedef struct uint64x1x4_t {
|
||||
uint64x1_t val[4];
|
||||
} uint64x1x4_t;
|
||||
|
||||
typedef struct uint64x2x4_t {
|
||||
uint64x2_t val[4];
|
||||
} uint64x2x4_t;
|
||||
|
||||
typedef struct float16x4x4_t {
|
||||
float16x4_t val[4];
|
||||
} float16x4x4_t;
|
||||
|
||||
typedef struct float16x8x4_t {
|
||||
float16x8_t val[4];
|
||||
} float16x8x4_t;
|
||||
|
||||
typedef struct float32x2x4_t {
|
||||
float32x2_t val[4];
|
||||
} float32x2x4_t;
|
||||
|
||||
typedef struct float32x4x4_t {
|
||||
float32x4_t val[4];
|
||||
} float32x4x4_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
typedef struct float64x1x4_t {
|
||||
float64x1_t val[4];
|
||||
} float64x1x4_t;
|
||||
|
||||
typedef struct float64x2x4_t {
|
||||
float64x2_t val[4];
|
||||
} float64x2x4_t;
|
||||
|
||||
#endif
|
||||
typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
|
||||
|
||||
typedef struct bfloat16x4x2_t {
|
||||
bfloat16x4_t val[2];
|
||||
} bfloat16x4x2_t;
|
||||
|
||||
typedef struct bfloat16x8x2_t {
|
||||
bfloat16x8_t val[2];
|
||||
} bfloat16x8x2_t;
|
||||
|
||||
typedef struct bfloat16x4x3_t {
|
||||
bfloat16x4_t val[3];
|
||||
} bfloat16x4x3_t;
|
||||
|
||||
typedef struct bfloat16x8x3_t {
|
||||
bfloat16x8_t val[3];
|
||||
} bfloat16x8x3_t;
|
||||
|
||||
typedef struct bfloat16x4x4_t {
|
||||
bfloat16x4_t val[4];
|
||||
} bfloat16x4x4_t;
|
||||
|
||||
typedef struct bfloat16x8x4_t {
|
||||
bfloat16x8_t val[4];
|
||||
} bfloat16x8x4_t;
|
||||
|
||||
#endif // __ARM_NEON_TYPES_H
|
||||
27
lib/include/avx2intrin.h
vendored
27
lib/include/avx2intrin.h
vendored
@ -15,8 +15,12 @@
|
||||
#define __AVX2INTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx2,no-evex512"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx2,no-evex512"), __min_vector_width__(128)))
|
||||
|
||||
/* SSE4 Multiple Packed Sums of Absolute Difference. */
|
||||
/// Computes sixteen sum of absolute difference (SAD) operations on sets of
|
||||
@ -1307,6 +1311,23 @@ _mm256_min_epu32(__m256i __a, __m256i __b)
|
||||
return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b);
|
||||
}
|
||||
|
||||
/// Creates a 32-bit integer mask from the most significant bit of each byte
|
||||
/// in the 256-bit integer vector in \a __a and returns the result.
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// FOR i := 0 TO 31
|
||||
/// j := i*8
|
||||
/// result[i] := __a[j+7]
|
||||
/// ENDFOR
|
||||
/// \endcode
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c VPMOVMSKB instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A 256-bit integer vector containing the source bytes.
|
||||
/// \returns The 32-bit integer mask.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS256
|
||||
_mm256_movemask_epi8(__m256i __a)
|
||||
{
|
||||
@ -2962,7 +2983,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b)
|
||||
/// A pointer to the 32-byte aligned memory containing the vector to load.
|
||||
/// \returns A 256-bit integer vector loaded from memory.
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_stream_load_si256(__m256i const *__V)
|
||||
_mm256_stream_load_si256(const void *__V)
|
||||
{
|
||||
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
|
||||
return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);
|
||||
|
||||
5
lib/include/avx512bf16intrin.h
vendored
5
lib/include/avx512bf16intrin.h
vendored
@ -20,10 +20,11 @@ typedef __bf16 __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
|
||||
typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead")));
|
||||
|
||||
#define __DEFAULT_FN_ATTRS512 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16,evex512"), \
|
||||
__min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bf16,no-evex512")))
|
||||
|
||||
/// Convert One BF16 Data to One Single Float Data.
|
||||
///
|
||||
|
||||
5
lib/include/avx512bitalgintrin.h
vendored
5
lib/include/avx512bitalgintrin.h
vendored
@ -15,7 +15,10 @@
|
||||
#define __AVX512BITALGINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bitalg,evex512"), \
|
||||
__min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_popcnt_epi16(__m512i __A)
|
||||
|
||||
66
lib/include/avx512bwintrin.h
vendored
66
lib/include/avx512bwintrin.h
vendored
@ -18,8 +18,12 @@ typedef unsigned int __mmask32;
|
||||
typedef unsigned long long __mmask64;
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
|
||||
#define __DEFAULT_FN_ATTRS512 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bw,evex512"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bw,no-evex512")))
|
||||
|
||||
static __inline __mmask32 __DEFAULT_FN_ATTRS
|
||||
_knot_mask32(__mmask32 __M)
|
||||
@ -27,9 +31,7 @@ _knot_mask32(__mmask32 __M)
|
||||
return __builtin_ia32_knotsi(__M);
|
||||
}
|
||||
|
||||
static __inline __mmask64 __DEFAULT_FN_ATTRS
|
||||
_knot_mask64(__mmask64 __M)
|
||||
{
|
||||
static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) {
|
||||
return __builtin_ia32_knotdi(__M);
|
||||
}
|
||||
|
||||
@ -39,9 +41,8 @@ _kand_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kand_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -51,9 +52,8 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kandn_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -63,9 +63,8 @@ _kor_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kor_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -75,9 +74,8 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kxnor_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -87,9 +85,8 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kxor_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -112,14 +109,12 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
|
||||
return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
|
||||
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
|
||||
}
|
||||
|
||||
@ -148,14 +143,12 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
|
||||
return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
|
||||
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
|
||||
}
|
||||
|
||||
@ -171,9 +164,8 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B)
|
||||
return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_kadd_mask64(__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -214,8 +206,7 @@ _load_mask32(__mmask32 *__A) {
|
||||
return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_load_mask64(__mmask64 *__A) {
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) {
|
||||
return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
|
||||
}
|
||||
|
||||
@ -224,8 +215,8 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) {
|
||||
*(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_store_mask64(__mmask64 *__A, __mmask64 __B) {
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
|
||||
__mmask64 __B) {
|
||||
*(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
|
||||
}
|
||||
|
||||
@ -1714,9 +1705,8 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
|
||||
(__v64qi) _mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
|
||||
__mmask64 __B) {
|
||||
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
|
||||
(__mmask64) __B);
|
||||
}
|
||||
|
||||
4
lib/include/avx512cdintrin.h
vendored
4
lib/include/avx512cdintrin.h
vendored
@ -15,7 +15,9 @@
|
||||
#define __AVX512CDINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512cd,evex512"), __min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_conflict_epi64 (__m512i __A)
|
||||
|
||||
6
lib/include/avx512dqintrin.h
vendored
6
lib/include/avx512dqintrin.h
vendored
@ -15,8 +15,10 @@
|
||||
#define __AVX512DQINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512dq,no-evex512")))
|
||||
|
||||
static __inline __mmask8 __DEFAULT_FN_ATTRS
|
||||
_knot_mask8(__mmask8 __M)
|
||||
|
||||
10
lib/include/avx512fintrin.h
vendored
10
lib/include/avx512fintrin.h
vendored
@ -167,9 +167,13 @@ typedef enum
|
||||
} _MM_MANTISSA_SIGN_ENUM;
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512f,no-evex512"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512f,no-evex512")))
|
||||
|
||||
/* Create vectors with repeated elements */
|
||||
|
||||
|
||||
10
lib/include/avx512fp16intrin.h
vendored
10
lib/include/avx512fp16intrin.h
vendored
@ -22,13 +22,15 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS512 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
|
||||
__min_vector_width__(512)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512fp16,evex512"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512fp16,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512fp16,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
|
||||
static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {
|
||||
|
||||
4
lib/include/avx512ifmaintrin.h
vendored
4
lib/include/avx512ifmaintrin.h
vendored
@ -15,7 +15,9 @@
|
||||
#define __IFMAINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512ifma,evex512"), __min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
|
||||
|
||||
10
lib/include/avx512ifmavlintrin.h
vendored
10
lib/include/avx512ifmavlintrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __IFMAVLINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512ifma,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512ifma,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
#define _mm_madd52hi_epu64(X, Y, Z) \
|
||||
((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \
|
||||
|
||||
5
lib/include/avx512pfintrin.h
vendored
5
lib/include/avx512pfintrin.h
vendored
@ -14,9 +14,6 @@
|
||||
#ifndef __AVX512PFINTRIN_H
|
||||
#define __AVX512PFINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf")))
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
|
||||
(void const *)(addr), (int)(scale), \
|
||||
@ -92,6 +89,4 @@
|
||||
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(void *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
|
||||
2
lib/include/avx512vbmi2intrin.h
vendored
2
lib/include/avx512vbmi2intrin.h
vendored
@ -15,7 +15,7 @@
|
||||
#define __AVX512VBMI2INTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2,evex512"), __min_vector_width__(512)))
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
|
||||
5
lib/include/avx512vbmiintrin.h
vendored
5
lib/include/avx512vbmiintrin.h
vendored
@ -15,8 +15,9 @@
|
||||
#define __VBMIINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512)))
|
||||
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vbmi,evex512"), __min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B)
|
||||
|
||||
11
lib/include/avx512vbmivlintrin.h
vendored
11
lib/include/avx512vbmivlintrin.h
vendored
@ -15,9 +15,14 @@
|
||||
#define __VBMIVLINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256)))
|
||||
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vbmi,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vbmi,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
|
||||
|
||||
14
lib/include/avx512vlbf16intrin.h
vendored
14
lib/include/avx512vlbf16intrin.h
vendored
@ -15,12 +15,14 @@
|
||||
#ifndef __AVX512VLBF16INTRIN_H
|
||||
#define __AVX512VLBF16INTRIN_H
|
||||
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl, avx512bf16"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl, avx512bf16"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512bf16,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512bf16,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
/// Convert Two Packed Single Data to One Packed BF16 Data.
|
||||
///
|
||||
|
||||
10
lib/include/avx512vlbitalgintrin.h
vendored
10
lib/include/avx512vlbitalgintrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __AVX512VLBITALGINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512bitalg,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512bitalg,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_popcnt_epi16(__m256i __A)
|
||||
|
||||
10
lib/include/avx512vlbwintrin.h
vendored
10
lib/include/avx512vlbwintrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __AVX512VLBWINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512bw,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512bw,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
/* Integer compare */
|
||||
|
||||
|
||||
11
lib/include/avx512vlcdintrin.h
vendored
11
lib/include/avx512vlcdintrin.h
vendored
@ -14,9 +14,14 @@
|
||||
#define __AVX512VLCDINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256)))
|
||||
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512cd,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512cd,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_broadcastmb_epi64 (__mmask8 __A)
|
||||
|
||||
10
lib/include/avx512vldqintrin.h
vendored
10
lib/include/avx512vldqintrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __AVX512VLDQINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512dq,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512dq,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
|
||||
|
||||
4
lib/include/avx512vlfp16intrin.h
vendored
4
lib/include/avx512vlfp16intrin.h
vendored
@ -19,11 +19,11 @@
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512fp16, avx512vl"), \
|
||||
__target__("avx512fp16,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512fp16, avx512vl"), \
|
||||
__target__("avx512fp16,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
|
||||
static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {
|
||||
|
||||
10
lib/include/avx512vlintrin.h
vendored
10
lib/include/avx512vlintrin.h
vendored
@ -14,8 +14,14 @@
|
||||
#ifndef __AVX512VLINTRIN_H
|
||||
#define __AVX512VLINTRIN_H
|
||||
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
typedef short __v2hi __attribute__((__vector_size__(4)));
|
||||
typedef char __v4qi __attribute__((__vector_size__(4)));
|
||||
|
||||
10
lib/include/avx512vlvbmi2intrin.h
vendored
10
lib/include/avx512vlvbmi2intrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __AVX512VLVBMI2INTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vbmi2,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vbmi2,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
|
||||
10
lib/include/avx512vlvnniintrin.h
vendored
10
lib/include/avx512vlvnniintrin.h
vendored
@ -15,8 +15,14 @@
|
||||
#define __AVX512VLVNNIINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vnni,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vnni,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
|
||||
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
|
||||
|
||||
10
lib/include/avx512vlvp2intersectintrin.h
vendored
10
lib/include/avx512vlvp2intersectintrin.h
vendored
@ -28,12 +28,14 @@
|
||||
#ifndef _AVX512VLVP2INTERSECT_H
|
||||
#define _AVX512VLVP2INTERSECT_H
|
||||
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vp2intersect,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vl,avx512vp2intersect,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
/// Store, in an even/odd pair of mask registers, the indicators of the
|
||||
/// locations of value matches between dwords in operands __a and __b.
|
||||
|
||||
5
lib/include/avx512vnniintrin.h
vendored
5
lib/include/avx512vnniintrin.h
vendored
@ -15,8 +15,9 @@
|
||||
#define __AVX512VNNIINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512)))
|
||||
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vnni,evex512"), __min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
|
||||
5
lib/include/avx512vp2intersectintrin.h
vendored
5
lib/include/avx512vp2intersectintrin.h
vendored
@ -28,8 +28,9 @@
|
||||
#ifndef _AVX512VP2INTERSECT_H
|
||||
#define _AVX512VP2INTERSECT_H
|
||||
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vp2intersect"), \
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vp2intersect,evex512"), \
|
||||
__min_vector_width__(512)))
|
||||
|
||||
/// Store, in an even/odd pair of mask registers, the indicators of the
|
||||
|
||||
4
lib/include/avx512vpopcntdqintrin.h
vendored
4
lib/include/avx512vpopcntdqintrin.h
vendored
@ -17,7 +17,9 @@
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vpopcntdq,evex512"), \
|
||||
__min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);
|
||||
|
||||
8
lib/include/avx512vpopcntdqvlintrin.h
vendored
8
lib/include/avx512vpopcntdqvlintrin.h
vendored
@ -17,9 +17,13 @@
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vpopcntdq,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512vpopcntdq,avx512vl,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_popcnt_epi64(__m128i __A) {
|
||||
|
||||
14
lib/include/avxintrin.h
vendored
14
lib/include/avxintrin.h
vendored
@ -50,8 +50,12 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
|
||||
#endif
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
|
||||
/* Arithmetic */
|
||||
/// Adds two 256-bit vectors of [4 x double].
|
||||
@ -3563,7 +3567,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
|
||||
/// \param __b
|
||||
/// A 256-bit integer vector containing the values to be moved.
|
||||
static __inline void __DEFAULT_FN_ATTRS
|
||||
_mm256_stream_si256(__m256i *__a, __m256i __b)
|
||||
_mm256_stream_si256(void *__a, __m256i __b)
|
||||
{
|
||||
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
|
||||
__builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
|
||||
@ -3583,7 +3587,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b)
|
||||
/// \param __b
|
||||
/// A 256-bit vector of [4 x double] containing the values to be moved.
|
||||
static __inline void __DEFAULT_FN_ATTRS
|
||||
_mm256_stream_pd(double *__a, __m256d __b)
|
||||
_mm256_stream_pd(void *__a, __m256d __b)
|
||||
{
|
||||
typedef __v4df __v4df_aligned __attribute__((aligned(32)));
|
||||
__builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
|
||||
@ -3604,7 +3608,7 @@ _mm256_stream_pd(double *__a, __m256d __b)
|
||||
/// \param __a
|
||||
/// A 256-bit vector of [8 x float] containing the values to be moved.
|
||||
static __inline void __DEFAULT_FN_ATTRS
|
||||
_mm256_stream_ps(float *__p, __m256 __a)
|
||||
_mm256_stream_ps(void *__p, __m256 __a)
|
||||
{
|
||||
typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));
|
||||
__builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);
|
||||
|
||||
305
lib/include/bmiintrin.h
vendored
305
lib/include/bmiintrin.h
vendored
@ -19,18 +19,17 @@
|
||||
to use it as a potentially faster version of BSF. */
|
||||
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
#define _tzcnt_u16(a) (__tzcnt_u16((a)))
|
||||
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 16-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 16-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see _tzcnt_u16
|
||||
static __inline__ unsigned short __RELAXED_FN_ATTRS
|
||||
__tzcnt_u16(unsigned short __X)
|
||||
{
|
||||
@ -41,13 +40,30 @@ __tzcnt_u16(unsigned short __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
|
||||
/// \code
|
||||
/// unsigned short _tzcnt_u16(unsigned short __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 16-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 16-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see __tzcnt_u16
|
||||
#define _tzcnt_u16 __tzcnt_u16
|
||||
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 32-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see _mm_tzcnt_32
|
||||
/// \see { _mm_tzcnt_32 _tzcnt_u32 }
|
||||
static __inline__ unsigned int __RELAXED_FN_ATTRS
|
||||
__tzcnt_u32(unsigned int __X)
|
||||
{
|
||||
@ -58,20 +74,35 @@ __tzcnt_u32(unsigned int __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An 32-bit integer containing the number of trailing zero bits in
|
||||
/// \returns A 32-bit integer containing the number of trailing zero bits in
|
||||
/// the operand.
|
||||
/// \see __tzcnt_u32
|
||||
/// \see { __tzcnt_u32 _tzcnt_u32 }
|
||||
static __inline__ int __RELAXED_FN_ATTRS
|
||||
_mm_tzcnt_32(unsigned int __X)
|
||||
{
|
||||
return (int)__builtin_ia32_tzcnt_u32(__X);
|
||||
}
|
||||
|
||||
#define _tzcnt_u32(a) (__tzcnt_u32((a)))
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned int _tzcnt_u32(unsigned int __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 32-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see { _mm_tzcnt_32 __tzcnt_u32 }
|
||||
#define _tzcnt_u32 __tzcnt_u32
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
@ -79,13 +110,13 @@ _mm_tzcnt_32(unsigned int __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 64-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see _mm_tzcnt_64
|
||||
/// \see { _mm_tzcnt_64 _tzcnt_u64 }
|
||||
static __inline__ unsigned long long __RELAXED_FN_ATTRS
|
||||
__tzcnt_u64(unsigned long long __X)
|
||||
{
|
||||
@ -96,20 +127,35 @@ __tzcnt_u64(unsigned long long __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> TZCNT </c> instruction.
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns A 64-bit integer containing the number of trailing zero bits in
|
||||
/// the operand.
|
||||
/// \see __tzcnt_u64
|
||||
/// \see { __tzcnt_u64 _tzcnt_u64 }
|
||||
static __inline__ long long __RELAXED_FN_ATTRS
|
||||
_mm_tzcnt_64(unsigned long long __X)
|
||||
{
|
||||
return (long long)__builtin_ia32_tzcnt_u64(__X);
|
||||
}
|
||||
|
||||
#define _tzcnt_u64(a) (__tzcnt_u64((a)))
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned long long _tzcnt_u64(unsigned long long __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c TZCNT instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 64-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see { _mm_tzcnt_64 __tzcnt_u64 }
|
||||
#define _tzcnt_u64 __tzcnt_u64
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
@ -121,21 +167,12 @@ _mm_tzcnt_64(unsigned long long __X)
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
|
||||
|
||||
#define _andn_u32(a, b) (__andn_u32((a), (b)))
|
||||
|
||||
/* _bextr_u32 != __bextr_u32 */
|
||||
#define _blsi_u32(a) (__blsi_u32((a)))
|
||||
|
||||
#define _blsmsk_u32(a) (__blsmsk_u32((a)))
|
||||
|
||||
#define _blsr_u32(a) (__blsr_u32((a)))
|
||||
|
||||
/// Performs a bitwise AND of the second operand with the one's
|
||||
/// complement of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> ANDN </c> instruction.
|
||||
/// This intrinsic corresponds to the \c ANDN instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer containing one of the operands.
|
||||
@ -143,19 +180,40 @@ _mm_tzcnt_64(unsigned long long __X)
|
||||
/// An unsigned integer containing one of the operands.
|
||||
/// \returns An unsigned integer containing the bitwise AND of the second
|
||||
/// operand with the one's complement of the first operand.
|
||||
/// \see _andn_u32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__andn_u32(unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return ~__X & __Y;
|
||||
}
|
||||
|
||||
/// Performs a bitwise AND of the second operand with the one's
|
||||
/// complement of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned int _andn_u32(unsigned int __X, unsigned int __Y);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ANDN instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer containing one of the operands.
|
||||
/// \param __Y
|
||||
/// An unsigned integer containing one of the operands.
|
||||
/// \returns An unsigned integer containing the bitwise AND of the second
|
||||
/// operand with the one's complement of the first operand.
|
||||
/// \see __andn_u32
|
||||
#define _andn_u32 __andn_u32
|
||||
|
||||
/* AMD-specified, double-leading-underscore version of BEXTR */
|
||||
/// Extracts the specified bits from the first operand and returns them
|
||||
/// in the least significant bits of the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer whose bits are to be extracted.
|
||||
@ -178,7 +236,7 @@ __bextr_u32(unsigned int __X, unsigned int __Y)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer whose bits are to be extracted.
|
||||
@ -203,7 +261,7 @@ _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer whose bits are to be extracted.
|
||||
@ -224,33 +282,89 @@ _bextr2_u32(unsigned int __X, unsigned int __Y) {
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSI </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BLSI instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer whose bits are to be cleared.
|
||||
/// \returns An unsigned integer containing the result of clearing the bits from
|
||||
/// the source operand.
|
||||
/// \see _blsi_u32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__blsi_u32(unsigned int __X)
|
||||
{
|
||||
return __X & -__X;
|
||||
}
|
||||
|
||||
/// Clears all bits in the source except for the least significant bit
|
||||
/// containing a value of 1 and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned int _blsi_u32(unsigned int __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSI instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer whose bits are to be cleared.
|
||||
/// \returns An unsigned integer containing the result of clearing the bits from
|
||||
/// the source operand.
|
||||
/// \see __blsi_u32
|
||||
#define _blsi_u32 __blsi_u32
|
||||
|
||||
/// Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// including the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSMSK instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer used to create the mask.
|
||||
/// \returns An unsigned integer containing the newly created mask.
|
||||
/// \see _blsmsk_u32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__blsmsk_u32(unsigned int __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
}
|
||||
|
||||
/// Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// including the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
|
||||
/// \code
|
||||
/// unsigned int _blsmsk_u32(unsigned int __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSMSK instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer used to create the mask.
|
||||
/// \returns An unsigned integer containing the newly created mask.
|
||||
/// \see __blsmsk_u32
|
||||
#define _blsmsk_u32 __blsmsk_u32
|
||||
|
||||
/// Clears the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer containing the operand to be cleared.
|
||||
/// \returns An unsigned integer containing the result of clearing the source
|
||||
/// operand.
|
||||
/// \see _blsr_u32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__blsmsk_u32(unsigned int __X)
|
||||
__blsr_u32(unsigned int __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
|
||||
/// Clears the least significant bit that is set to 1 in the source
|
||||
@ -258,35 +372,27 @@ __blsmsk_u32(unsigned int __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSR </c> instruction.
|
||||
/// \code
|
||||
/// unsigned int _blsr_u32(unsigned int __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned integer containing the operand to be cleared.
|
||||
/// \returns An unsigned integer containing the result of clearing the source
|
||||
/// operand.
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__blsr_u32(unsigned int __X)
|
||||
{
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
/// \see __blsr_u32
|
||||
#define _blsr_u32 __blsr_u32
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
#define _andn_u64(a, b) (__andn_u64((a), (b)))
|
||||
|
||||
/* _bextr_u64 != __bextr_u64 */
|
||||
#define _blsi_u64(a) (__blsi_u64((a)))
|
||||
|
||||
#define _blsmsk_u64(a) (__blsmsk_u64((a)))
|
||||
|
||||
#define _blsr_u64(a) (__blsr_u64((a)))
|
||||
|
||||
/// Performs a bitwise AND of the second operand with the one's
|
||||
/// complement of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> ANDN </c> instruction.
|
||||
/// This intrinsic corresponds to the \c ANDN instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer containing one of the operands.
|
||||
@ -294,19 +400,41 @@ __blsr_u32(unsigned int __X)
|
||||
/// An unsigned 64-bit integer containing one of the operands.
|
||||
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
|
||||
/// operand with the one's complement of the first operand.
|
||||
/// \see _andn_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__andn_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return ~__X & __Y;
|
||||
}
|
||||
|
||||
/// Performs a bitwise AND of the second operand with the one's
|
||||
/// complement of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned long long _andn_u64(unsigned long long __X,
|
||||
/// unsigned long long __Y);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c ANDN instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer containing one of the operands.
|
||||
/// \param __Y
|
||||
/// An unsigned 64-bit integer containing one of the operands.
|
||||
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
|
||||
/// operand with the one's complement of the first operand.
|
||||
/// \see __andn_u64
|
||||
#define _andn_u64 __andn_u64
|
||||
|
||||
/* AMD-specified, double-leading-underscore version of BEXTR */
|
||||
/// Extracts the specified bits from the first operand and returns them
|
||||
/// in the least significant bits of the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose bits are to be extracted.
|
||||
@ -329,7 +457,7 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose bits are to be extracted.
|
||||
@ -354,7 +482,7 @@ _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BEXTR </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BEXTR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose bits are to be extracted.
|
||||
@ -375,33 +503,89 @@ _bextr2_u64(unsigned long long __X, unsigned long long __Y) {
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSI </c> instruction.
|
||||
/// This intrinsic corresponds to the \c BLSI instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose bits are to be cleared.
|
||||
/// \returns An unsigned 64-bit integer containing the result of clearing the
|
||||
/// bits from the source operand.
|
||||
/// \see _blsi_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__blsi_u64(unsigned long long __X)
|
||||
{
|
||||
return __X & -__X;
|
||||
}
|
||||
|
||||
/// Clears all bits in the source except for the least significant bit
|
||||
/// containing a value of 1 and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// unsigned long long _blsi_u64(unsigned long long __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSI instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer whose bits are to be cleared.
|
||||
/// \returns An unsigned 64-bit integer containing the result of clearing the
|
||||
/// bits from the source operand.
|
||||
/// \see __blsi_u64
|
||||
#define _blsi_u64 __blsi_u64
|
||||
|
||||
/// Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// including the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSMSK instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer used to create the mask.
|
||||
/// \returns An unsigned 64-bit integer containing the newly created mask.
|
||||
/// \see _blsmsk_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__blsmsk_u64(unsigned long long __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
}
|
||||
|
||||
/// Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// including the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
|
||||
/// \code
|
||||
/// unsigned long long _blsmsk_u64(unsigned long long __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSMSK instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer used to create the mask.
|
||||
/// \returns An unsigned 64-bit integer containing the newly created mask.
|
||||
/// \see __blsmsk_u64
|
||||
#define _blsmsk_u64 __blsmsk_u64
|
||||
|
||||
/// Clears the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer containing the operand to be cleared.
|
||||
/// \returns An unsigned 64-bit integer containing the result of clearing the
|
||||
/// source operand.
|
||||
/// \see _blsr_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__blsmsk_u64(unsigned long long __X)
|
||||
__blsr_u64(unsigned long long __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
|
||||
/// Clears the least significant bit that is set to 1 in the source
|
||||
@ -409,17 +593,18 @@ __blsmsk_u64(unsigned long long __X)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> BLSR </c> instruction.
|
||||
/// \code
|
||||
/// unsigned long long _blsr_u64(unsigned long long __X);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c BLSR instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// An unsigned 64-bit integer containing the operand to be cleared.
|
||||
/// \returns An unsigned 64-bit integer containing the result of clearing the
|
||||
/// source operand.
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__blsr_u64(unsigned long long __X)
|
||||
{
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
/// \see __blsr_u64
|
||||
#define _blsr_u64 __blsr_u64
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
|
||||
9
lib/include/cuda_wrappers/bits/basic_string.h
vendored
Normal file
9
lib/include/cuda_wrappers/bits/basic_string.h
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
// CUDA headers define __noinline__ which interferes with libstdc++'s use of
|
||||
// `__attribute((__noinline__))`. In order to avoid compilation error,
|
||||
// temporarily unset __noinline__ when we include affected libstdc++ header.
|
||||
|
||||
#pragma push_macro("__noinline__")
|
||||
#undef __noinline__
|
||||
#include_next "bits/basic_string.h"
|
||||
|
||||
#pragma pop_macro("__noinline__")
|
||||
9
lib/include/cuda_wrappers/bits/basic_string.tcc
vendored
Normal file
9
lib/include/cuda_wrappers/bits/basic_string.tcc
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
// CUDA headers define __noinline__ which interferes with libstdc++'s use of
|
||||
// `__attribute((__noinline__))`. In order to avoid compilation error,
|
||||
// temporarily unset __noinline__ when we include affected libstdc++ header.
|
||||
|
||||
#pragma push_macro("__noinline__")
|
||||
#undef __noinline__
|
||||
#include_next "bits/basic_string.tcc"
|
||||
|
||||
#pragma pop_macro("__noinline__")
|
||||
20
lib/include/emmintrin.h
vendored
20
lib/include/emmintrin.h
vendored
@ -50,11 +50,11 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
|
||||
__min_vector_width__(128)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("sse2,no-evex512"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS_MMX \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), \
|
||||
__min_vector_width__(64)))
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("mmx,sse2,no-evex512"), __min_vector_width__(64)))
|
||||
|
||||
/// Adds lower double-precision values in both operands and returns the
|
||||
/// sum in the lower 64 bits of the result. The upper 64 bits of the result
|
||||
@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p,
|
||||
/// A pointer to the 128-bit aligned memory location used to store the value.
|
||||
/// \param __a
|
||||
/// A vector of [2 x double] containing the 64-bit values to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p,
|
||||
__m128d __a) {
|
||||
__builtin_nontemporal_store((__v2df)__a, (__v2df *)__p);
|
||||
}
|
||||
@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
|
||||
/// A pointer to the 128-bit aligned memory location used to store the value.
|
||||
/// \param __a
|
||||
/// A 128-bit integer vector containing the values to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p,
|
||||
__m128i __a) {
|
||||
__builtin_nontemporal_store((__v2di)__a, (__v2di *)__p);
|
||||
}
|
||||
@ -3983,8 +3983,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
|
||||
/// A 32-bit integer containing the value to be stored.
|
||||
static __inline__ void
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
|
||||
_mm_stream_si32(int *__p, int __a) {
|
||||
__builtin_ia32_movnti(__p, __a);
|
||||
_mm_stream_si32(void *__p, int __a) {
|
||||
__builtin_ia32_movnti((int *)__p, __a);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
@ -4003,8 +4003,8 @@ static __inline__ void
|
||||
/// A 64-bit integer containing the value to be stored.
|
||||
static __inline__ void
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
|
||||
_mm_stream_si64(long long *__p, long long __a) {
|
||||
__builtin_ia32_movnti64(__p, __a);
|
||||
_mm_stream_si64(void *__p, long long __a) {
|
||||
__builtin_ia32_movnti64((long long *)__p, __a);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
29
lib/include/gfniintrin.h
vendored
29
lib/include/gfniintrin.h
vendored
@ -15,19 +15,36 @@
|
||||
#define __GFNIINTRIN_H
|
||||
|
||||
/* Default attributes for simple form (no masking). */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("gfni,no-evex512"), __min_vector_width__(128)))
|
||||
|
||||
/* Default attributes for YMM unmasked form. */
|
||||
#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS_Y \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx,gfni,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
/* Default attributes for ZMM unmasked forms. */
|
||||
#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS_Z \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512f,evex512,gfni"), \
|
||||
__min_vector_width__(512)))
|
||||
/* Default attributes for ZMM masked forms. */
|
||||
#define __DEFAULT_FN_ATTRS_Z_MASK __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS_Z_MASK \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bw,evex512,gfni"), \
|
||||
__min_vector_width__(512)))
|
||||
|
||||
/* Default attributes for VLX masked forms. */
|
||||
#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS_VL128 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bw,avx512vl,gfni,no-evex512"), \
|
||||
__min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS_VL256 \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("avx512bw,avx512vl,gfni,no-evex512"), \
|
||||
__min_vector_width__(256)))
|
||||
|
||||
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
|
||||
|
||||
818
lib/include/ia32intrin.h
vendored
818
lib/include/ia32intrin.h
vendored
File diff suppressed because it is too large
Load Diff
69
lib/include/immintrin.h
vendored
69
lib/include/immintrin.h
vendored
@ -291,11 +291,13 @@
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__RDPID__)
|
||||
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
|
||||
/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
|
||||
///
|
||||
/// \returns The 32-bit contents of the MSR.
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
|
||||
_rdpid_u32(void) {
|
||||
return __builtin_ia32_rdpid();
|
||||
@ -488,6 +490,15 @@ _writegsbase_u64(unsigned long long __V)
|
||||
* field inside of it.
|
||||
*/
|
||||
|
||||
/// Load a 16-bit value from memory and swap its bytes.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the 16-bit value to load.
|
||||
/// \returns The byte-swapped value.
|
||||
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_loadbe_i16(void const * __P) {
|
||||
struct __loadu_i16 {
|
||||
@ -496,6 +507,16 @@ _loadbe_i16(void const * __P) {
|
||||
return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
|
||||
}
|
||||
|
||||
/// Swap the bytes of a 16-bit value and store it to memory.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the memory for storing the swapped value.
|
||||
/// \param __D
|
||||
/// The 16-bit value to be byte-swapped.
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_storebe_i16(void * __P, short __D) {
|
||||
struct __storeu_i16 {
|
||||
@ -504,6 +525,15 @@ _storebe_i16(void * __P, short __D) {
|
||||
((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
|
||||
}
|
||||
|
||||
/// Load a 32-bit value from memory and swap its bytes.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the 32-bit value to load.
|
||||
/// \returns The byte-swapped value.
|
||||
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_loadbe_i32(void const * __P) {
|
||||
struct __loadu_i32 {
|
||||
@ -512,6 +542,16 @@ _loadbe_i32(void const * __P) {
|
||||
return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
|
||||
}
|
||||
|
||||
/// Swap the bytes of a 32-bit value and store it to memory.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the memory for storing the swapped value.
|
||||
/// \param __D
|
||||
/// The 32-bit value to be byte-swapped.
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_storebe_i32(void * __P, int __D) {
|
||||
struct __storeu_i32 {
|
||||
@ -521,6 +561,15 @@ _storebe_i32(void * __P, int __D) {
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Load a 64-bit value from memory and swap its bytes.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the 64-bit value to load.
|
||||
/// \returns The byte-swapped value.
|
||||
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_loadbe_i64(void const * __P) {
|
||||
struct __loadu_i64 {
|
||||
@ -529,6 +578,16 @@ _loadbe_i64(void const * __P) {
|
||||
return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
|
||||
}
|
||||
|
||||
/// Swap the bytes of a 64-bit value and store it to memory.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the MOVBE instruction.
|
||||
///
|
||||
/// \param __P
|
||||
/// A pointer to the memory for storing the swapped value.
|
||||
/// \param __D
|
||||
/// The 64-bit value to be byte-swapped.
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_storebe_i64(void * __P, long long __D) {
|
||||
struct __storeu_i64 {
|
||||
@ -578,9 +637,13 @@ _storebe_i64(void * __P, long long __D) {
|
||||
#include <cetintrin.h>
|
||||
#endif
|
||||
|
||||
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
|
||||
* whereas others are also available at all times. */
|
||||
/* Intrinsics inside adcintrin.h are available at all times. */
|
||||
#include <adcintrin.h>
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__ADX__)
|
||||
#include <adxintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__RDSEED__)
|
||||
|
||||
16
lib/include/intrin.h
vendored
16
lib/include/intrin.h
vendored
@ -572,6 +572,22 @@ unsigned char __readx18byte(unsigned long offset);
|
||||
unsigned short __readx18word(unsigned long offset);
|
||||
unsigned long __readx18dword(unsigned long offset);
|
||||
unsigned __int64 __readx18qword(unsigned long offset);
|
||||
|
||||
double _CopyDoubleFromInt64(__int64);
|
||||
float _CopyFloatFromInt32(__int32);
|
||||
__int32 _CopyInt32FromFloat(float);
|
||||
__int64 _CopyInt64FromDouble(double);
|
||||
|
||||
unsigned int _CountLeadingOnes(unsigned long);
|
||||
unsigned int _CountLeadingOnes64(unsigned __int64);
|
||||
unsigned int _CountLeadingSigns(long);
|
||||
unsigned int _CountLeadingSigns64(__int64);
|
||||
unsigned int _CountLeadingZeros(unsigned long);
|
||||
unsigned int _CountLeadingZeros64(unsigned _int64);
|
||||
unsigned int _CountOneBits(unsigned long);
|
||||
unsigned int _CountOneBits64(unsigned __int64);
|
||||
|
||||
void __cdecl __prefetch(void *);
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|
||||
14
lib/include/larchintrin.h
vendored
14
lib/include/larchintrin.h
vendored
@ -156,7 +156,7 @@ extern __inline unsigned char
|
||||
return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1);
|
||||
}
|
||||
|
||||
extern __inline unsigned char
|
||||
extern __inline unsigned short
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__iocsrrd_h(unsigned int _1) {
|
||||
return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1);
|
||||
@ -228,6 +228,18 @@ extern __inline void
|
||||
((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2)))
|
||||
#endif
|
||||
|
||||
#define __frecipe_s(/*float*/ _1) \
|
||||
(float)__builtin_loongarch_frecipe_s((float)_1)
|
||||
|
||||
#define __frecipe_d(/*double*/ _1) \
|
||||
(double)__builtin_loongarch_frecipe_d((double)_1)
|
||||
|
||||
#define __frsqrte_s(/*float*/ _1) \
|
||||
(float)__builtin_loongarch_frsqrte_s((float)_1)
|
||||
|
||||
#define __frsqrte_d(/*double*/ _1) \
|
||||
(double)__builtin_loongarch_frsqrte_d((double)_1)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
3884
lib/include/lasxintrin.h
vendored
Normal file
3884
lib/include/lasxintrin.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6
lib/include/limits.h
vendored
6
lib/include/limits.h
vendored
@ -66,10 +66,8 @@
|
||||
|
||||
#define CHAR_BIT __CHAR_BIT__
|
||||
|
||||
/* C2x 5.2.4.2.1 */
|
||||
/* FIXME: This is using the placeholder dates Clang produces for these macros
|
||||
in C2x mode; switch to the correct values once they've been published. */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L
|
||||
/* C23 5.2.4.2.1 */
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
#define BOOL_WIDTH __BOOL_WIDTH__
|
||||
#define CHAR_WIDTH CHAR_BIT
|
||||
#define SCHAR_WIDTH CHAR_BIT
|
||||
|
||||
34
lib/include/llvm_libc_wrappers/assert.h
vendored
Normal file
34
lib/include/llvm_libc_wrappers/assert.h
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
//===-- Wrapper for C standard assert.h declarations on the GPU ------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
|
||||
#define __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
|
||||
|
||||
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
|
||||
#error "This file is for GPU offloading compilation only"
|
||||
#endif
|
||||
|
||||
#include_next <assert.h>
|
||||
|
||||
#if __has_include(<llvm-libc-decls/assert.h>)
|
||||
|
||||
#if defined(__HIP__) || defined(__CUDA__)
|
||||
#define __LIBC_ATTRS __attribute__((device))
|
||||
#endif
|
||||
|
||||
#pragma omp begin declare target
|
||||
|
||||
#include <llvm-libc-decls/assert.h>
|
||||
|
||||
#pragma omp end declare target
|
||||
|
||||
#undef __LIBC_ATTRS
|
||||
|
||||
#endif
|
||||
|
||||
#endif // __CLANG_LLVM_LIBC_WRAPPERS_ASSERT_H__
|
||||
17
lib/include/llvm_libc_wrappers/ctype.h
vendored
17
lib/include/llvm_libc_wrappers/ctype.h
vendored
@ -13,8 +13,19 @@
|
||||
#error "This file is for GPU offloading compilation only"
|
||||
#endif
|
||||
|
||||
// The GNU headers like to define 'toupper' and 'tolower' redundantly. This is
|
||||
// necessary to prevent it from doing that and remapping our implementation.
|
||||
#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
|
||||
#pragma push_macro("__USE_EXTERN_INLINES")
|
||||
#undef __USE_EXTERN_INLINES
|
||||
#endif
|
||||
|
||||
#include_next <ctype.h>
|
||||
|
||||
#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
|
||||
#pragma pop_macro("__USE_EXTERN_INLINES")
|
||||
#endif
|
||||
|
||||
#if __has_include(<llvm-libc-decls/ctype.h>)
|
||||
|
||||
#if defined(__HIP__) || defined(__CUDA__)
|
||||
@ -26,6 +37,7 @@
|
||||
|
||||
#pragma push_macro("isalnum")
|
||||
#pragma push_macro("isalpha")
|
||||
#pragma push_macro("isascii")
|
||||
#pragma push_macro("isblank")
|
||||
#pragma push_macro("iscntrl")
|
||||
#pragma push_macro("isdigit")
|
||||
@ -36,11 +48,13 @@
|
||||
#pragma push_macro("isspace")
|
||||
#pragma push_macro("isupper")
|
||||
#pragma push_macro("isxdigit")
|
||||
#pragma push_macro("toascii")
|
||||
#pragma push_macro("tolower")
|
||||
#pragma push_macro("toupper")
|
||||
|
||||
#undef isalnum
|
||||
#undef isalpha
|
||||
#undef isascii
|
||||
#undef iscntrl
|
||||
#undef isdigit
|
||||
#undef islower
|
||||
@ -51,6 +65,7 @@
|
||||
#undef isupper
|
||||
#undef isblank
|
||||
#undef isxdigit
|
||||
#undef toascii
|
||||
#undef tolower
|
||||
#undef toupper
|
||||
|
||||
@ -64,6 +79,7 @@
|
||||
#if !defined(__NVPTX__) && !defined(__AMDGPU__)
|
||||
#pragma pop_macro("isalnum")
|
||||
#pragma pop_macro("isalpha")
|
||||
#pragma pop_macro("isascii")
|
||||
#pragma pop_macro("isblank")
|
||||
#pragma pop_macro("iscntrl")
|
||||
#pragma pop_macro("isdigit")
|
||||
@ -74,6 +90,7 @@
|
||||
#pragma pop_macro("isspace")
|
||||
#pragma pop_macro("isupper")
|
||||
#pragma pop_macro("isxdigit")
|
||||
#pragma pop_macro("toascii")
|
||||
#pragma pop_macro("tolower")
|
||||
#pragma pop_macro("toupper")
|
||||
#endif
|
||||
|
||||
52
lib/include/llvm_libc_wrappers/stdio.h
vendored
52
lib/include/llvm_libc_wrappers/stdio.h
vendored
@ -6,21 +6,58 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
|
||||
#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
|
||||
|
||||
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
|
||||
#error "This file is for GPU offloading compilation only"
|
||||
#endif
|
||||
|
||||
#include_next <stdio.h>
|
||||
|
||||
// In some old versions of glibc, other standard headers sometimes define
|
||||
// special macros (e.g., __need_FILE) before including stdio.h to cause stdio.h
|
||||
// to produce special definitions. Future includes of stdio.h when those
|
||||
// special macros are undefined are expected to produce the normal definitions
|
||||
// from stdio.h.
|
||||
//
|
||||
// We do not apply our include guard (__CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__)
|
||||
// unconditionally to the above include_next. Otherwise, after an occurrence of
|
||||
// the first glibc stdio.h use case described above, the include_next would be
|
||||
// skipped for remaining includes of stdio.h, leaving required symbols
|
||||
// undefined.
|
||||
//
|
||||
// We make the following assumptions to handle all use cases:
|
||||
//
|
||||
// 1. If the above include_next produces special glibc definitions, then (a) it
|
||||
// does not produce the normal definitions that we must intercept below, (b)
|
||||
// the current file was included from a glibc header that already defined
|
||||
// __GLIBC__ (usually by including glibc's <features.h>), and (c) the above
|
||||
// include_next does not define _STDIO_H. In that case, we skip the rest of
|
||||
// the current file and don't guard against future includes.
|
||||
// 2. If the above include_next produces the normal stdio.h definitions, then
|
||||
// either (a) __GLIBC__ is not defined because C headers are from some other
|
||||
// libc implementation or (b) the above include_next defines _STDIO_H to
|
||||
// prevent the above include_next from having any effect in the future.
|
||||
#if !defined(__GLIBC__) || defined(_STDIO_H)
|
||||
|
||||
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
|
||||
#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
|
||||
|
||||
#if __has_include(<llvm-libc-decls/stdio.h>)
|
||||
|
||||
#if defined(__HIP__) || defined(__CUDA__)
|
||||
#define __LIBC_ATTRS __attribute__((device))
|
||||
#endif
|
||||
|
||||
// Some headers provide these as macros. Temporarily undefine them so they do
|
||||
// not conflict with any definitions for the GPU.
|
||||
|
||||
#pragma push_macro("stdout")
|
||||
#pragma push_macro("stdin")
|
||||
#pragma push_macro("stderr")
|
||||
|
||||
#undef stdout
|
||||
#undef stderr
|
||||
#undef stdin
|
||||
|
||||
#pragma omp begin declare target
|
||||
|
||||
#include <llvm-libc-decls/stdio.h>
|
||||
@ -29,6 +66,15 @@
|
||||
|
||||
#undef __LIBC_ATTRS
|
||||
|
||||
// Restore the original macros when compiling on the host.
|
||||
#if !defined(__NVPTX__) && !defined(__AMDGPU__)
|
||||
#pragma pop_macro("stdout")
|
||||
#pragma pop_macro("stderr")
|
||||
#pragma pop_macro("stdin")
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif // __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
|
||||
|
||||
#endif
|
||||
|
||||
5
lib/include/llvm_libc_wrappers/stdlib.h
vendored
5
lib/include/llvm_libc_wrappers/stdlib.h
vendored
@ -23,8 +23,11 @@
|
||||
|
||||
#pragma omp begin declare target
|
||||
|
||||
// The LLVM C library uses this type so we forward declare it.
|
||||
// The LLVM C library uses these named types so we forward declare them.
|
||||
typedef void (*__atexithandler_t)(void);
|
||||
typedef int (*__bsearchcompare_t)(const void *, const void *);
|
||||
typedef int (*__qsortcompare_t)(const void *, const void *);
|
||||
typedef int (*__qsortrcompare_t)(const void *, const void *, void *);
|
||||
|
||||
// Enforce ABI compatibility with the structs used by the LLVM C library.
|
||||
_Static_assert(__builtin_offsetof(div_t, quot) == 0, "ABI mismatch!");
|
||||
|
||||
65
lib/include/llvm_libc_wrappers/string.h
vendored
65
lib/include/llvm_libc_wrappers/string.h
vendored
@ -13,9 +13,6 @@
|
||||
#error "This file is for GPU offloading compilation only"
|
||||
#endif
|
||||
|
||||
// FIXME: The GNU headers provide C++ standard compliant headers when in C++
|
||||
// mode and the LLVM libc does not. We cannot enable memchr, strchr, strchrnul,
|
||||
// strpbrk, strrchr, strstr, or strcasestr until this is addressed.
|
||||
#include_next <string.h>
|
||||
|
||||
#if __has_include(<llvm-libc-decls/string.h>)
|
||||
@ -26,8 +23,70 @@
|
||||
|
||||
#pragma omp begin declare target
|
||||
|
||||
// The GNU headers provide C++ standard compliant headers when in C++ mode and
|
||||
// the LLVM libc does not. We need to manually provide the definitions using the
|
||||
// same prototypes.
|
||||
#if defined(__cplusplus) && defined(__GLIBC__) && \
|
||||
defined(__CORRECT_ISO_CPP_STRING_H_PROTO)
|
||||
|
||||
#ifndef __LIBC_ATTRS
|
||||
#define __LIBC_ATTRS
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
void *memccpy(void *__restrict, const void *__restrict, int,
|
||||
size_t) __LIBC_ATTRS;
|
||||
int memcmp(const void *, const void *, size_t) __LIBC_ATTRS;
|
||||
void *memcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
|
||||
void *memmem(const void *, size_t, const void *, size_t) __LIBC_ATTRS;
|
||||
void *memmove(void *, const void *, size_t) __LIBC_ATTRS;
|
||||
void *mempcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
|
||||
void *memset(void *, int, size_t) __LIBC_ATTRS;
|
||||
char *stpcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
|
||||
char *stpncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
|
||||
char *strcat(char *__restrict, const char *__restrict) __LIBC_ATTRS;
|
||||
int strcmp(const char *, const char *) __LIBC_ATTRS;
|
||||
int strcoll(const char *, const char *) __LIBC_ATTRS;
|
||||
char *strcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
|
||||
size_t strcspn(const char *, const char *) __LIBC_ATTRS;
|
||||
char *strdup(const char *) __LIBC_ATTRS;
|
||||
size_t strlen(const char *) __LIBC_ATTRS;
|
||||
char *strncat(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
|
||||
int strncmp(const char *, const char *, size_t) __LIBC_ATTRS;
|
||||
char *strncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
|
||||
char *strndup(const char *, size_t) __LIBC_ATTRS;
|
||||
size_t strnlen(const char *, size_t) __LIBC_ATTRS;
|
||||
size_t strspn(const char *, const char *) __LIBC_ATTRS;
|
||||
char *strtok(char *__restrict, const char *__restrict) __LIBC_ATTRS;
|
||||
char *strtok_r(char *__restrict, const char *__restrict,
|
||||
char **__restrict) __LIBC_ATTRS;
|
||||
size_t strxfrm(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
|
||||
}
|
||||
|
||||
extern "C++" {
|
||||
char *strstr(char *, const char *) noexcept __LIBC_ATTRS;
|
||||
const char *strstr(const char *, const char *) noexcept __LIBC_ATTRS;
|
||||
char *strpbrk(char *, const char *) noexcept __LIBC_ATTRS;
|
||||
const char *strpbrk(const char *, const char *) noexcept __LIBC_ATTRS;
|
||||
char *strrchr(char *, int) noexcept __LIBC_ATTRS;
|
||||
const char *strrchr(const char *, int) noexcept __LIBC_ATTRS;
|
||||
char *strchr(char *, int) noexcept __LIBC_ATTRS;
|
||||
const char *strchr(const char *, int) noexcept __LIBC_ATTRS;
|
||||
char *strchrnul(char *, int) noexcept __LIBC_ATTRS;
|
||||
const char *strchrnul(const char *, int) noexcept __LIBC_ATTRS;
|
||||
char *strcasestr(char *, const char *) noexcept __LIBC_ATTRS;
|
||||
const char *strcasestr(const char *, const char *) noexcept __LIBC_ATTRS;
|
||||
void *memrchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
|
||||
const void *memrchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
|
||||
void *memchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
|
||||
const void *memchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
|
||||
}
|
||||
|
||||
#else
|
||||
#include <llvm-libc-decls/string.h>
|
||||
|
||||
#endif
|
||||
|
||||
#pragma omp end declare target
|
||||
|
||||
#undef __LIBC_ATTRS
|
||||
|
||||
34
lib/include/llvm_libc_wrappers/time.h
vendored
Normal file
34
lib/include/llvm_libc_wrappers/time.h
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
//===-- Wrapper for C standard time.h declarations on the GPU -------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
|
||||
#define __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
|
||||
|
||||
#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
|
||||
#error "This file is for GPU offloading compilation only"
|
||||
#endif
|
||||
|
||||
#include_next <time.h>
|
||||
|
||||
#if __has_include(<llvm-libc-decls/time.h>)
|
||||
|
||||
#if defined(__HIP__) || defined(__CUDA__)
|
||||
#define __LIBC_ATTRS __attribute__((device))
|
||||
#endif
|
||||
|
||||
#pragma omp begin declare target
|
||||
|
||||
_Static_assert(sizeof(clock_t) == sizeof(long), "ABI mismatch!");
|
||||
|
||||
#include <llvm-libc-decls/time.h>
|
||||
|
||||
#pragma omp end declare target
|
||||
|
||||
#endif
|
||||
|
||||
#endif // __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
|
||||
3750
lib/include/lsxintrin.h
vendored
Normal file
3750
lib/include/lsxintrin.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
12
lib/include/mmintrin.h
vendored
12
lib/include/mmintrin.h
vendored
@ -22,7 +22,9 @@ typedef short __v4hi __attribute__((__vector_size__(8)));
|
||||
typedef char __v8qi __attribute__((__vector_size__(8)));
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \
|
||||
__min_vector_width__(64)))
|
||||
|
||||
/// Clears the MMX state by setting the state of the x87 stack registers
|
||||
/// to empty.
|
||||
@ -31,10 +33,10 @@ typedef char __v8qi __attribute__((__vector_size__(8)));
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> EMMS </c> instruction.
|
||||
///
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
|
||||
_mm_empty(void)
|
||||
{
|
||||
__builtin_ia32_emms();
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__,
|
||||
__target__("mmx,no-evex512")))
|
||||
_mm_empty(void) {
|
||||
__builtin_ia32_emms();
|
||||
}
|
||||
|
||||
/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
|
||||
|
||||
157
lib/include/module.modulemap
vendored
157
lib/include/module.modulemap
vendored
@ -153,10 +153,163 @@ module _Builtin_intrinsics [system] [extern_c] {
|
||||
}
|
||||
}
|
||||
|
||||
module _Builtin_stddef_max_align_t [system] [extern_c] {
|
||||
header "__stddef_max_align_t.h"
|
||||
// Start -fbuiltin-headers-in-system-modules affected modules
|
||||
|
||||
// The following modules all ignore their headers when
|
||||
// -fbuiltin-headers-in-system-modules is passed, and many of
|
||||
// those headers join system modules when present.
|
||||
|
||||
// e.g. if -fbuiltin-headers-in-system-modules is passed, then
|
||||
// float.h will not be in the _Builtin_float module (that module
|
||||
// will be empty). If there is a system module that declares
|
||||
// `header "float.h"`, then the builtin float.h will join
|
||||
// that module. The system float.h (if present) will be treated
|
||||
// as a textual header in the sytem module.
|
||||
module _Builtin_float [system] {
|
||||
header "float.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_inttypes [system] {
|
||||
header "inttypes.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_iso646 [system] {
|
||||
header "iso646.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_limits [system] {
|
||||
header "limits.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stdalign [system] {
|
||||
header "stdalign.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stdarg [system] {
|
||||
textual header "stdarg.h"
|
||||
|
||||
explicit module __gnuc_va_list {
|
||||
header "__stdarg___gnuc_va_list.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module __va_copy {
|
||||
header "__stdarg___va_copy.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module va_arg {
|
||||
header "__stdarg_va_arg.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module va_copy {
|
||||
header "__stdarg_va_copy.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module va_list {
|
||||
header "__stdarg_va_list.h"
|
||||
export *
|
||||
}
|
||||
}
|
||||
|
||||
module _Builtin_stdatomic [system] {
|
||||
header "stdatomic.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stdbool [system] {
|
||||
header "stdbool.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stddef [system] {
|
||||
textual header "stddef.h"
|
||||
|
||||
// __stddef_max_align_t.h is always in this module, even if
|
||||
// -fbuiltin-headers-in-system-modules is passed.
|
||||
explicit module max_align_t {
|
||||
header "__stddef_max_align_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module null {
|
||||
header "__stddef_null.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module nullptr_t {
|
||||
header "__stddef_nullptr_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module offsetof {
|
||||
header "__stddef_offsetof.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module ptrdiff_t {
|
||||
header "__stddef_ptrdiff_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module rsize_t {
|
||||
header "__stddef_rsize_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module size_t {
|
||||
header "__stddef_size_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module unreachable {
|
||||
header "__stddef_unreachable.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module wchar_t {
|
||||
header "__stddef_wchar_t.h"
|
||||
export *
|
||||
}
|
||||
}
|
||||
|
||||
// wint_t is provided by <wchar.h> and not <stddef.h>. It's here
|
||||
// for compatibility, but must be explicitly requested. Therefore
|
||||
// __stddef_wint_t.h is not part of _Builtin_stddef. It is always in
|
||||
// this module even if -fbuiltin-headers-in-system-modules is passed.
|
||||
module _Builtin_stddef_wint_t [system] {
|
||||
header "__stddef_wint_t.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stdint [system] {
|
||||
header "stdint.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_stdnoreturn [system] {
|
||||
header "stdnoreturn.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_tgmath [system] {
|
||||
header "tgmath.h"
|
||||
export *
|
||||
}
|
||||
|
||||
module _Builtin_unwind [system] {
|
||||
header "unwind.h"
|
||||
export *
|
||||
}
|
||||
// End -fbuiltin-headers-in-system-modules affected modules
|
||||
|
||||
module opencl_c {
|
||||
requires opencl
|
||||
header "opencl-c.h"
|
||||
|
||||
5
lib/include/opencl-c-base.h
vendored
5
lib/include/opencl-c-base.h
vendored
@ -45,6 +45,7 @@
|
||||
#define __opencl_c_ext_fp32_local_atomic_add 1
|
||||
#define __opencl_c_ext_fp32_global_atomic_min_max 1
|
||||
#define __opencl_c_ext_fp32_local_atomic_min_max 1
|
||||
#define __opencl_c_ext_image_raw10_raw12 1
|
||||
|
||||
#endif // defined(__SPIR__) || defined(__SPIRV__)
|
||||
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
@ -477,6 +478,10 @@ typedef enum memory_order
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_3_0
|
||||
#define CLK_UNORM_INT_101010_2 0x10E0
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_3_0
|
||||
#ifdef __opencl_c_ext_image_raw10_raw12
|
||||
#define CLK_UNSIGNED_INT_RAW10_EXT 0x10E3
|
||||
#define CLK_UNSIGNED_INT_RAW12_EXT 0x10E4
|
||||
#endif // __opencl_c_ext_image_raw10_raw12
|
||||
|
||||
// Channel order, numbering must be aligned with cl_channel_order in cl.h
|
||||
//
|
||||
|
||||
2
lib/include/openmp_wrappers/cmath
vendored
2
lib/include/openmp_wrappers/cmath
vendored
@ -1,4 +1,4 @@
|
||||
/*===-- __clang_openmp_device_functions.h - OpenMP math declares ------ c++ -===
|
||||
/*===-- __clang_openmp_device_functions.h - OpenMP math declares -*- c++ -*-===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
|
||||
5
lib/include/pmmintrin.h
vendored
5
lib/include/pmmintrin.h
vendored
@ -17,8 +17,9 @@
|
||||
#include <emmintrin.h>
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, \
|
||||
__target__("sse3,no-evex512"), __min_vector_width__(128)))
|
||||
|
||||
/// Loads data from an unaligned memory location to elements in a 128-bit
|
||||
/// vector.
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user