Merge branch 'llvm6'

Zig now depends on LLVM 6.0.0.

The latest commit that depends on LLVM 5.0.1 is
2e010c60ae006944ae20ab8b3445598471c9f1e8.
This commit is contained in:
Andrew Kelley 2018-03-08 10:59:54 -05:00
commit 3200ebc2ea
923 changed files with 48773 additions and 15179 deletions

View File

@ -64,97 +64,118 @@ else()
include_directories(${LLVM_INCLUDE_DIRS})
include_directories(${CLANG_INCLUDE_DIRS})
set(EMBEDDED_LLD_LIB_SOURCES
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Driver/DarwinLdDriver.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Config/Version.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/LayoutPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ArchHandler.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ObjCPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/TLVPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/GOTPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ShimPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/WriterMachO.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/StubsPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/FileArchive.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/TargetOptionsCommandFlags.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/File.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/Error.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/SymbolTable.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/Reader.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/Reproduce.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/Writer.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/LinkingContext.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/Resolver.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/Common/Args.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/Common/ErrorHandler.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/Common/Memory.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/Common/Reproduce.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/Common/Strings.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/Common/TargetOptionsCommandFlags.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/Common/Threads.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/Common/Version.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/DefinedAtom.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/Error.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/File.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/LinkingContext.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/Reader.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/Resolver.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/SymbolTable.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Core/Writer.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/Driver/DarwinLdDriver.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/FileArchive.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ArchHandler.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/GOTPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/LayoutPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ObjCPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/ShimPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/StubsPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/TLVPass.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/WriterMachO.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp"
)
set(EMBEDDED_LLD_ELF_SOURCES
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/ScriptLexer.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/AArch64ErrataFix.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/AArch64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/AMDGPU.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/PPC.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/ARM.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/AVR.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/SPARCV9.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/Mips.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/AArch64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/X86_64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/PPC64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/MipsArchTree.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/PPC.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/PPC64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/SPARCV9.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/X86.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/GdbIndex.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/X86_64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Driver.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Relocations.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Error.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/LTO.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Strings.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/ScriptParser.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/MarkLive.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/SyntheticSections.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/SymbolTable.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/LinkerScript.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/DriverUtils.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/EhFrame.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Target.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Filesystem.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/OutputSections.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Symbols.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/GdbIndex.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/ICF.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/InputFiles.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Thunks.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/DriverUtils.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Writer.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/InputSection.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/LTO.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/LinkerScript.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/MapFile.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/MarkLive.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/OutputSections.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Relocations.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/ScriptLexer.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/ScriptParser.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Strings.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/SymbolTable.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Symbols.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/SyntheticSections.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Target.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Thunks.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Writer.cpp"
)
set(EMBEDDED_LLD_COFF_SOURCES
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/Chunks.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/DLL.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/Driver.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/Chunks.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/PDB.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/Error.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/LTO.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/Strings.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/MarkLive.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/SymbolTable.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/Symbols.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/DriverUtils.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/ICF.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/InputFiles.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/DriverUtils.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/Writer.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/LTO.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/MapFile.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/MarkLive.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/MinGW.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/PDB.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/Strings.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/SymbolTable.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/Symbols.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/COFF/Writer.cpp"
)
# Source files of LLD's MinGW driver; compiled below into the
# embedded_lld_mingw library.
set(EMBEDDED_LLD_MINGW_SOURCES
"${CMAKE_SOURCE_DIR}/deps/lld/MinGW/Driver.cpp"
)
# Source files of LLD's WebAssembly linker; compiled below into the
# embedded_lld_wasm library.
set(EMBEDDED_LLD_WASM_SOURCES
"${CMAKE_SOURCE_DIR}/deps/lld/wasm/Driver.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/wasm/InputFiles.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/wasm/InputSegment.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/wasm/OutputSections.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/wasm/Symbols.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/wasm/SymbolTable.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/wasm/Writer.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/wasm/WriterUtils.cpp"
)
add_library(embedded_lld_lib ${EMBEDDED_LLD_LIB_SOURCES})
add_library(embedded_lld_elf ${EMBEDDED_LLD_ELF_SOURCES})
add_library(embedded_lld_coff ${EMBEDDED_LLD_COFF_SOURCES})
add_library(embedded_lld_mingw ${EMBEDDED_LLD_MINGW_SOURCES})
add_library(embedded_lld_wasm ${EMBEDDED_LLD_WASM_SOURCES})
if(MSVC)
set(ZIG_LLD_COMPILE_FLAGS "-std=c++11 -D_CRT_SECURE_NO_WARNINGS /w")
else()
@ -172,6 +193,14 @@ else()
COMPILE_FLAGS ${ZIG_LLD_COMPILE_FLAGS}
LINK_FLAGS " "
)
set_target_properties(embedded_lld_mingw PROPERTIES
COMPILE_FLAGS ${ZIG_LLD_COMPILE_FLAGS}
LINK_FLAGS " "
)
set_target_properties(embedded_lld_wasm PROPERTIES
COMPILE_FLAGS ${ZIG_LLD_COMPILE_FLAGS}
LINK_FLAGS " "
)
target_include_directories(embedded_lld_lib PRIVATE
"${CMAKE_SOURCE_DIR}/deps/lld/include"
"${CMAKE_SOURCE_DIR}/deps/lld-prebuilt"
@ -188,13 +217,27 @@ else()
"${CMAKE_SOURCE_DIR}/deps/lld-prebuilt/COFF"
"${CMAKE_SOURCE_DIR}/deps/lld-prebuilt"
)
target_include_directories(embedded_lld_mingw PRIVATE
"${CMAKE_SOURCE_DIR}/deps/lld/MinGW"
"${CMAKE_SOURCE_DIR}/deps/lld/include"
"${CMAKE_SOURCE_DIR}/deps/lld-prebuilt/MinGW"
"${CMAKE_SOURCE_DIR}/deps/lld-prebuilt"
)
target_include_directories(embedded_lld_wasm PRIVATE
"${CMAKE_SOURCE_DIR}/deps/lld/wasm"
"${CMAKE_SOURCE_DIR}/deps/lld/include"
"${CMAKE_SOURCE_DIR}/deps/lld-prebuilt/wasm"
"${CMAKE_SOURCE_DIR}/deps/lld-prebuilt"
)
set(LLD_INCLUDE_DIRS "")
set(LLD_LIBRARIES
embedded_lld_elf
embedded_lld_coff
embedded_lld_mingw
embedded_lld_wasm
embedded_lld_lib
)
install(TARGETS embedded_lld_elf embedded_lld_coff embedded_lld_lib DESTINATION "${ZIG_CPP_LIB_DIR}")
install(TARGETS embedded_lld_elf embedded_lld_coff embedded_lld_mingw embedded_lld_wasm embedded_lld_lib DESTINATION "${ZIG_CPP_LIB_DIR}")
endif()
# No patches have been applied to SoftFloat-3d
@ -498,10 +541,12 @@ set(ZIG_C_HEADER_FILES
"adxintrin.h"
"altivec.h"
"ammintrin.h"
"arm64intr.h"
"arm_acle.h"
"arm_neon.h"
"armintr.h"
"avx2intrin.h"
"avx512bitalgintrin.h"
"avx512bwintrin.h"
"avx512cdintrin.h"
"avx512dqintrin.h"
@ -510,17 +555,25 @@ set(ZIG_C_HEADER_FILES
"avx512ifmaintrin.h"
"avx512ifmavlintrin.h"
"avx512pfintrin.h"
"avx512vbmi2intrin.h"
"avx512vbmiintrin.h"
"avx512vbmivlintrin.h"
"avx512vlbitalgintrin.h"
"avx512vlbwintrin.h"
"avx512vlcdintrin.h"
"avx512vldqintrin.h"
"avx512vlintrin.h"
"avx512vlvbmi2intrin.h"
"avx512vlvnniintrin.h"
"avx512vnniintrin.h"
"avx512vpopcntdqintrin.h"
"avx512vpopcntdqvlintrin.h"
"avxintrin.h"
"bmi2intrin.h"
"bmiintrin.h"
"cetintrin.h"
"clflushoptintrin.h"
"clwbintrin.h"
"clzerointrin.h"
"cpuid.h"
"cuda_wrappers/algorithm"
@ -532,6 +585,7 @@ set(ZIG_C_HEADER_FILES
"fma4intrin.h"
"fmaintrin.h"
"fxsrintrin.h"
"gfniintrin.h"
"htmintrin.h"
"htmxlintrin.h"
"ia32intrin.h"
@ -571,8 +625,10 @@ set(ZIG_C_HEADER_FILES
"tmmintrin.h"
"unwind.h"
"vadefs.h"
"vaesintrin.h"
"varargs.h"
"vecintrin.h"
"vpclmulqdqintrin.h"
"wmmintrin.h"
"x86intrin.h"
"xmmintrin.h"

View File

@ -124,14 +124,14 @@ libc. Create demo games using Zig.
##### POSIX
* cmake >= 2.8.5
* g++ >= 5.0.0 or clang >= 3.6.0
* LLVM, Clang, LLD development libraries == 5.0.1, compiled with the same gcc or clang version above
* gcc >= 5.0.0 or clang >= 3.6.0
* LLVM, Clang, LLD development libraries == 6.x, compiled with the same gcc or clang version above
##### Windows
* cmake >= 2.8.5
* Microsoft Visual Studio 2015
* LLVM, Clang, LLD development libraries == 5.0.1, compiled with the same MSVC version above
* LLVM, Clang, LLD development libraries == 6.x, compiled with the same MSVC version above
#### Instructions
@ -155,11 +155,11 @@ make install
`ZIG_LIBC_LIB_DIR` and `ZIG_LIBC_STATIC_LIB_DIR` are unused.
```
brew install cmake llvm@5
brew outdated llvm@5 || brew upgrade llvm@5
brew install cmake llvm@6
brew outdated llvm@6 || brew upgrade llvm@6
mkdir build
cd build
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@5/ -DCMAKE_INSTALL_PREFIX=$(pwd)
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@6/ -DCMAKE_INSTALL_PREFIX=$(pwd)
make install
./zig build --build-file ../build.zig test
```

View File

@ -131,15 +131,6 @@ __DEVICE__ float ldexp(float __arg, int __exp) {
__DEVICE__ float log(float __x) { return ::logf(__x); }
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
__DEVICE__ float nexttoward(float __from, double __to) {
return __builtin_nexttowardf(__from, __to);
}
__DEVICE__ double nexttoward(double __from, double __to) {
return __builtin_nexttoward(__from, __to);
}
__DEVICE__ float nexttowardf(float __from, double __to) {
return __builtin_nexttowardf(__from, __to);
}
__DEVICE__ float pow(float __base, float __exp) {
return ::powf(__base, __exp);
}
@ -157,6 +148,10 @@ __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
// Notably missing above is nexttoward. We omit it because
// libdevice doesn't provide an implementation, and we don't want to be in the
// business of implementing tricky libm functions in this header.
// Now we've defined everything we promised we'd define in
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
// fix up our math functions.
@ -295,13 +290,6 @@ ldexp(__T __x, int __exp) {
return std::ldexp((double)__x, __exp);
}
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
double>::type
nexttoward(__T __from, double __to) {
return std::nexttoward((double)__from, __to);
}
template <typename __T1, typename __T2>
__DEVICE__ typename __clang_cuda_enable_if<
std::numeric_limits<__T1>::is_specialized &&
@ -388,7 +376,6 @@ using ::lrint;
using ::lround;
using ::nearbyint;
using ::nextafter;
using ::nexttoward;
using ::pow;
using ::remainder;
using ::remquo;
@ -456,8 +443,6 @@ using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
using ::nexttowardf;
using ::nexttowardf;
using ::powf;
using ::remainderf;
using ::remquof;

View File

@ -34,23 +34,24 @@
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
#pragma push_macro("__MAKE_SHUFFLES")
#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask) \
inline __device__ int __FnName(int __val, int __offset, \
#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \
__Type) \
inline __device__ int __FnName(int __val, __Type __offset, \
int __width = warpSize) { \
return __IntIntrinsic(__val, __offset, \
((warpSize - __width) << 8) | (__Mask)); \
} \
inline __device__ float __FnName(float __val, int __offset, \
inline __device__ float __FnName(float __val, __Type __offset, \
int __width = warpSize) { \
return __FloatIntrinsic(__val, __offset, \
((warpSize - __width) << 8) | (__Mask)); \
} \
inline __device__ unsigned int __FnName(unsigned int __val, int __offset, \
inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \
int __width = warpSize) { \
return static_cast<unsigned int>( \
::__FnName(static_cast<int>(__val), __offset, __width)); \
} \
inline __device__ long long __FnName(long long __val, int __offset, \
inline __device__ long long __FnName(long long __val, __Type __offset, \
int __width = warpSize) { \
struct __Bits { \
int __a, __b; \
@ -65,12 +66,29 @@
memcpy(&__ret, &__tmp, sizeof(__tmp)); \
return __ret; \
} \
inline __device__ long __FnName(long __val, __Type __offset, \
int __width = warpSize) { \
_Static_assert(sizeof(long) == sizeof(long long) || \
sizeof(long) == sizeof(int)); \
if (sizeof(long) == sizeof(long long)) { \
return static_cast<long>( \
::__FnName(static_cast<long long>(__val), __offset, __width)); \
} else if (sizeof(long) == sizeof(int)) { \
return static_cast<long>( \
::__FnName(static_cast<int>(__val), __offset, __width)); \
} \
} \
inline __device__ unsigned long __FnName( \
unsigned long __val, __Type __offset, int __width = warpSize) { \
return static_cast<unsigned long>( \
::__FnName(static_cast<long>(__val), __offset, __width)); \
} \
inline __device__ unsigned long long __FnName( \
unsigned long long __val, int __offset, int __width = warpSize) { \
unsigned long long __val, __Type __offset, int __width = warpSize) { \
return static_cast<unsigned long long>(::__FnName( \
static_cast<unsigned long long>(__val), __offset, __width)); \
} \
inline __device__ double __FnName(double __val, int __offset, \
inline __device__ double __FnName(double __val, __Type __offset, \
int __width = warpSize) { \
long long __tmp; \
_Static_assert(sizeof(__tmp) == sizeof(__val)); \
@ -81,17 +99,166 @@
return __ret; \
}
__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f);
__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int);
// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
// maxLane.
__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0);
__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f);
__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f);
__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0,
unsigned int);
__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f,
unsigned int);
__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,
int);
#pragma pop_macro("__MAKE_SHUFFLES")
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
#if CUDA_VERSION >= 9000
#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
// __shfl_sync_* variants available in CUDA-9
#pragma push_macro("__MAKE_SYNC_SHUFFLES")
// Generates the overload set of one CUDA-9 `*_sync` warp shuffle.
//
//   __FnName         name of the generated function (e.g. __shfl_sync).
//   __IntIntrinsic   builtin used for the `int` overload.
//   __FloatIntrinsic builtin used for the `float` overload.
//   __Mask           low bits OR-ed into the last builtin argument; the high
//                    part is ((warpSize - __width) << 8).
//   __Type           type of the `__offset` parameter.
//
// Only the `int` and `float` overloads call the builtins directly. Every other
// overload is expressed in terms of those two:
//   * unsigned int       -> int
//   * long long          -> two int shuffles, one per 32-bit half
//   * unsigned long long -> long long
//   * long               -> long long or int, whichever has the same size
//   * unsigned long      -> long
//   * double             -> long long (bit pattern copied with memcpy)
//
// NOTE: comments inside the macro body must be /* ... */ and sit before the
// line-continuation backslash.
#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \
                             __Mask, __Type) \
  inline __device__ int __FnName(unsigned int __mask, int __val, \
                                 __Type __offset, int __width = warpSize) { \
    return __IntIntrinsic(__mask, __val, __offset, \
                          ((warpSize - __width) << 8) | (__Mask)); \
  } \
  inline __device__ float __FnName(unsigned int __mask, float __val, \
                                   __Type __offset, int __width = warpSize) { \
    return __FloatIntrinsic(__mask, __val, __offset, \
                            ((warpSize - __width) << 8) | (__Mask)); \
  } \
  inline __device__ unsigned int __FnName(unsigned int __mask, \
                                          unsigned int __val, __Type __offset, \
                                          int __width = warpSize) { \
    return static_cast<unsigned int>( \
        ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \
  } \
  inline __device__ long long __FnName(unsigned int __mask, long long __val, \
                                       __Type __offset, \
                                       int __width = warpSize) { \
    struct __Bits { \
      int __a, __b; \
    }; \
    _Static_assert(sizeof(__val) == sizeof(__Bits)); \
    _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \
    __Bits __tmp; \
    /* memcpy is (dest, src, n): load the input value into the two halves. */ \
    memcpy(&__tmp, &__val, sizeof(__val)); \
    __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \
    __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \
    long long __ret; \
    memcpy(&__ret, &__tmp, sizeof(__tmp)); \
    return __ret; \
  } \
  inline __device__ unsigned long long __FnName( \
      unsigned int __mask, unsigned long long __val, __Type __offset, \
      int __width = warpSize) { \
    /* Cast to the *signed* type: casting to unsigned long long would */ \
    /* select this very overload again and recurse without end. */ \
    return static_cast<unsigned long long>(::__FnName( \
        __mask, static_cast<long long>(__val), __offset, __width)); \
  } \
  inline __device__ long __FnName(unsigned int __mask, long __val, \
                                  __Type __offset, int __width = warpSize) { \
    _Static_assert(sizeof(long) == sizeof(long long) || \
                   sizeof(long) == sizeof(int)); \
    if (sizeof(long) == sizeof(long long)) { \
      return static_cast<long>(::__FnName( \
          __mask, static_cast<long long>(__val), __offset, __width)); \
    } else { \
      /* The assertion above leaves sizeof(long) == sizeof(int) as the */ \
      /* only other case, so every path returns a value. */ \
      return static_cast<long>( \
          ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \
    } \
  } \
  inline __device__ unsigned long __FnName( \
      unsigned int __mask, unsigned long __val, __Type __offset, \
      int __width = warpSize) { \
    return static_cast<unsigned long>( \
        ::__FnName(__mask, static_cast<long>(__val), __offset, __width)); \
  } \
  inline __device__ double __FnName(unsigned int __mask, double __val, \
                                    __Type __offset, int __width = warpSize) { \
    long long __tmp; \
    _Static_assert(sizeof(__tmp) == sizeof(__val)); \
    memcpy(&__tmp, &__val, sizeof(__val)); \
    __tmp = ::__FnName(__mask, __tmp, __offset, __width); \
    double __ret; \
    memcpy(&__ret, &__tmp, sizeof(__ret)); \
    return __ret; \
  }
__MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,
__nvvm_shfl_sync_idx_f32, 0x1f, int);
// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
// maxLane.
__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,
__nvvm_shfl_sync_up_f32, 0, unsigned int);
__MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,
__nvvm_shfl_sync_down_f32, 0x1f, unsigned int);
__MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,
__nvvm_shfl_sync_bfly_f32, 0x1f, int);
#pragma pop_macro("__MAKE_SYNC_SHUFFLES")
// Forwards `mask` to the __nvvm_bar_warp_sync builtin and returns nothing.
// The default mask 0xffffffff has all 32 bits set.
// NOTE(review): presumably one bit per warp lane, i.e. "all lanes" by
// default -- confirm against the CUDA documentation for __syncwarp.
inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {
return __nvvm_bar_warp_sync(mask);
}
// Forwards barrier identifier `id` to the __nvvm_barrier_sync builtin.
// NOTE(review): valid range of `id` is not visible here -- confirm against the
// PTX `barrier.sync` documentation.
inline __device__ void __barrier_sync(unsigned int id) {
__nvvm_barrier_sync(id);
}
// Forwards barrier identifier `id` and `count` to the __nvvm_barrier_sync_cnt
// builtin.
// NOTE(review): `count` is presumably the number of participating threads --
// confirm against the PTX `barrier.sync` documentation.
inline __device__ void __barrier_sync_count(unsigned int id,
unsigned int count) {
__nvvm_barrier_sync_cnt(id, count);
}
// Warp vote: returns the result of the __nvvm_vote_all_sync builtin for the
// given lane `mask` and predicate `pred`.
// NOTE(review): presumably non-zero only when `pred` holds on every
// participating lane -- confirm against the CUDA programming guide.
inline __device__ int __all_sync(unsigned int mask, int pred) {
return __nvvm_vote_all_sync(mask, pred);
}
// Warp vote: returns the result of the __nvvm_vote_any_sync builtin for the
// given lane `mask` and predicate `pred`.
// NOTE(review): presumably non-zero when `pred` holds on at least one
// participating lane -- confirm against the CUDA programming guide.
inline __device__ int __any_sync(unsigned int mask, int pred) {
return __nvvm_vote_any_sync(mask, pred);
}
// Warp vote: returns the result of the __nvvm_vote_uni_sync builtin for the
// given lane `mask` and predicate `pred`.
// NOTE(review): presumably non-zero when `pred` has the same value on every
// participating lane -- confirm against the PTX `vote.sync.uni` documentation.
inline __device__ int __uni_sync(unsigned int mask, int pred) {
return __nvvm_vote_uni_sync(mask, pred);
}
// Warp vote: returns the unsigned result of the __nvvm_vote_ballot_sync
// builtin for the given lane `mask` and predicate `pred`.
// NOTE(review): presumably a bit mask with one bit per lane whose `pred` is
// non-zero -- confirm against the CUDA programming guide.
inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
return __nvvm_vote_ballot_sync(mask, pred);
}
// Implemented as a (non-sync) ballot with a constant-true predicate: returns
// the result of __nvvm_vote_ballot(1).
inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }
// Forwards `mask`, `base` and `offset` unchanged to the __nvvm_fns builtin and
// returns its result.
// NOTE(review): the builtin presumably maps to the PTX `fns` (find n-th set
// bit) instruction -- confirm before relying on specific semantics.
inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {
return __nvvm_fns(mask, base, offset);
}
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
// Define __match* builtins CUDA-9 headers expect to see.
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
// 32-bit "match any": forwards lane `mask` and `value` to the
// __nvvm_match_any_sync_i32 builtin and returns its unsigned result.
inline __device__ unsigned int __match32_any_sync(unsigned int mask,
unsigned int value) {
return __nvvm_match_any_sync_i32(mask, value);
}
// 64-bit "match any": forwards lane `mask` and `value` to the
// __nvvm_match_any_sync_i64 builtin and returns its result as
// unsigned long long.
inline __device__ unsigned long long
__match64_any_sync(unsigned int mask, unsigned long long value) {
return __nvvm_match_any_sync_i64(mask, value);
}
// 32-bit "match all": forwards lane `mask`, `value` and the output pointer
// `pred` to the __nvvm_match_all_sync_i32p builtin and returns its result.
// NOTE(review): `pred` is presumably written by the builtin to report whether
// all lanes matched -- confirm against the CUDA programming guide.
inline __device__ unsigned int
__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {
return __nvvm_match_all_sync_i32p(mask, value, pred);
}
// 64-bit "match all": forwards lane `mask`, `value` and the output pointer
// `pred` to the __nvvm_match_all_sync_i64p builtin and returns its result.
// NOTE(review): `pred` is presumably written by the builtin to report whether
// all lanes matched -- confirm against the CUDA programming guide.
inline __device__ unsigned long long
__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {
return __nvvm_match_all_sync_i64p(mask, value, pred);
}
#include "crt/sm_70_rt.hpp"
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
#endif // CUDA_VERSION >= 9000
// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.
// Prevent the vanilla sm_32 intrinsics header from being included.

View File

@ -149,9 +149,6 @@ __DEVICE__ double nearbyint(double);
__DEVICE__ float nearbyint(float);
__DEVICE__ double nextafter(double, double);
__DEVICE__ float nextafter(float, float);
__DEVICE__ double nexttoward(double, double);
__DEVICE__ float nexttoward(float, double);
__DEVICE__ float nexttowardf(float, double);
__DEVICE__ double pow(double, double);
__DEVICE__ double pow(double, int);
__DEVICE__ float pow(float, float);
@ -185,6 +182,10 @@ __DEVICE__ float tgamma(float);
__DEVICE__ double trunc(double);
__DEVICE__ float trunc(float);
// Notably missing above is nexttoward, which we don't define on
// the device side because libdevice doesn't give us an implementation, and we
// don't want to be in the business of writing one ourselves.
// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
// picked up by other functions in the standard library (e.g. functions in
@ -255,7 +256,6 @@ using ::nan;
using ::nanf;
using ::nearbyint;
using ::nextafter;
using ::nexttoward;
using ::pow;
using ::remainder;
using ::remquo;

View File

@ -62,7 +62,7 @@
#include "cuda.h"
#if !defined(CUDA_VERSION)
#error "cuda.h did not define CUDA_VERSION"
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 8000
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9000
#error "Unsupported CUDA version!"
#endif
@ -86,7 +86,11 @@
#define __COMMON_FUNCTIONS_H__
#undef __CUDACC__
#if CUDA_VERSION < 9000
#define __CUDABE__
#else
#define __CUDA_LIBDEVICE__
#endif
// Disables definitions of device-side runtime support stubs in
// cuda_device_runtime_api.h
#include "driver_types.h"
@ -94,6 +98,7 @@
#include "host_defines.h"
#undef __CUDABE__
#undef __CUDA_LIBDEVICE__
#define __CUDACC__
#include "cuda_runtime.h"
@ -105,7 +110,9 @@
#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)
#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)
#if CUDA_VERSION < 9000
#include "crt/device_runtime.h"
#endif
#include "crt/host_runtime.h"
// device_runtime.h defines __cxa_* macros that will conflict with
// cxxabi.h.
@ -166,7 +173,18 @@ inline __host__ double __signbitd(double x) {
// __device__.
#pragma push_macro("__forceinline__")
#define __forceinline__ __device__ __inline__ __attribute__((always_inline))
#pragma push_macro("__float2half_rn")
#if CUDA_VERSION >= 9000
// CUDA-9 has conflicting prototypes for __float2half_rn(float f) in
// cuda_fp16.h[pp] and device_functions.hpp. We need to get the one in
// device_functions.hpp out of the way.
#define __float2half_rn __float2half_rn_disabled
#endif
#include "device_functions.hpp"
#pragma pop_macro("__float2half_rn")
// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we
// get the slow-but-accurate or fast-but-inaccurate versions of functions like
@ -247,7 +265,23 @@ static inline __device__ void __brkpt(int __c) { __brkpt(); }
#pragma push_macro("__GNUC__")
#undef __GNUC__
#define signbit __ignored_cuda_signbit
// CUDA-9 omits device-side definitions of some math functions if it sees
// include guard from math.h wrapper from libstdc++. We have to undo the header
// guard temporarily to get the definitions we need.
#pragma push_macro("_GLIBCXX_MATH_H")
#pragma push_macro("_LIBCPP_VERSION")
#if CUDA_VERSION >= 9000
#undef _GLIBCXX_MATH_H
// We also need to undo another guard that checks for libc++ 3.8+
#ifdef _LIBCPP_VERSION
#define _LIBCPP_VERSION 3700
#endif
#endif
#include "math_functions.hpp"
#pragma pop_macro("_GLIBCXX_MATH_H")
#pragma pop_macro("_LIBCPP_VERSION")
#pragma pop_macro("__GNUC__")
#pragma pop_macro("signbit")

49
c_headers/arm64intr.h Normal file
View File

@ -0,0 +1,49 @@
/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
/* Only include this if we're compiling for the windows platform. */
#ifndef _MSC_VER
#include_next <arm64intr.h>
#else
#ifndef __ARM64INTR_H
#define __ARM64INTR_H
/* Named constants for ARM64 memory-barrier kinds.
 * NOTE(review): the values appear to match the option-field encodings of the
 * AArch64 DMB/DSB instructions (SY = 0xF ... OSHLD = 0x1) -- confirm against
 * the Arm Architecture Reference Manual before relying on that. */
typedef enum
{
_ARM64_BARRIER_SY = 0xF,
_ARM64_BARRIER_ST = 0xE,
_ARM64_BARRIER_LD = 0xD,
_ARM64_BARRIER_ISH = 0xB,
_ARM64_BARRIER_ISHST = 0xA,
_ARM64_BARRIER_ISHLD = 0x9,
_ARM64_BARRIER_NSH = 0x7,
_ARM64_BARRIER_NSHST = 0x6,
_ARM64_BARRIER_NSHLD = 0x5,
_ARM64_BARRIER_OSH = 0x3,
_ARM64_BARRIER_OSHST = 0x2,
_ARM64_BARRIER_OSHLD = 0x1
} _ARM64INTR_BARRIER_TYPE;
#endif /* __ARM64INTR_H */
#endif /* _MSC_VER */

File diff suppressed because it is too large Load Diff

View File

@ -145,13 +145,21 @@ _mm256_andnot_si256(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_avg_epu8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
return (__m256i)__builtin_convertvector(
((__builtin_convertvector((__v32qu)__a, __v32hu) +
__builtin_convertvector((__v32qu)__b, __v32hu)) + 1)
>> 1, __v32qu);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_avg_epu16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
typedef unsigned int __v16su __attribute__((__vector_size__(64)));
return (__m256i)__builtin_convertvector(
((__builtin_convertvector((__v16hu)__a, __v16su) +
__builtin_convertvector((__v16hu)__b, __v16su)) + 1)
>> 1, __v16hu);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS

View File

@ -0,0 +1,97 @@
/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512BITALGINTRIN_H
#define __AVX512BITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg")))
/* Hands __A, reinterpreted as a __v32hi vector, to
 * __builtin_ia32_vpopcntw_512 and returns the builtin's result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi16(__m512i __A)
{
  __v32hi __words = (__v32hi) __A;
  return (__m512i) __builtin_ia32_vpopcntw_512(__words);
}
/* Masked form: computes _mm512_popcnt_epi16(__B) and passes it, together
 * with __A and the mask __U, to __builtin_ia32_selectw_512 (computed value
 * as the first vector operand, __A as the second). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
{
  __v32hi __counted = (__v32hi) _mm512_popcnt_epi16(__B);
  __v32hi __passthru = (__v32hi) __A;
  return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
                                              __counted,
                                              __passthru);
}
/* Zeroing form: delegates to _mm512_mask_popcnt_epi16 with the result of
 * _mm512_setzero_hi() as the first (pass-through) argument. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
{
  __m512i __zeros = (__m512i) _mm512_setzero_hi();
  return _mm512_mask_popcnt_epi16(__zeros, __U, __B);
}
/* Hands __A, reinterpreted as a __v64qi vector, to
 * __builtin_ia32_vpopcntb_512 and returns the builtin's result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi8(__m512i __A)
{
  __v64qi __bytes = (__v64qi) __A;
  return (__m512i) __builtin_ia32_vpopcntb_512(__bytes);
}
/* Masked form: computes _mm512_popcnt_epi8(__B) and passes it, together
 * with __A and the mask __U, to __builtin_ia32_selectb_512 (computed value
 * as the first vector operand, __A as the second). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
{
  __v64qi __counted = (__v64qi) _mm512_popcnt_epi8(__B);
  __v64qi __passthru = (__v64qi) __A;
  return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
                                              __counted,
                                              __passthru);
}
/* Zeroing form: delegates to _mm512_mask_popcnt_epi8 with the result of
 * _mm512_setzero_qi() as the first (pass-through) argument. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
{
  __m512i __zeros = (__m512i) _mm512_setzero_qi();
  return _mm512_mask_popcnt_epi8(__zeros, __U, __B);
}
/* Passes __A and __B (both viewed as __v64qi) and the mask __U to
 * __builtin_ia32_vpshufbitqmb512_mask; the builtin's result is returned as
 * a __mmask64. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B)
{
  __v64qi __lhs = (__v64qi) __A;
  __v64qi __rhs = (__v64qi) __B;
  return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask(__lhs, __rhs, __U);
}
/* Unmasked form: delegates to _mm512_mask_bitshuffle_epi64_mask with an
 * all-ones __mmask64 as the mask argument. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
{
  __mmask64 __every_bit = (__mmask64) -1;
  return _mm512_mask_bitshuffle_epi64_mask(__every_bit, __A, __B);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -56,293 +56,145 @@ _mm512_setzero_hi(void) {
/* Integer compare */
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_pcmpeqb512_mask with an
 * all-ones mask and returns the result as __mmask64. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
(__mmask64)-1);
}
/* Expands to __builtin_ia32_cmpb512_mask on (a) and (b) viewed as __v64qi,
 * with (p) cast to int as the predicate and an all-ones __mmask64 mask. */
#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
(__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
(__v64qi)(__m512i)(b), (int)(p), \
(__mmask64)-1); })
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_pcmpeqb512_mask with the
 * caller's __u as the mask operand. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
__u);
}
/* Expands to __builtin_ia32_cmpb512_mask on (a) and (b) viewed as __v64qi,
 * with (p) cast to int as the predicate and (m) cast to __mmask64 as the
 * mask operand. */
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
(__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
(__v64qi)(__m512i)(b), (int)(p), \
(__mmask64)(m)); })
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 0 and an all-ones mask; 0 presumably means "equal",
 * per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
(__mmask64)-1);
}
/* Expands to __builtin_ia32_ucmpb512_mask on (a) and (b) viewed as __v64qi,
 * with (p) cast to int as the predicate and an all-ones __mmask64 mask. */
#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
(__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
(__v64qi)(__m512i)(b), (int)(p), \
(__mmask64)-1); })
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 0 and __u as the mask operand; 0 presumably means
 * "equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
__u);
}
/* Expands to __builtin_ia32_ucmpb512_mask on (a) and (b) viewed as __v64qi,
 * with (p) cast to int as the predicate and (m) cast to __mmask64 as the
 * mask operand. */
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
(__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
(__v64qi)(__m512i)(b), (int)(p), \
(__mmask64)(m)); })
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_pcmpeqw512_mask with an
 * all-ones mask and returns the result as __mmask32. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
(__mmask32)-1);
}
/* Expands to __builtin_ia32_cmpw512_mask on (a) and (b) viewed as __v32hi,
 * with (p) cast to int as the predicate and an all-ones __mmask32 mask. */
#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
(__v32hi)(__m512i)(b), (int)(p), \
(__mmask32)-1); })
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_pcmpeqw512_mask with the
 * caller's __u as the mask operand. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
__u);
}
/* Expands to __builtin_ia32_cmpw512_mask on (a) and (b) viewed as __v32hi,
 * with (p) cast to int as the predicate and (m) cast to __mmask32 as the
 * mask operand. */
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
(__v32hi)(__m512i)(b), (int)(p), \
(__mmask32)(m)); })
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 0 and an all-ones mask; 0 presumably means "equal",
 * per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
(__mmask32)-1);
}
/* Expands to __builtin_ia32_ucmpw512_mask on (a) and (b) viewed as __v32hi,
 * with (p) cast to int as the predicate and an all-ones __mmask32 mask. */
#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
(__v32hi)(__m512i)(b), (int)(p), \
(__mmask32)-1); })
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 0 and __u as the mask operand; 0 presumably means
 * "equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
__u);
}
/* Expands to __builtin_ia32_ucmpw512_mask on (a) and (b) viewed as __v32hi,
 * with (p) cast to int as the predicate and (m) cast to __mmask32 as the
 * mask operand. */
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
(__v32hi)(__m512i)(b), (int)(p), \
(__mmask32)(m)); })
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_cmpb512_mask with
 * predicate immediate 5 and an all-ones mask; 5 presumably means
 * "greater than or equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
(__mmask64)-1);
}
/* Named-predicate aliases for the epi8 compares: each macro expands to
 * _mm512_cmp_epi8_mask (or _mm512_mask_cmp_epi8_mask for the (k, A, B)
 * forms) with the matching _MM_CMPINT_* constant as the predicate.
 * NOTE(review): static inline functions with several of these same names
 * also appear in this file view; confirm only one form is meant to be kept. */
#define _mm512_cmpeq_epi8_mask(A, B) \
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi8_mask(k, A, B) \
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi8_mask(A, B) \
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi8_mask(k, A, B) \
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi8_mask(A, B) \
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi8_mask(k, A, B) \
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi8_mask(A, B) \
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi8_mask(k, A, B) \
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi8_mask(A, B) \
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi8_mask(k, A, B) \
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi8_mask(A, B) \
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi8_mask(k, A, B) \
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_cmpb512_mask with
 * predicate immediate 5 and __u as the mask operand; 5 presumably means
 * "greater than or equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
__u);
}
/* Named-predicate aliases for the epu8 compares: each macro expands to
 * _mm512_cmp_epu8_mask (or _mm512_mask_cmp_epu8_mask for the (k, A, B)
 * forms) with the matching _MM_CMPINT_* constant as the predicate.
 * NOTE(review): static inline functions with several of these same names
 * also appear in this file view; confirm only one form is meant to be kept. */
#define _mm512_cmpeq_epu8_mask(A, B) \
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu8_mask(k, A, B) \
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu8_mask(A, B) \
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu8_mask(k, A, B) \
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu8_mask(A, B) \
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu8_mask(k, A, B) \
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu8_mask(A, B) \
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu8_mask(k, A, B) \
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu8_mask(A, B) \
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu8_mask(k, A, B) \
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu8_mask(A, B) \
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu8_mask(k, A, B) \
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 5 and an all-ones mask; 5 presumably means
 * "greater than or equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
(__mmask64)-1);
}
/* Named-predicate aliases for the epi16 compares: each macro expands to
 * _mm512_cmp_epi16_mask (or _mm512_mask_cmp_epi16_mask for the (k, A, B)
 * forms) with the matching _MM_CMPINT_* constant as the predicate.
 * NOTE(review): static inline functions with several of these same names
 * also appear in this file view; confirm only one form is meant to be kept. */
#define _mm512_cmpeq_epi16_mask(A, B) \
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi16_mask(k, A, B) \
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi16_mask(A, B) \
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi16_mask(k, A, B) \
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi16_mask(A, B) \
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi16_mask(k, A, B) \
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi16_mask(A, B) \
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi16_mask(k, A, B) \
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi16_mask(A, B) \
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi16_mask(k, A, B) \
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi16_mask(A, B) \
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi16_mask(k, A, B) \
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 5 and __u as the mask operand; 5 presumably means
 * "greater than or equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
__u);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_cmpw512_mask with
 * predicate immediate 5 and an all-ones mask; 5 presumably means
 * "greater than or equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
(__mmask32)-1);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_cmpw512_mask with
 * predicate immediate 5 and __u as the mask operand; 5 presumably means
 * "greater than or equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
__u);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 5 and an all-ones mask; 5 presumably means
 * "greater than or equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
(__mmask32)-1);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 5 and __u as the mask operand; 5 presumably means
 * "greater than or equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
__u);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_pcmpgtb512_mask with an
 * all-ones mask and returns the result as __mmask64. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
(__mmask64)-1);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_pcmpgtb512_mask with the
 * caller's __u as the mask operand. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
__u);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 6 and an all-ones mask; 6 presumably means
 * "greater than", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
(__mmask64)-1);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 6 and __u as the mask operand; 6 presumably means
 * "greater than", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
__u);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_pcmpgtw512_mask with an
 * all-ones mask and returns the result as __mmask32. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
(__mmask32)-1);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_pcmpgtw512_mask with the
 * caller's __u as the mask operand. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
__u);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 6 and an all-ones mask; 6 presumably means
 * "greater than", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
(__mmask32)-1);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 6 and __u as the mask operand; 6 presumably means
 * "greater than", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
__u);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_cmpb512_mask with
 * predicate immediate 2 and an all-ones mask; 2 presumably means
 * "less than or equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
(__mmask64)-1);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_cmpb512_mask with
 * predicate immediate 2 and __u as the mask operand; 2 presumably means
 * "less than or equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
__u);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 2 and an all-ones mask; 2 presumably means
 * "less than or equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
(__mmask64)-1);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 2 and __u as the mask operand; 2 presumably means
 * "less than or equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
__u);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_cmpw512_mask with
 * predicate immediate 2 and an all-ones mask; 2 presumably means
 * "less than or equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
(__mmask32)-1);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_cmpw512_mask with
 * predicate immediate 2 and __u as the mask operand; 2 presumably means
 * "less than or equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
__u);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 2 and an all-ones mask; 2 presumably means
 * "less than or equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
(__mmask32)-1);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 2 and __u as the mask operand; 2 presumably means
 * "less than or equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
__u);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_cmpb512_mask with
 * predicate immediate 1 and an all-ones mask; 1 presumably means
 * "less than", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
(__mmask64)-1);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_cmpb512_mask with
 * predicate immediate 1 and __u as the mask operand; 1 presumably means
 * "less than", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
__u);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 1 and an all-ones mask; 1 presumably means
 * "less than", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
(__mmask64)-1);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 1 and __u as the mask operand; 1 presumably means
 * "less than", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
__u);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_cmpw512_mask with
 * predicate immediate 1 and an all-ones mask; 1 presumably means
 * "less than", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
(__mmask32)-1);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_cmpw512_mask with
 * predicate immediate 1 and __u as the mask operand; 1 presumably means
 * "less than", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
__u);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 1 and an all-ones mask; 1 presumably means
 * "less than", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
(__mmask32)-1);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 1 and __u as the mask operand; 1 presumably means
 * "less than", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
__u);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_cmpb512_mask with
 * predicate immediate 4 and an all-ones mask; 4 presumably means
 * "not equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
(__mmask64)-1);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_cmpb512_mask with
 * predicate immediate 4 and __u as the mask operand; 4 presumably means
 * "not equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
__u);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 4 and an all-ones mask; 4 presumably means
 * "not equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
(__mmask64)-1);
}
/* Feeds __a and __b (as __v64qi) to __builtin_ia32_ucmpb512_mask with
 * predicate immediate 4 and __u as the mask operand; 4 presumably means
 * "not equal", per the function name — confirm. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
__u);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_cmpw512_mask with
 * predicate immediate 4 and an all-ones mask; 4 presumably means
 * "not equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
(__mmask32)-1);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_cmpw512_mask with
 * predicate immediate 4 and __u as the mask operand; 4 presumably means
 * "not equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
__u);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 4 and an all-ones mask; 4 presumably means
 * "not equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
(__mmask32)-1);
}
/* Feeds __a and __b (as __v32hi) to __builtin_ia32_ucmpw512_mask with
 * predicate immediate 4 and __u as the mask operand; 4 presumably means
 * "not equal", per the function name — confirm. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
__u);
}
/* Named-predicate aliases for the epu16 compares: each macro expands to
 * _mm512_cmp_epu16_mask (or _mm512_mask_cmp_epu16_mask for the (k, A, B)
 * forms) with the matching _MM_CMPINT_* constant as the predicate.
 * NOTE(review): static inline functions with these same names also appear
 * earlier in this file view; confirm only one form is meant to be kept. */
#define _mm512_cmpeq_epu16_mask(A, B) \
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu16_mask(k, A, B) \
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu16_mask(A, B) \
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu16_mask(k, A, B) \
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu16_mask(A, B) \
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu16_mask(k, A, B) \
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu16_mask(A, B) \
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu16_mask(k, A, B) \
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu16_mask(A, B) \
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu16_mask(k, A, B) \
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu16_mask(A, B) \
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi8 (__m512i __A, __m512i __B) {
@ -706,57 +558,55 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
/* NOTE(review): this body contains two implementations back to back. As
 * written, the first return (the __builtin_ia32_pavgb512_mask call with
 * _mm512_setzero_qi() and an all-ones mask) ends the function, so the
 * typedef and the widening ((a + b) + 1) >> 1 computation below it are
 * unreachable. Confirm which version is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_avg_epu8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
(__v64qi) __B,
(__v64qi) _mm512_setzero_qi(),
(__mmask64) -1);
typedef unsigned short __v64hu __attribute__((__vector_size__(128)));
return (__m512i)__builtin_convertvector(
((__builtin_convertvector((__v64qu) __A, __v64hu) +
__builtin_convertvector((__v64qu) __B, __v64hu)) + 1)
>> 1, __v64qu);
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_pavgb512_mask with __W and mask __U) ends the function;
 * the __builtin_ia32_selectb_512 form choosing between
 * _mm512_avg_epu8(__A, __B) and __W is unreachable. Confirm which version
 * is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
__m512i __B)
{
return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
(__v64qi) __B,
(__v64qi) __W,
(__mmask64) __U);
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_avg_epu8(__A, __B),
(__v64qi)__W);
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_pavgb512_mask with _mm512_setzero_qi() and mask __U) ends
 * the function; the __builtin_ia32_selectb_512 form choosing between
 * _mm512_avg_epu8(__A, __B) and _mm512_setzero_qi() is unreachable. Confirm
 * which version is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
(__v64qi) __B,
(__v64qi) _mm512_setzero_qi(),
(__mmask64) __U);
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_avg_epu8(__A, __B),
(__v64qi)_mm512_setzero_qi());
}
/* NOTE(review): this body contains two implementations back to back. As
 * written, the first return (the __builtin_ia32_pavgw512_mask call with
 * _mm512_setzero_hi() and an all-ones mask) ends the function, so the
 * typedef and the widening ((a + b) + 1) >> 1 computation below it are
 * unreachable. Confirm which version is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_avg_epu16 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
(__v32hi) __B,
(__v32hi) _mm512_setzero_hi(),
(__mmask32) -1);
typedef unsigned int __v32su __attribute__((__vector_size__(128)));
return (__m512i)__builtin_convertvector(
((__builtin_convertvector((__v32hu) __A, __v32su) +
__builtin_convertvector((__v32hu) __B, __v32su)) + 1)
>> 1, __v32hu);
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_pavgw512_mask with __W and mask __U) ends the function;
 * the __builtin_ia32_selectw_512 form choosing between
 * _mm512_avg_epu16(__A, __B) and __W is unreachable. Confirm which version
 * is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
__m512i __B)
{
return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
(__v32hi) __B,
(__v32hi) __W,
(__mmask32) __U);
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_avg_epu16(__A, __B),
(__v32hi)__W);
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_pavgw512_mask with _mm512_setzero_hi() and mask __U) ends
 * the function; the __builtin_ia32_selectw_512 form choosing between
 * _mm512_avg_epu16(__A, __B) and _mm512_setzero_hi() is unreachable. Confirm
 * which version is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
(__v32hi) __B,
(__v32hi) _mm512_setzero_hi(),
(__mmask32) __U);
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_avg_epu16(__A, __B),
(__v32hi) _mm512_setzero_hi());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@ -1543,46 +1393,6 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
}
/* Generic integer-compare macros. Each one expands to a compare builtin
 * (__builtin_ia32_cmpb512_mask / ucmpb512_mask on __v64qi operands, or
 * cmpw512_mask / ucmpw512_mask on __v32hi operands) with (p) cast to int as
 * the predicate. The (a, b, p) forms pass an all-ones mask; the
 * (m, a, b, p) forms pass (m) as the mask operand.
 * NOTE(review): macros with these same names are also defined earlier in
 * this file view; confirm only one set is meant to be kept. */
#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
(__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
(__v64qi)(__m512i)(b), (int)(p), \
(__mmask64)-1); })
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
(__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
(__v64qi)(__m512i)(b), (int)(p), \
(__mmask64)(m)); })
#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
(__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
(__v64qi)(__m512i)(b), (int)(p), \
(__mmask64)-1); })
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
(__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
(__v64qi)(__m512i)(b), (int)(p), \
(__mmask64)(m)); })
#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
(__v32hi)(__m512i)(b), (int)(p), \
(__mmask32)-1); })
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
(__v32hi)(__m512i)(b), (int)(p), \
(__mmask32)(m)); })
#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
(__v32hi)(__m512i)(b), (int)(p), \
(__mmask32)-1); })
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
(__v32hi)(__m512i)(b), (int)(p), \
(__mmask32)(m)); })
#define _mm512_shufflehi_epi16(A, imm) __extension__ ({ \
(__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \
(__v32hi)_mm512_undefined_epi32(), \
@ -2028,32 +1838,29 @@ _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_pbroadcastb512_gpr_mask with __A, __O and __M) ends the
 * function; the __builtin_ia32_selectb_512 form choosing between
 * _mm512_set1_epi8(__A) and __O is unreachable. Confirm which version is
 * meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
{
return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
(__v64qi) __O,
__M);
return (__m512i) __builtin_ia32_selectb_512(__M,
(__v64qi)_mm512_set1_epi8(__A),
(__v64qi) __O);
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_pbroadcastb512_gpr_mask with __A, _mm512_setzero_qi() and
 * __M) ends the function; the __builtin_ia32_selectb_512 form choosing
 * between _mm512_set1_epi8(__A) and _mm512_setzero_si512() is unreachable.
 * Confirm which version is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
{
return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
(__v64qi)
_mm512_setzero_qi(),
__M);
return (__m512i) __builtin_ia32_selectb_512(__M,
(__v64qi) _mm512_set1_epi8(__A),
(__v64qi) _mm512_setzero_si512());
}
/*
 * Concatenates the low 32 bits of two mask values into one 64-bit mask.
 *
 *   __A  supplies bits 63:32 of the result (its low 32 bits are used).
 *   __B  supplies bits 31:0 of the result.
 *
 * This is the operand order documented for KUNPCKDQ / _mm512_kunpackd: the
 * first source lands in the high half and the second in the low half. The
 * previous body held two return statements; the second was unreachable and,
 * in its shift/mask form, had the two halves swapped. A single
 * target-independent expression with the documented order replaces both.
 */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
{
  /* Low half from __B; __A is shifted up, which also drops its high bits. */
  return (__mmask64) ((__B & 0xFFFFFFFF) | (__A << 32));
}
/*
 * Concatenates the low 16 bits of two mask values into one 32-bit mask.
 *
 *   __A  supplies bits 31:16 of the result (its low 16 bits are used).
 *   __B  supplies bits 15:0 of the result.
 *
 * This is the operand order documented for KUNPCKWD / _mm512_kunpackw: the
 * first source lands in the high half and the second in the low half. The
 * previous body held two return statements; the second was unreachable and,
 * in its shift/mask form, had the two halves swapped. A single
 * target-independent expression with the documented order replaces both.
 */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
{
  /* Low half from __B; __A is shifted up, which also drops its high bits. */
  return (__mmask32) ((__B & 0xFFFF) | (__A << 16));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@ -2108,61 +1915,56 @@ _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_ptestmb512 on __A and __B with an all-ones mask) ends the
 * function; the _mm512_cmpneq_epi8_mask comparison of
 * _mm512_and_epi32(__A, __B) against _mm512_setzero_qi() is unreachable.
 * Confirm which version is meant to remain. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_test_epi8_mask (__m512i __A, __m512i __B)
{
return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
(__v64qi) __B,
(__mmask64) -1);
return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
_mm512_setzero_qi());
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_ptestmb512 on __A and __B with mask __U) ends the
 * function; the _mm512_mask_cmpneq_epi8_mask comparison of
 * _mm512_and_epi32(__A, __B) against _mm512_setzero_qi() is unreachable.
 * Confirm which version is meant to remain. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
{
return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
(__v64qi) __B, __U);
return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
_mm512_setzero_qi());
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_ptestmw512 on __A and __B with an all-ones mask) ends the
 * function; the _mm512_cmpneq_epi16_mask comparison of
 * _mm512_and_epi32(__A, __B) against _mm512_setzero_qi() is unreachable.
 * Confirm which version is meant to remain. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_test_epi16_mask (__m512i __A, __m512i __B)
{
return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
(__v32hi) __B,
(__mmask32) -1);
return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
_mm512_setzero_qi());
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_ptestmw512 on __A and __B with mask __U) ends the
 * function; the _mm512_mask_cmpneq_epi16_mask comparison of
 * _mm512_and_epi32(__A, __B) against _mm512_setzero_qi() is unreachable.
 * Confirm which version is meant to remain. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
{
return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
(__v32hi) __B, __U);
return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
_mm512_setzero_qi());
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_ptestnmb512 on __A and __B with an all-ones mask) ends the
 * function; the _mm512_cmpeq_epi8_mask comparison of
 * _mm512_and_epi32(__A, __B) against _mm512_setzero_qi() is unreachable.
 * Confirm which version is meant to remain. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_testn_epi8_mask (__m512i __A, __m512i __B)
{
return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
(__v64qi) __B,
(__mmask64) -1);
return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_qi());
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_ptestnmb512 on __A and __B with mask __U) ends the
 * function; the _mm512_mask_cmpeq_epi8_mask comparison of
 * _mm512_and_epi32(__A, __B) against _mm512_setzero_qi() is unreachable.
 * Confirm which version is meant to remain. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
{
return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
(__v64qi) __B, __U);
return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
_mm512_setzero_qi());
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_ptestnmw512 on __A and __B with an all-ones mask) ends the
 * function; the _mm512_cmpeq_epi16_mask comparison of
 * _mm512_and_epi32(__A, __B) against _mm512_setzero_qi() is unreachable.
 * Confirm which version is meant to remain. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_testn_epi16_mask (__m512i __A, __m512i __B)
{
return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
(__v32hi) __B,
(__mmask32) -1);
return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
_mm512_setzero_qi());
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_ptestnmw512 on __A and __B with mask __U) ends the
 * function; the _mm512_mask_cmpeq_epi16_mask comparison of
 * _mm512_and_epi32(__A, __B) against _mm512_setzero_qi() is unreachable.
 * Confirm which version is meant to remain. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
{
return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
(__v32hi) __B, __U);
return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
_mm512_setzero_qi());
}
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
@ -2219,17 +2021,17 @@ _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_pbroadcastw512_gpr_mask with __A, __O and __M) ends the
 * function; the __builtin_ia32_selectw_512 form choosing between
 * _mm512_set1_epi16(__A) and __O is unreachable. Confirm which version is
 * meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
{
return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
(__v32hi) __O,
__M);
return (__m512i) __builtin_ia32_selectw_512(__M,
(__v32hi) _mm512_set1_epi16(__A),
(__v32hi) __O);
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_pbroadcastw512_gpr_mask with __A, _mm512_setzero_hi() and
 * __M) ends the function; the __builtin_ia32_selectw_512 form choosing
 * between _mm512_set1_epi16(__A) and _mm512_setzero_si512() is unreachable.
 * Confirm which version is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
{
return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
(__v32hi) _mm512_setzero_hi(),
__M);
return (__m512i) __builtin_ia32_selectw_512(__M,
(__v32hi) _mm512_set1_epi16(__A),
(__v32hi) _mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS

View File

@ -130,13 +130,14 @@ _mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_broadcastmb512 on __A) ends the function; the
 * _mm512_set1_epi64((long long) __A) form is unreachable. Confirm which
 * version is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastmb_epi64 (__mmask8 __A)
{
return (__m512i) __builtin_ia32_broadcastmb512 (__A);
return (__m512i) _mm512_set1_epi64((long long) __A);
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_broadcastmw512 on __A) ends the function; the
 * _mm512_set1_epi32((int) __A) form is unreachable. Confirm which version
 * is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastmw_epi32 (__mmask16 __A)
{
return (__m512i) __builtin_ia32_broadcastmw512 (__A);
return (__m512i) _mm512_set1_epi32((int) __A);
}
#undef __DEFAULT_FN_ATTRS

View File

@ -973,25 +973,26 @@ _mm512_movepi64_mask (__m512i __A)
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_broadcastf32x2_512_mask with _mm512_undefined_ps() and an
 * all-ones mask) ends the function; the __builtin_shufflevector form, which
 * repeats indices 0 and 1 of __A eight times, is unreachable. Confirm which
 * version is meant to remain. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x2 (__m128 __A)
{
return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
(__v16sf)_mm512_undefined_ps(),
(__mmask16) -1);
return (__m512)__builtin_shufflevector((__v4sf)__A,
(__v4sf)_mm_undefined_ps(),
0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1);
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_broadcastf32x2_512_mask with __O and __M) ends the
 * function; the __builtin_ia32_selectps_512 form choosing between
 * _mm512_broadcast_f32x2(__A) and __O is unreachable. Confirm which version
 * is meant to remain. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
{
return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
(__v16sf)
__O, __M);
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
(__v16sf)_mm512_broadcast_f32x2(__A),
(__v16sf)__O);
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_broadcastf32x2_512_mask with _mm512_setzero_ps() and __M)
 * ends the function; the __builtin_ia32_selectps_512 form choosing between
 * _mm512_broadcast_f32x2(__A) and _mm512_setzero_ps() is unreachable.
 * Confirm which version is meant to remain. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
{
return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
(__v16sf)_mm512_setzero_ps (),
__M);
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
(__v16sf)_mm512_broadcast_f32x2(__A),
(__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
@ -1044,25 +1045,26 @@ _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_broadcasti32x2_512_mask with _mm512_setzero_si512() and an
 * all-ones mask) ends the function; the __builtin_shufflevector form, which
 * repeats indices 0 and 1 of __A eight times, is unreachable. Confirm which
 * version is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x2 (__m128i __A)
{
return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
(__v16si)_mm512_setzero_si512(),
(__mmask16) -1);
return (__m512i)__builtin_shufflevector((__v4si)__A,
(__v4si)_mm_undefined_si128(),
0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1);
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_broadcasti32x2_512_mask with __O and __M) ends the
 * function; the __builtin_ia32_selectd_512 form choosing between
 * _mm512_broadcast_i32x2(__A) and __O is unreachable. Confirm which version
 * is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
{
return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
(__v16si)
__O, __M);
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_broadcast_i32x2(__A),
(__v16si)__O);
}
/* NOTE(review): two return paths are present. As written, the first return
 * (__builtin_ia32_broadcasti32x2_512_mask with _mm512_setzero_si512() and
 * __M) ends the function; the __builtin_ia32_selectd_512 form choosing
 * between _mm512_broadcast_i32x2(__A) and _mm512_setzero_si512() is
 * unreachable. Confirm which version is meant to remain. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
{
return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
(__v16si)_mm512_setzero_si512 (),
__M);
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_broadcast_i32x2(__A),
(__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS

View File

@ -258,25 +258,6 @@ _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
(__v8di) _mm512_setzero_si512());
}
/* Passes __A, the result of _mm512_setzero_si512() (as __v16si) and the mask
 * __M to __builtin_ia32_pbroadcastd512_gpr_mask.
 * NOTE(review): a second definition of this same function, built on
 * __builtin_ia32_selectd_512, appears further down in this file view;
 * confirm which one is meant to be kept. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
(__v16si)
_mm512_setzero_si512 (),
__M);
}
#ifdef __x86_64__
/* Passes __A, the result of _mm512_setzero_si512() (as __v8di) and the mask
 * __M to __builtin_ia32_pbroadcastq512_gpr_mask. Sits inside an
 * #ifdef __x86_64__ region.
 * NOTE(review): a second definition of this same function, built on
 * __builtin_ia32_selectq_512, appears further down in this file view;
 * confirm which one is meant to be kept. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
(__v8di)
_mm512_setzero_si512 (),
__M);
}
#endif
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_setzero_ps(void)
@ -335,12 +316,30 @@ _mm512_set1_epi32(int __s)
__s, __s, __s, __s, __s, __s, __s, __s };
}
/* Passes the mask __M, _mm512_set1_epi32(__A) and _mm512_setzero_si512()
 * (both as __v16si) to __builtin_ia32_selectd_512 and returns the result.
 * NOTE(review): another definition of this same function, built on
 * __builtin_ia32_pbroadcastd512_gpr_mask, appears earlier in this file view;
 * confirm which one is meant to be kept. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
return (__m512i)__builtin_ia32_selectd_512(__M,
(__v16si)_mm512_set1_epi32(__A),
(__v16si)_mm512_setzero_si512());
}
/* Builds a __v8di vector literal with __d in all eight positions and
 * returns it as __m512i. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi64(long long __d)
{
return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}
#ifdef __x86_64__
/* Passes the mask __M, _mm512_set1_epi64(__A) and _mm512_setzero_si512()
 * (both as __v8di) to __builtin_ia32_selectq_512 and returns the result.
 * Sits inside an #ifdef __x86_64__ region.
 * NOTE(review): another definition of this same function, built on
 * __builtin_ia32_pbroadcastq512_gpr_mask, appears earlier in this file view;
 * confirm which one is meant to be kept. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
return (__m512i)__builtin_ia32_selectq_512(__M,
(__v8di)_mm512_set1_epi64(__A),
(__v8di)_mm512_setzero_si512());
}
#endif
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcastss_ps(__m128 __A)
{
@ -4544,37 +4543,6 @@ _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
(__v8di)_mm512_setzero_si512());
}
/* Bit Test */
/* Feeds __A and __B (as __v16si) to __builtin_ia32_ptestmd512 with an
 * all-ones mask and returns the result as __mmask16. */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_test_epi32_mask(__m512i __A, __m512i __B)
{
return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}
/* Feeds __A and __B (as __v16si) to __builtin_ia32_ptestmd512 with the
 * caller's __U as the mask operand. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
(__v16si) __B, __U);
}
/* Feeds __A and __B (as __v8di) to __builtin_ia32_ptestmq512 with an
 * all-ones mask and returns the result as __mmask8. */
static __inline __mmask8 __DEFAULT_FN_ATTRS
_mm512_test_epi64_mask(__m512i __A, __m512i __B)
{
return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
(__v8di) __B,
(__mmask8) -1);
}
/* Feeds __A and __B (as __v8di) to __builtin_ia32_ptestmq512 with the
 * caller's __U as the mask operand. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
}
/* SIMD load ops */
@ -4845,293 +4813,105 @@ _mm512_knot(__mmask16 __M)
/* Integer compare */
/* Feeds __a and __b (as __v16si) to __builtin_ia32_pcmpeqd512_mask with an
 * all-ones mask and returns the result as __mmask16. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
(__mmask16)-1);
}
/* Named-predicate aliases for the epi32 compares: each macro expands to
 * _mm512_cmp_epi32_mask (or _mm512_mask_cmp_epi32_mask for the (k, A, B)
 * forms) with the matching _MM_CMPINT_* constant as the predicate.
 * NOTE(review): static inline functions named _mm512_cmpeq_epi32_mask and
 * _mm512_mask_cmpeq_epi32_mask also appear in this file view; confirm only
 * one form is meant to be kept. */
#define _mm512_cmpeq_epi32_mask(A, B) \
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
__u);
}
/* Shorthand comparison macros for the epu32 family. Each one forwards its
 * operands unchanged to _mm512_cmp_epu32_mask (or, for the mask_ variants, to
 * _mm512_mask_cmp_epu32_mask with k as the first argument) and supplies the
 * matching _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT or NE. */
#define _mm512_cmpeq_epu32_mask(A, B) \
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
(__mmask16)-1);
}
/* Shorthand comparison macros for the epi64 family. Each one forwards its
 * operands unchanged to _mm512_cmp_epi64_mask (or, for the mask_ variants, to
 * _mm512_mask_cmp_epi64_mask with k as the first argument) and supplies the
 * matching _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT or NE. */
#define _mm512_cmpeq_epi64_mask(A, B) \
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
__u);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
(__mmask16)-1);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
__u);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
(__mmask16)-1);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
__u);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
(__mmask16)-1);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
__u);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
(__mmask16)-1);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
__u);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
(__mmask16)-1);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
__u);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
(__mmask16)-1);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
__u);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
(__mmask16)-1);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
__u);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
(__mmask16)-1);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
__u);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
(__mmask16)-1);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
__u);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
(__mmask16)-1);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
__u);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
(__mmask8)-1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
__u);
}
/* Shorthand comparison macros for the epu64 family. Each one forwards its
 * operands unchanged to _mm512_cmp_epu64_mask (or, for the mask_ variants, to
 * _mm512_mask_cmp_epu64_mask with k as the first argument) and supplies the
 * matching _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT or NE. */
#define _mm512_cmpeq_epu64_mask(A, B) \
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi8_epi32(__m128i __A)
@ -6798,35 +6578,6 @@ _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
(__mmask16) __U);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
{
return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
(__v16si) __B, __U);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
{
return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
(__v8di) __B,
(__mmask8) -1);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
(__v8di) __B, __U);
}
#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
(__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
@ -7195,76 +6946,100 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
}
/* Shuffle groups of four floats selected by the 8-bit immediate imm.
 * Result elements 0-7 are taken from A and elements 8-15 from B (shuffle
 * indices 16 and up address the second vector). Each 2-bit field of imm
 * (bits 1:0, 3:2, 5:4, 7:6) picks which group of 4 consecutive elements is
 * copied. imm must be a constant expression because it feeds
 * __builtin_shufflevector indices.
 * The macro has exactly one body; a second, legacy builtin-based expansion
 * must not follow the #define line. */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0 + ((((imm) >> 0) & 0x3) * 4), \
                                  1 + ((((imm) >> 0) & 0x3) * 4), \
                                  2 + ((((imm) >> 0) & 0x3) * 4), \
                                  3 + ((((imm) >> 0) & 0x3) * 4), \
                                  0 + ((((imm) >> 2) & 0x3) * 4), \
                                  1 + ((((imm) >> 2) & 0x3) * 4), \
                                  2 + ((((imm) >> 2) & 0x3) * 4), \
                                  3 + ((((imm) >> 2) & 0x3) * 4), \
                                  16 + ((((imm) >> 4) & 0x3) * 4), \
                                  17 + ((((imm) >> 4) & 0x3) * 4), \
                                  18 + ((((imm) >> 4) & 0x3) * 4), \
                                  19 + ((((imm) >> 4) & 0x3) * 4), \
                                  16 + ((((imm) >> 6) & 0x3) * 4), \
                                  17 + ((((imm) >> 6) & 0x3) * 4), \
                                  18 + ((((imm) >> 6) & 0x3) * 4), \
                                  19 + ((((imm) >> 6) & 0x3) * 4)); })
/* Merge-masked form: computes _mm512_shuffle_f32x4(A, B, imm) and blends it
 * with W through __builtin_ia32_selectps_512 under the 16-bit mask U.
 * Single body only; no legacy builtin expansion precedes it. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                             (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                             (__v16sf)(__m512)(W)); })
/* Zero-masked form: computes _mm512_shuffle_f32x4(A, B, imm) and blends it
 * with _mm512_setzero_ps() through __builtin_ia32_selectps_512 under the
 * 16-bit mask U. Single body only; no legacy builtin expansion precedes it. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                             (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                             (__v16sf)_mm512_setzero_ps()); })
/* Shuffle pairs of doubles selected by the 8-bit immediate imm.
 * Result elements 0-3 are taken from A and elements 4-7 from B (shuffle
 * indices 8 and up address the second vector). Each 2-bit field of imm picks
 * which pair of consecutive elements is copied. imm must be a constant
 * expression because it feeds __builtin_shufflevector indices.
 * The macro has exactly one body; a second, legacy builtin-based expansion
 * must not follow the #define line. */
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })
/* Merge-masked form: computes _mm512_shuffle_f64x2(A, B, imm) and blends it
 * with W through __builtin_ia32_selectpd_512 under the 8-bit mask U.
 * Single body only; no legacy builtin expansion precedes it. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                              (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                              (__v8df)(__m512d)(W)); })
/* Zero-masked form: computes _mm512_shuffle_f64x2(A, B, imm) and blends it
 * with _mm512_setzero_pd() through __builtin_ia32_selectpd_512 under the
 * 8-bit mask U. Single body only; no legacy builtin expansion precedes it. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                              (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                              (__v8df)_mm512_setzero_pd()); })
/* Shuffle integer groups selected by the 8-bit immediate imm.
 * The operands are viewed as __v8di, so each group copied is a pair of
 * consecutive 64-bit elements. Result elements 0-3 are taken from A and
 * elements 4-7 from B (shuffle indices 8 and up address the second vector).
 * Each 2-bit field of imm picks which pair is copied. imm must be a constant
 * expression because it feeds __builtin_shufflevector indices.
 * The macro has exactly one body; a second, legacy builtin-based expansion
 * must not follow the #define line. */
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)(__m512i)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })
/* Merge-masked form: computes _mm512_shuffle_i32x4(A, B, imm) and blends it
 * with W through __builtin_ia32_selectd_512 under the 16-bit mask U.
 * Single body only; no legacy builtin expansion precedes it. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                             (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                             (__v16si)(__m512i)(W)); })
/* Zero-masked form: computes _mm512_shuffle_i32x4(A, B, imm) and blends it
 * with _mm512_setzero_si512() through __builtin_ia32_selectd_512 under the
 * 16-bit mask U. Single body only; no legacy builtin expansion precedes it. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                             (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                             (__v16si)_mm512_setzero_si512()); })
/* Shuffle pairs of 64-bit integers selected by the 8-bit immediate imm.
 * Result elements 0-3 are taken from A and elements 4-7 from B (shuffle
 * indices 8 and up address the second vector). Each 2-bit field of imm picks
 * which pair of consecutive elements is copied. imm must be a constant
 * expression because it feeds __builtin_shufflevector indices.
 * The macro has exactly one body; a second, legacy builtin-based expansion
 * must not follow the #define line. */
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)(__m512i)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })
/* Merge-masked form: computes _mm512_shuffle_i64x2(A, B, imm) and blends it
 * with W through __builtin_ia32_selectq_512 under the 8-bit mask U.
 * Single body only; no legacy builtin expansion precedes it. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                             (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                             (__v8di)(__m512i)(W)); })
/* Zero-masked form: computes _mm512_shuffle_i64x2(A, B, imm) and blends it
 * with _mm512_setzero_si512() through __builtin_ia32_selectq_512 under the
 * 8-bit mask U. Single body only; no legacy builtin expansion precedes it. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                             (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                             (__v8di)_mm512_setzero_si512()); })
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
(__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
@ -9012,7 +8787,7 @@ _mm512_kortestz (__mmask16 __A, __mmask16 __B)
/* Combine the low bytes of two 16-bit masks into one 16-bit mask.
 *
 * Result bits 7:0 come from bits 7:0 of __B and result bits 15:8 come from
 * bits 7:0 of __A; the upper bytes of both inputs are ignored. This is the
 * layout Intel documents for KUNPCKBW / _mm512_kunpackb (k[7:0] := b[7:0],
 * k[15:8] := a[7:0]).
 * NOTE(review): assumed to match __builtin_ia32_kunpckhi(__A, __B), which
 * this expression replaces -- confirm against the compiler's lowering.
 *
 * Written as plain integer arithmetic so it does not depend on a mask-unpack
 * builtin. The function has a single return; an earlier form carried an
 * unreachable second return that swapped the two bytes. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) ((__B & 0xFF) | ((__A & 0xFF) << 8));
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
@ -9035,7 +8810,7 @@ _mm512_stream_si512 (__m512i * __P, __m512i __A)
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_stream_load_si512 (void *__P)
_mm512_stream_load_si512 (void const *__P)
{
typedef __v8di __v8di_aligned __attribute__((aligned(64)));
return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
@ -9167,6 +8942,64 @@ _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
(__mmask8)(M), \
_MM_FROUND_CUR_DIRECTION); })
/* Bit Test */
/* ANDs __A with __B via _mm512_and_epi32 and returns the mask produced by
 * _mm512_cmpneq_epi32_mask when that result is compared against
 * _mm512_setzero_epi32(). */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_test_epi32_mask (__m512i __A, __m512i __B)
{
  const __m512i __common = _mm512_and_epi32 (__A, __B);
  const __m512i __zero = _mm512_setzero_epi32();
  return _mm512_cmpneq_epi32_mask (__common, __zero);
}
/* Masked variant: ANDs __A with __B via _mm512_and_epi32 and returns the
 * result of _mm512_mask_cmpneq_epi32_mask, called with __U as the mask
 * argument, comparing that AND against _mm512_setzero_epi32(). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  const __m512i __common = _mm512_and_epi32 (__A, __B);
  const __m512i __zero = _mm512_setzero_epi32();
  return _mm512_mask_cmpneq_epi32_mask (__U, __common, __zero);
}
/* ANDs __A with __B (the bitwise AND is taken with _mm512_and_epi32) and
 * returns the mask produced by _mm512_cmpneq_epi64_mask when that result is
 * compared against _mm512_setzero_epi32(). */
static __inline __mmask8 __DEFAULT_FN_ATTRS
_mm512_test_epi64_mask (__m512i __A, __m512i __B)
{
  const __m512i __common = _mm512_and_epi32 (__A, __B);
  const __m512i __zero = _mm512_setzero_epi32();
  return _mm512_cmpneq_epi64_mask (__common, __zero);
}
/* Masked variant: ANDs __A with __B (via _mm512_and_epi32) and returns the
 * result of _mm512_mask_cmpneq_epi64_mask, called with __U as the mask
 * argument, comparing that AND against _mm512_setzero_epi32(). */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  const __m512i __common = _mm512_and_epi32 (__A, __B);
  const __m512i __zero = _mm512_setzero_epi32();
  return _mm512_mask_cmpneq_epi64_mask (__U, __common, __zero);
}
/* ANDs __A with __B via _mm512_and_epi32 and returns the mask produced by
 * _mm512_cmpeq_epi32_mask when that result is compared against
 * _mm512_setzero_epi32(). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
{
  const __m512i __common = _mm512_and_epi32 (__A, __B);
  const __m512i __zero = _mm512_setzero_epi32();
  return _mm512_cmpeq_epi32_mask (__common, __zero);
}
/* Masked variant: ANDs __A with __B via _mm512_and_epi32 and returns the
 * result of _mm512_mask_cmpeq_epi32_mask, called with __U as the mask
 * argument, comparing that AND against _mm512_setzero_epi32(). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  const __m512i __common = _mm512_and_epi32 (__A, __B);
  const __m512i __zero = _mm512_setzero_epi32();
  return _mm512_mask_cmpeq_epi32_mask (__U, __common, __zero);
}
/* ANDs __A with __B (the bitwise AND is taken with _mm512_and_epi32) and
 * returns the mask produced by _mm512_cmpeq_epi64_mask when that result is
 * compared against _mm512_setzero_epi32(). */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
{
  const __m512i __common = _mm512_and_epi32 (__A, __B);
  const __m512i __zero = _mm512_setzero_epi32();
  return _mm512_cmpeq_epi64_mask (__common, __zero);
}
/* Masked variant: ANDs __A with __B (via _mm512_and_epi32) and returns the
 * result of _mm512_mask_cmpeq_epi64_mask, called with __U as the mask
 * argument, comparing that AND against _mm512_setzero_epi32(). */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  const __m512i __common = _mm512_and_epi32 (__A, __B);
  const __m512i __zero = _mm512_setzero_epi32();
  return _mm512_mask_cmpeq_epi64_mask (__U, __common, __zero);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_movehdup_ps (__m512 __A)
{
@ -9737,16 +9570,18 @@ _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
/* Masked broadcast of a 32-bit integer.
 *
 * Builds _mm512_set1_epi32(__A) and blends it with __O through
 * __builtin_ia32_selectd_512 under the 16-bit mask __M.
 * NOTE(review): assumed equivalent to the former
 * __builtin_ia32_pbroadcastd512_gpr_mask(__A, __O, __M) call -- confirm.
 *
 * The function has a single return; an unreachable second return statement
 * has been removed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_set1_epi32(__A),
                                              (__v16si) __O);
}
#ifdef __x86_64__
/* Masked broadcast of a 64-bit integer.
 *
 * Builds _mm512_set1_epi64(__A) and blends it with __O through
 * __builtin_ia32_selectq_512 under the 8-bit mask __M.
 * NOTE(review): assumed equivalent to the former
 * __builtin_ia32_pbroadcastq512_gpr_mask(__A, __O, __M) call -- confirm.
 *
 * The function has a single return; an unreachable second return statement
 * has been removed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
  return (__m512i) __builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_set1_epi64(__A),
                                              (__v8di) __O);
}
#endif

View File

@ -0,0 +1,391 @@
/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VBMI2INTRIN_H
#define __AVX512VBMI2INTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2")))
/* Hands __D and __S, both reinterpreted as __v32hi, plus the mask __U to
 * __builtin_ia32_compresshi512_mask and returns the result as __m512i.
 * NOTE(review): __S is presumably the pass-through for result lanes the
 * compress does not fill -- confirm against the Intel intrinsics guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
{
  const __v32hi __data = (__v32hi) __D;
  const __v32hi __passthru = (__v32hi) __S;
  return (__m512i) __builtin_ia32_compresshi512_mask (__data, __passthru, __U);
}
/* Same builtin as the merge-masked compress, but the second operand is
 * _mm512_setzero_hi() instead of a caller-supplied vector. __D is
 * reinterpreted as __v32hi and __U is passed through as the mask. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
{
  const __v32hi __data = (__v32hi) __D;
  const __v32hi __zero = (__v32hi) _mm512_setzero_hi();
  return (__m512i) __builtin_ia32_compresshi512_mask (__data, __zero, __U);
}
/* Hands __D and __S, both reinterpreted as __v64qi, plus the mask __U to
 * __builtin_ia32_compressqi512_mask and returns the result as __m512i.
 * NOTE(review): __S is presumably the pass-through for result lanes the
 * compress does not fill -- confirm against the Intel intrinsics guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
{
  const __v64qi __data = (__v64qi) __D;
  const __v64qi __passthru = (__v64qi) __S;
  return (__m512i) __builtin_ia32_compressqi512_mask (__data, __passthru, __U);
}
/* Same builtin as the merge-masked byte compress, but the second operand is
 * _mm512_setzero_qi() instead of a caller-supplied vector. __D is
 * reinterpreted as __v64qi and __U is passed through as the mask. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
{
  const __v64qi __data = (__v64qi) __D;
  const __v64qi __zero = (__v64qi) _mm512_setzero_qi();
  return (__m512i) __builtin_ia32_compressqi512_mask (__data, __zero, __U);
}
/* Calls __builtin_ia32_compressstorehi512_mask with __P cast to __v32hi *,
 * __D reinterpreted as __v32hi, and the mask __U. Returns nothing; the
 * builtin writes through __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
{
  __v32hi *const __dst = (__v32hi *) __P;
  const __v32hi __data = (__v32hi) __D;
  __builtin_ia32_compressstorehi512_mask (__dst, __data, __U);
}
/* Calls __builtin_ia32_compressstoreqi512_mask with __P cast to __v64qi *,
 * __D reinterpreted as __v64qi, and the mask __U. Returns nothing; the
 * builtin writes through __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
{
  __v64qi *const __dst = (__v64qi *) __P;
  const __v64qi __data = (__v64qi) __D;
  __builtin_ia32_compressstoreqi512_mask (__dst, __data, __U);
}
/* Hands __D and __S, both reinterpreted as __v32hi, plus the mask __U to
 * __builtin_ia32_expandhi512_mask and returns the result as __m512i.
 * NOTE(review): __S is presumably the pass-through for lanes whose mask bit
 * is clear -- confirm against the Intel intrinsics guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
{
  const __v32hi __data = (__v32hi) __D;
  const __v32hi __passthru = (__v32hi) __S;
  return (__m512i) __builtin_ia32_expandhi512_mask (__data, __passthru, __U);
}
/* Same builtin as the merge-masked expand, but the second operand is
 * _mm512_setzero_hi() instead of a caller-supplied vector. __D is
 * reinterpreted as __v32hi and __U is passed through as the mask. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
{
  const __v32hi __data = (__v32hi) __D;
  const __v32hi __zero = (__v32hi) _mm512_setzero_hi();
  return (__m512i) __builtin_ia32_expandhi512_mask (__data, __zero, __U);
}
/* Hands __D and __S, both reinterpreted as __v64qi, plus the mask __U to
 * __builtin_ia32_expandqi512_mask and returns the result as __m512i.
 * NOTE(review): __S is presumably the pass-through for lanes whose mask bit
 * is clear -- confirm against the Intel intrinsics guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
{
  const __v64qi __data = (__v64qi) __D;
  const __v64qi __passthru = (__v64qi) __S;
  return (__m512i) __builtin_ia32_expandqi512_mask (__data, __passthru, __U);
}
/* Same builtin as the merge-masked byte expand, but the second operand is
 * _mm512_setzero_qi() instead of a caller-supplied vector. __D is
 * reinterpreted as __v64qi and __U is passed through as the mask. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
{
  const __v64qi __data = (__v64qi) __D;
  const __v64qi __zero = (__v64qi) _mm512_setzero_qi();
  return (__m512i) __builtin_ia32_expandqi512_mask (__data, __zero, __U);
}
/* Calls __builtin_ia32_expandloadhi512_mask with __P cast to
 * const __v32hi *, __S reinterpreted as __v32hi, and the mask __U; the
 * result is returned as __m512i.
 * NOTE(review): __S is presumably the pass-through for lanes whose mask bit
 * is clear -- confirm against the Intel intrinsics guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
{
  const __v32hi *const __src = (const __v32hi *)__P;
  const __v32hi __passthru = (__v32hi) __S;
  return (__m512i) __builtin_ia32_expandloadhi512_mask (__src, __passthru, __U);
}
/* Calls __builtin_ia32_expandloadhi512_mask with __P cast to
 * const __v32hi *, _mm512_setzero_hi() as the second operand, and the mask
 * __U; the result is returned as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
{
  const __v32hi *const __src = (const __v32hi *)__P;
  const __v32hi __zero = (__v32hi) _mm512_setzero_hi();
  return (__m512i) __builtin_ia32_expandloadhi512_mask (__src, __zero, __U);
}
/* Calls __builtin_ia32_expandloadqi512_mask with __P cast to
 * const __v64qi *, __S reinterpreted as __v64qi, and the mask __U; the
 * result is returned as __m512i.
 * NOTE(review): __S is presumably the pass-through for lanes whose mask bit
 * is clear -- confirm against the Intel intrinsics guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
{
  const __v64qi *const __src = (const __v64qi *)__P;
  const __v64qi __passthru = (__v64qi) __S;
  return (__m512i) __builtin_ia32_expandloadqi512_mask (__src, __passthru, __U);
}
/* Calls __builtin_ia32_expandloadqi512_mask with __P cast to
 * const __v64qi *, _mm512_setzero_qi() as the second operand, and the mask
 * __U; the result is returned as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
{
  const __v64qi *const __src = (const __v64qi *)__P;
  const __v64qi __zero = (__v64qi) _mm512_setzero_qi();
  return (__m512i) __builtin_ia32_expandloadqi512_mask (__src, __zero, __U);
}
/* Immediate-count shldi macros, epi64 family.
 * _mm512_mask_shldi_epi64 forwards A and B (as __v8di), the immediate I
 * (cast to int), S (as __v8di) and the mask U (cast to __mmask8) to
 * __builtin_ia32_vpshldq512_mask.
 * The maskz_ form reuses it with _mm512_setzero_hi() as S; the unmasked form
 * reuses it with _mm512_undefined() as S and an all-ones __mmask8. */
#define _mm512_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \
(__v8di)(B), \
(int)(I), \
(__v8di)(S), \
(__mmask8)(U)); })
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
_mm512_mask_shldi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I))
#define _mm512_shldi_epi64(A, B, I) \
_mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
/* Immediate-count shldi macros, epi32 family.
 * _mm512_mask_shldi_epi32 forwards A and B (as __v16si), the immediate I
 * (cast to int), S (as __v16si) and the mask U (cast to __mmask16) to
 * __builtin_ia32_vpshldd512_mask.
 * The maskz_ form reuses it with _mm512_setzero_hi() as S; the unmasked form
 * reuses it with _mm512_undefined() as S and an all-ones __mmask16. */
#define _mm512_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \
(__v16si)(B), \
(int)(I), \
(__v16si)(S), \
(__mmask16)(U)); })
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
_mm512_mask_shldi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I))
#define _mm512_shldi_epi32(A, B, I) \
_mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
/* Immediate-count shldi macros, epi16 family.
 * _mm512_mask_shldi_epi16 forwards A and B (as __v32hi), the immediate I
 * (cast to int), S (as __v32hi) and the mask U (cast to __mmask32) to
 * __builtin_ia32_vpshldw512_mask.
 * The maskz_ form reuses it with _mm512_setzero_hi() as S; the unmasked form
 * reuses it with _mm512_undefined() as S and an all-ones __mmask32. */
#define _mm512_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \
(__v32hi)(B), \
(int)(I), \
(__v32hi)(S), \
(__mmask32)(U)); })
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
_mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I))
#define _mm512_shldi_epi16(A, B, I) \
_mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))
/* Immediate-count shrdi macros, epi64 family.
 * _mm512_mask_shrdi_epi64 forwards A and B (as __v8di), the immediate I
 * (cast to int), S (as __v8di) and the mask U (cast to __mmask8) to
 * __builtin_ia32_vpshrdq512_mask.
 * The maskz_ form reuses it with _mm512_setzero_hi() as S; the unmasked form
 * reuses it with _mm512_undefined() as S and an all-ones __mmask8. */
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \
(__v8di)(B), \
(int)(I), \
(__v8di)(S), \
(__mmask8)(U)); })
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
_mm512_mask_shrdi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I))
#define _mm512_shrdi_epi64(A, B, I) \
_mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
/* Immediate-count shrdi macros, epi32 family.
 * _mm512_mask_shrdi_epi32 forwards A and B (as __v16si), the immediate I
 * (cast to int), S (as __v16si) and the mask U (cast to __mmask16) to
 * __builtin_ia32_vpshrdd512_mask.
 * The maskz_ form reuses it with _mm512_setzero_hi() as S; the unmasked form
 * reuses it with _mm512_undefined() as S and an all-ones __mmask16. */
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \
(__v16si)(B), \
(int)(I), \
(__v16si)(S), \
(__mmask16)(U)); })
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
_mm512_mask_shrdi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I))
#define _mm512_shrdi_epi32(A, B, I) \
_mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
/* Immediate-count shrdi macros, epi16 family.
 * _mm512_mask_shrdi_epi16 forwards A and B (as __v32hi), the immediate I
 * (cast to int), S (as __v32hi) and the mask U (cast to __mmask32) to
 * __builtin_ia32_vpshrdw512_mask.
 * The maskz_ form reuses it with _mm512_setzero_hi() as S; the unmasked form
 * reuses it with _mm512_undefined() as S and an all-ones __mmask32. */
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \
(__v32hi)(B), \
(int)(I), \
(__v32hi)(S), \
(__mmask32)(U)); })
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
_mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I))
#define _mm512_shrdi_epi16(A, B, I) \
_mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))
/* Forwards __S, __A and __B (each reinterpreted as __v8di) together with the
 * mask __U to __builtin_ia32_vpshldvq512_mask and returns the result as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
{
  const __v8di __vs = (__v8di) __S;
  const __v8di __va = (__v8di) __A;
  const __v8di __vb = (__v8di) __B;
  return (__m512i) __builtin_ia32_vpshldvq512_mask (__vs, __va, __vb, __U);
}
/* Forwards __S, __A and __B (each reinterpreted as __v8di) together with the
 * mask __U to the _maskz builtin __builtin_ia32_vpshldvq512_maskz and
 * returns the result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
{
  const __v8di __vs = (__v8di) __S;
  const __v8di __va = (__v8di) __A;
  const __v8di __vb = (__v8di) __B;
  return (__m512i) __builtin_ia32_vpshldvq512_maskz (__vs, __va, __vb, __U);
}
/* Unmasked form: forwards __S, __A and __B (each reinterpreted as __v8di) to
 * __builtin_ia32_vpshldvq512_mask with an all-ones mask, (__mmask8) -1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
{
  const __v8di __vs = (__v8di) __S;
  const __v8di __va = (__v8di) __A;
  const __v8di __vb = (__v8di) __B;
  const __mmask8 __all = (__mmask8) -1;
  return (__m512i) __builtin_ia32_vpshldvq512_mask (__vs, __va, __vb, __all);
}
/* Forwards __S, __A and __B (each reinterpreted as __v16si) together with
 * the mask __U to __builtin_ia32_vpshldvd512_mask and returns the result as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  const __v16si __vs = (__v16si) __S;
  const __v16si __va = (__v16si) __A;
  const __v16si __vb = (__v16si) __B;
  return (__m512i) __builtin_ia32_vpshldvd512_mask (__vs, __va, __vb, __U);
}
/* Forwards __S, __A and __B (each reinterpreted as __v16si) together with
 * the mask __U to the _maskz builtin __builtin_ia32_vpshldvd512_maskz and
 * returns the result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  const __v16si __vs = (__v16si) __S;
  const __v16si __va = (__v16si) __A;
  const __v16si __vb = (__v16si) __B;
  return (__m512i) __builtin_ia32_vpshldvd512_maskz (__vs, __va, __vb, __U);
}
/* Unmasked form: forwards __S, __A and __B (each reinterpreted as __v16si)
 * to __builtin_ia32_vpshldvd512_mask with an all-ones mask, (__mmask16) -1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  const __v16si __vs = (__v16si) __S;
  const __v16si __va = (__v16si) __A;
  const __v16si __vb = (__v16si) __B;
  const __mmask16 __all = (__mmask16) -1;
  return (__m512i) __builtin_ia32_vpshldvd512_mask (__vs, __va, __vb, __all);
}
/* Forwards __S, __A and __B (each reinterpreted as __v32hi) together with
 * the mask __U to __builtin_ia32_vpshldvw512_mask and returns the result as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
{
  const __v32hi __vs = (__v32hi) __S;
  const __v32hi __va = (__v32hi) __A;
  const __v32hi __vb = (__v32hi) __B;
  return (__m512i) __builtin_ia32_vpshldvw512_mask (__vs, __va, __vb, __U);
}
/* Forwards __S, __A and __B (each reinterpreted as __v32hi) together with
 * the mask __U to the _maskz builtin __builtin_ia32_vpshldvw512_maskz and
 * returns the result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
{
  const __v32hi __vs = (__v32hi) __S;
  const __v32hi __va = (__v32hi) __A;
  const __v32hi __vb = (__v32hi) __B;
  return (__m512i) __builtin_ia32_vpshldvw512_maskz (__vs, __va, __vb, __U);
}
/* Unmasked variant: calls the vpshldvw512_mask builtin on __S, __A, __B
 * (as __v32hi) with an all-ones __mmask32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
{
  const __mmask32 __all = (__mmask32)-1;
  return (__m512i)__builtin_ia32_vpshldvw512_mask(
      (__v32hi)__S, (__v32hi)__A, (__v32hi)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v8di) together with the mask __U
 * to the vpshrdvq512_mask builtin. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
{
  __m512i __res = (__m512i)__builtin_ia32_vpshrdvq512_mask(
      (__v8di)__S, (__v8di)__A, (__v8di)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v8di) and the mask __U to the
 * vpshrdvq512_maskz builtin. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __m512i __res = (__m512i)__builtin_ia32_vpshrdvq512_maskz(
      (__v8di)__S, (__v8di)__A, (__v8di)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshrdvq512_mask builtin on __S, __A, __B
 * (as __v8di) with an all-ones __mmask8. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  return (__m512i)__builtin_ia32_vpshrdvq512_mask(
      (__v8di)__S, (__v8di)__A, (__v8di)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v16si) together with the mask __U
 * to the vpshrdvd512_mask builtin. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  __m512i __res = (__m512i)__builtin_ia32_vpshrdvd512_mask(
      (__v16si)__S, (__v16si)__A, (__v16si)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v16si) and the mask __U to the
 * vpshrdvd512_maskz builtin. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __m512i __res = (__m512i)__builtin_ia32_vpshrdvd512_maskz(
      (__v16si)__S, (__v16si)__A, (__v16si)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshrdvd512_mask builtin on __S, __A, __B
 * (as __v16si) with an all-ones __mmask16. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  const __mmask16 __all = (__mmask16)-1;
  return (__m512i)__builtin_ia32_vpshrdvd512_mask(
      (__v16si)__S, (__v16si)__A, (__v16si)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v32hi) together with the mask __U
 * to the vpshrdvw512_mask builtin. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
{
  __m512i __res = (__m512i)__builtin_ia32_vpshrdvw512_mask(
      (__v32hi)__S, (__v32hi)__A, (__v32hi)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v32hi) and the mask __U to the
 * vpshrdvw512_maskz builtin. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __m512i __res = (__m512i)__builtin_ia32_vpshrdvw512_maskz(
      (__v32hi)__S, (__v32hi)__A, (__v32hi)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshrdvw512_mask builtin on __S, __A, __B
 * (as __v32hi) with an all-ones __mmask32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
{
  const __mmask32 __all = (__mmask32)-1;
  return (__m512i)__builtin_ia32_vpshrdvw512_mask(
      (__v32hi)__S, (__v32hi)__A, (__v32hi)__B, __all);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -0,0 +1,157 @@
/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VLBITALGINTRIN_H
#define __AVX512VLBITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg")))
/* Applies the vpopcntw_256 builtin to __A viewed as __v16hi. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_popcnt_epi16(__m256i __A)
{
  __v16hi __lanes = (__v16hi)__A;
  return (__m256i)__builtin_ia32_vpopcntw_256(__lanes);
}
/* Computes _mm256_popcnt_epi16(__B) and feeds it, with __A as the other
 * source, to the selectw_256 builtin under mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
{
  __v16hi __counts = (__v16hi)_mm256_popcnt_epi16(__B);
  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, __counts,
                                             (__v16hi)__A);
}
/* Zero-masking form: delegates to _mm256_mask_popcnt_epi16 with
 * _mm256_setzero_si256() as the first argument. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
{
  __m256i __zero = (__m256i)_mm256_setzero_si256();
  return _mm256_mask_popcnt_epi16(__zero, __U, __B);
}
/* Applies the vpopcntw_128 builtin to __A viewed as __v8hi.
 * NOTE(review): other 128-bit intrinsics in this diff use the `_mm_` prefix;
 * confirm whether the `_mm128_` spelling is intended. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_popcnt_epi16(__m128i __A)
{
  __v8hi __lanes = (__v8hi)__A;
  return (__m128i)__builtin_ia32_vpopcntw_128(__lanes);
}
/* Computes _mm128_popcnt_epi16(__B) and feeds it, with __A as the other
 * source, to the selectw_128 builtin under mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
{
  __v8hi __counts = (__v8hi)_mm128_popcnt_epi16(__B);
  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, __counts,
                                             (__v8hi)__A);
}
/* Zero-masking form: delegates to _mm128_mask_popcnt_epi16 with
 * _mm_setzero_si128() as the first argument. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
{
  __m128i __zero = (__m128i)_mm_setzero_si128();
  return _mm128_mask_popcnt_epi16(__zero, __U, __B);
}
/* Applies the vpopcntb_256 builtin to __A viewed as __v32qi. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_popcnt_epi8(__m256i __A)
{
  __v32qi __lanes = (__v32qi)__A;
  return (__m256i)__builtin_ia32_vpopcntb_256(__lanes);
}
/* Computes _mm256_popcnt_epi8(__B) and feeds it, with __A as the other
 * source, to the selectb_256 builtin under mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
{
  __v32qi __counts = (__v32qi)_mm256_popcnt_epi8(__B);
  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, __counts,
                                             (__v32qi)__A);
}
/* Zero-masking form: delegates to _mm256_mask_popcnt_epi8 with
 * _mm256_setzero_si256() as the first argument. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
{
  __m256i __zero = (__m256i)_mm256_setzero_si256();
  return _mm256_mask_popcnt_epi8(__zero, __U, __B);
}
/* Applies the vpopcntb_128 builtin to __A viewed as __v16qi. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_popcnt_epi8(__m128i __A)
{
  __v16qi __lanes = (__v16qi)__A;
  return (__m128i)__builtin_ia32_vpopcntb_128(__lanes);
}
/* Computes _mm128_popcnt_epi8(__B) and feeds it, with __A as the other
 * source, to the selectb_128 builtin under mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
{
  __v16qi __counts = (__v16qi)_mm128_popcnt_epi8(__B);
  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, __counts,
                                             (__v16qi)__A);
}
/* Zero-masking form: delegates to _mm128_mask_popcnt_epi8 with
 * _mm_setzero_si128() as the first argument. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
{
  __m128i __zero = (__m128i)_mm_setzero_si128();
  return _mm128_mask_popcnt_epi8(__zero, __U, __B);
}
/* Calls the vpshufbitqmb256_mask builtin on __A and __B (as __v32qi) with
 * mask __U and returns its result as a __mmask32.
 * NOTE(review): Intel's guide appears to name this operation
 * `..._bitshuffle_epi64_mask`; confirm before relying on the `epi32` name. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B)
{
  __mmask32 __k = (__mmask32)__builtin_ia32_vpshufbitqmb256_mask(
      (__v32qi)__A, (__v32qi)__B, __U);
  return __k;
}
/* Unmasked form: delegates to _mm256_mask_bitshuffle_epi32_mask with an
 * all-ones __mmask32. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B)
{
  const __mmask32 __all = (__mmask32)-1;
  return _mm256_mask_bitshuffle_epi32_mask(__all, __A, __B);
}
/* Calls the vpshufbitqmb128_mask builtin on __A and __B (as __v16qi) with
 * mask __U and returns its result as a __mmask16.
 * NOTE(review): Intel's guide appears to name this operation
 * `_mm_mask_bitshuffle_epi64_mask`; confirm before relying on this name. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B)
{
  __mmask16 __k = (__mmask16)__builtin_ia32_vpshufbitqmb128_mask(
      (__v16qi)__A, (__v16qi)__B, __U);
  return __k;
}
/* Unmasked form: delegates to _mm128_mask_bitshuffle_epi16_mask with an
 * all-ones __mmask16. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B)
{
  const __mmask16 __all = (__mmask16)-1;
  return _mm128_mask_bitshuffle_epi16_mask(__all, __A, __B);
}
#undef __DEFAULT_FN_ATTRS
#endif

File diff suppressed because it is too large Load Diff

View File

@ -33,26 +33,26 @@
/* NOTE(review): this span shows two bodies for one function (a
 * __builtin_ia32_broadcastmb128 call, then _mm_set1_epi64x on __A cast to
 * long long) and a repeated `{`. It looks like before/after lines of a diff
 * hunk rendered without +/- markers; confirm against the real header. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcastmb_epi64 (__mmask8 __A)
{
return (__m128i) __builtin_ia32_broadcastmb128 (__A);
{
return (__m128i) _mm_set1_epi64x((long long) __A);
}
/* NOTE(review): two return statements appear here (a
 * __builtin_ia32_broadcastmb256 call, then _mm256_set1_epi64x on __A cast to
 * long long); presumably before/after lines of a diff hunk; confirm. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcastmb_epi64 (__mmask8 __A)
{
return (__m256i) __builtin_ia32_broadcastmb256 (__A);
return (__m256i) _mm256_set1_epi64x((long long)__A);
}
/* NOTE(review): two return statements appear here (a
 * __builtin_ia32_broadcastmw128 call, then _mm_set1_epi32 on __A cast to
 * int); presumably before/after lines of a diff hunk; confirm. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcastmw_epi32 (__mmask16 __A)
{
return (__m128i) __builtin_ia32_broadcastmw128 (__A);
return (__m128i) _mm_set1_epi32((int)__A);
}
/* NOTE(review): two return statements appear here (a
 * __builtin_ia32_broadcastmw256 call, then _mm256_set1_epi32 on __A cast to
 * int); presumably before/after lines of a diff hunk; confirm. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcastmw_epi32 (__mmask16 __A)
{
return (__m256i) __builtin_ia32_broadcastmw256 (__A);
return (__m256i) _mm256_set1_epi32((int)__A);
}

View File

@ -978,25 +978,25 @@ _mm256_movepi64_mask (__m256i __A)
/* NOTE(review): two return statements appear here: a
 * broadcastf32x2_256_mask builtin call with an all-ones mask, then a
 * __builtin_shufflevector of __A with indices 0,1 repeated four times.
 * Presumably before/after lines of a diff hunk; confirm. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_broadcast_f32x2 (__m128 __A)
{
return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
(__v8sf)_mm256_undefined_ps(),
(__mmask8) -1);
return (__m256)__builtin_shufflevector((__v4sf)__A,
(__v4sf)_mm_undefined_ps(),
0, 1, 0, 1, 0, 1, 0, 1);
}
/* NOTE(review): two return statements appear here: the
 * broadcastf32x2_256_mask builtin with __O and __M, then a selectps_256 of
 * _mm256_broadcast_f32x2(__A) against __O under __M. Presumably before/after
 * lines of a diff hunk; confirm. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
{
return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
(__v8sf) __O,
__M);
return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
(__v8sf)_mm256_broadcast_f32x2(__A),
(__v8sf)__O);
}
/* NOTE(review): two return statements appear here: the
 * broadcastf32x2_256_mask builtin with a zero vector and __M, then a
 * selectps_256 of _mm256_broadcast_f32x2(__A) against _mm256_setzero_ps()
 * under __M. Presumably before/after lines of a diff hunk; confirm. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
{
return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
(__v8sf) _mm256_setzero_ps (),
__M);
return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
(__v8sf)_mm256_broadcast_f32x2(__A),
(__v8sf)_mm256_setzero_ps());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
@ -1025,49 +1025,49 @@ _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
/* NOTE(review): two return statements appear here: a
 * broadcasti32x2_128_mask builtin call with an all-ones mask, then a
 * __builtin_shufflevector of __A with indices 0,1,0,1. Presumably
 * before/after lines of a diff hunk; confirm. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcast_i32x2 (__m128i __A)
{
return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
(__v4si)_mm_undefined_si128(),
(__mmask8) -1);
return (__m128i)__builtin_shufflevector((__v4si)__A,
(__v4si)_mm_undefined_si128(),
0, 1, 0, 1);
}
/* NOTE(review): two return statements appear here: the
 * broadcasti32x2_128_mask builtin with __O and __M, then a selectd_128 of
 * _mm_broadcast_i32x2(__A) against __O under __M. Presumably before/after
 * lines of a diff hunk; confirm. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
{
return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
(__v4si) __O,
__M);
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
(__v4si)_mm_broadcast_i32x2(__A),
(__v4si)__O);
}
/* NOTE(review): two return statements appear here: the
 * broadcasti32x2_128_mask builtin with a zero vector and __M, then a
 * selectd_128 of _mm_broadcast_i32x2(__A) against _mm_setzero_si128() under
 * __M. Presumably before/after lines of a diff hunk; confirm. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
(__v4si) _mm_setzero_si128 (),
__M);
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
(__v4si)_mm_broadcast_i32x2(__A),
(__v4si)_mm_setzero_si128());
}
/* NOTE(review): two return statements appear here: a
 * broadcasti32x2_256_mask builtin call with an all-ones mask, then a
 * __builtin_shufflevector of __A with indices 0,1 repeated four times.
 * Presumably before/after lines of a diff hunk; confirm. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcast_i32x2 (__m128i __A)
{
return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
(__v8si)_mm256_undefined_si256(),
(__mmask8) -1);
return (__m256i)__builtin_shufflevector((__v4si)__A,
(__v4si)_mm_undefined_si128(),
0, 1, 0, 1, 0, 1, 0, 1);
}
/* NOTE(review): two return statements appear here: the
 * broadcasti32x2_256_mask builtin with __O and __M, then a selectd_256 of
 * _mm256_broadcast_i32x2(__A) against __O under __M. Presumably before/after
 * lines of a diff hunk; confirm. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
{
return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
(__v8si) __O,
__M);
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
(__v8si)_mm256_broadcast_i32x2(__A),
(__v8si)__O);
}
/* NOTE(review): two return statements appear here: the
 * broadcasti32x2_256_mask builtin with a zero vector and __M, then a
 * selectd_256 of _mm256_broadcast_i32x2(__A) against _mm256_setzero_si256()
 * under __M. Presumably before/after lines of a diff hunk; confirm. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
(__v8si) _mm256_setzero_si256 (),
__M);
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
(__v8si)_mm256_broadcast_i32x2(__A),
(__v8si)_mm256_setzero_si256());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,748 @@
/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VLVBMI2INTRIN_H
#define __AVX512VLVBMI2INTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2")))
/* Returns a __m128i built from a __v8hi whose eight elements are all 0. */
static __inline __m128i __DEFAULT_FN_ATTRS
_mm128_setzero_hi(void) {
  __v8hi __zero = { 0, 0, 0, 0, 0, 0, 0, 0 };
  return (__m128i)__zero;
}
/* Forwards __D and __S (as __v8hi) with mask __U to the compresshi128_mask
 * builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
{
  __m128i __res = (__m128i)__builtin_ia32_compresshi128_mask(
      (__v8hi)__D, (__v8hi)__S, __U);
  return __res;
}
/* Zero-source form: calls the compresshi128_mask builtin on __D with
 * _mm128_setzero_hi() as the second operand and mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D)
{
  __v8hi __zero = (__v8hi)_mm128_setzero_hi();
  return (__m128i)__builtin_ia32_compresshi128_mask((__v8hi)__D, __zero, __U);
}
/* Forwards __D and __S (as __v16qi) with mask __U to the compressqi128_mask
 * builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
{
  __m128i __res = (__m128i)__builtin_ia32_compressqi128_mask(
      (__v16qi)__D, (__v16qi)__S, __U);
  return __res;
}
/* Zero-source form: calls the compressqi128_mask builtin on __D with
 * _mm128_setzero_hi() (cast to __v16qi) as the second operand and mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D)
{
  __v16qi __zero = (__v16qi)_mm128_setzero_hi();
  return (__m128i)__builtin_ia32_compressqi128_mask((__v16qi)__D, __zero, __U);
}
/* Calls the compressstorehi128_mask builtin with __P (as __v8hi *), __D (as
 * __v8hi) and mask __U; returns nothing. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
{
  __v8hi *__dst = (__v8hi *)__P;
  __builtin_ia32_compressstorehi128_mask(__dst, (__v8hi)__D, __U);
}
/* Calls the compressstoreqi128_mask builtin with __P (as __v16qi *), __D (as
 * __v16qi) and mask __U; returns nothing. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
{
  __v16qi *__dst = (__v16qi *)__P;
  __builtin_ia32_compressstoreqi128_mask(__dst, (__v16qi)__D, __U);
}
/* Forwards __D and __S (as __v8hi) with mask __U to the expandhi128_mask
 * builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
{
  __m128i __res = (__m128i)__builtin_ia32_expandhi128_mask(
      (__v8hi)__D, (__v8hi)__S, __U);
  return __res;
}
/* Zero-source form: calls the expandhi128_mask builtin on __D with
 * _mm128_setzero_hi() as the second operand and mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D)
{
  __v8hi __zero = (__v8hi)_mm128_setzero_hi();
  return (__m128i)__builtin_ia32_expandhi128_mask((__v8hi)__D, __zero, __U);
}
/* Forwards __D and __S (as __v16qi) with mask __U to the expandqi128_mask
 * builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
{
  __m128i __res = (__m128i)__builtin_ia32_expandqi128_mask(
      (__v16qi)__D, (__v16qi)__S, __U);
  return __res;
}
/* Zero-source form: calls the expandqi128_mask builtin on __D with
 * _mm128_setzero_hi() (cast to __v16qi) as the second operand and mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D)
{
  __v16qi __zero = (__v16qi)_mm128_setzero_hi();
  return (__m128i)__builtin_ia32_expandqi128_mask((__v16qi)__D, __zero, __U);
}
/* Calls the expandloadhi128_mask builtin with __P (as const __v8hi *), __S
 * (as __v8hi) and mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
{
  const __v8hi *__src = (const __v8hi *)__P;
  return (__m128i)__builtin_ia32_expandloadhi128_mask(__src, (__v8hi)__S, __U);
}
/* Zero-source form: calls the expandloadhi128_mask builtin with __P (as
 * const __v8hi *), _mm128_setzero_hi() and mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
{
  const __v8hi *__src = (const __v8hi *)__P;
  __v8hi __zero = (__v8hi)_mm128_setzero_hi();
  return (__m128i)__builtin_ia32_expandloadhi128_mask(__src, __zero, __U);
}
/* Calls the expandloadqi128_mask builtin with __P (as const __v16qi *), __S
 * (as __v16qi) and mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
{
  const __v16qi *__src = (const __v16qi *)__P;
  return (__m128i)__builtin_ia32_expandloadqi128_mask(__src, (__v16qi)__S, __U);
}
/* Zero-source form: calls the expandloadqi128_mask builtin with __P (as
 * const __v16qi *), _mm128_setzero_hi() (cast to __v16qi) and mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
{
  const __v16qi *__src = (const __v16qi *)__P;
  __v16qi __zero = (__v16qi)_mm128_setzero_hi();
  return (__m128i)__builtin_ia32_expandloadqi128_mask(__src, __zero, __U);
}
/* Returns a __m256i built from a __v16hi whose sixteen elements are all 0. */
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setzero_hi(void) {
  __v16hi __zero = { 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0 };
  return (__m256i)__zero;
}
/* Forwards __D and __S (as __v16hi) with mask __U to the compresshi256_mask
 * builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D)
{
  __m256i __res = (__m256i)__builtin_ia32_compresshi256_mask(
      (__v16hi)__D, (__v16hi)__S, __U);
  return __res;
}
/* Zero-source form: calls the compresshi256_mask builtin on __D with
 * _mm256_setzero_hi() as the second operand and mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D)
{
  __v16hi __zero = (__v16hi)_mm256_setzero_hi();
  return (__m256i)__builtin_ia32_compresshi256_mask((__v16hi)__D, __zero, __U);
}
/* Forwards __D and __S (as __v32qi) with mask __U to the compressqi256_mask
 * builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D)
{
  __m256i __res = (__m256i)__builtin_ia32_compressqi256_mask(
      (__v32qi)__D, (__v32qi)__S, __U);
  return __res;
}
/* Zero-source form: calls the compressqi256_mask builtin on __D with
 * _mm256_setzero_hi() (cast to __v32qi) as the second operand and mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D)
{
  __v32qi __zero = (__v32qi)_mm256_setzero_hi();
  return (__m256i)__builtin_ia32_compressqi256_mask((__v32qi)__D, __zero, __U);
}
/* Calls the compressstorehi256_mask builtin with __P (as __v16hi *), __D (as
 * __v16hi) and mask __U; returns nothing. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D)
{
  __v16hi *__dst = (__v16hi *)__P;
  __builtin_ia32_compressstorehi256_mask(__dst, (__v16hi)__D, __U);
}
/* Calls the compressstoreqi256_mask builtin with __P (as __v32qi *), __D (as
 * __v32qi) and mask __U; returns nothing. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D)
{
  __v32qi *__dst = (__v32qi *)__P;
  __builtin_ia32_compressstoreqi256_mask(__dst, (__v32qi)__D, __U);
}
/* Forwards __D and __S (as __v16hi) with mask __U to the expandhi256_mask
 * builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D)
{
  __m256i __res = (__m256i)__builtin_ia32_expandhi256_mask(
      (__v16hi)__D, (__v16hi)__S, __U);
  return __res;
}
/* Zero-source form: calls the expandhi256_mask builtin on __D with
 * _mm256_setzero_hi() as the second operand and mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D)
{
  __v16hi __zero = (__v16hi)_mm256_setzero_hi();
  return (__m256i)__builtin_ia32_expandhi256_mask((__v16hi)__D, __zero, __U);
}
/* Forwards __D and __S (as __v32qi) with mask __U to the expandqi256_mask
 * builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D)
{
  __m256i __res = (__m256i)__builtin_ia32_expandqi256_mask(
      (__v32qi)__D, (__v32qi)__S, __U);
  return __res;
}
/* Zero-source form: calls the expandqi256_mask builtin on __D with
 * _mm256_setzero_hi() (cast to __v32qi) as the second operand and mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D)
{
  __v32qi __zero = (__v32qi)_mm256_setzero_hi();
  return (__m256i)__builtin_ia32_expandqi256_mask((__v32qi)__D, __zero, __U);
}
/* Calls the expandloadhi256_mask builtin with __P (as const __v16hi *), __S
 * (as __v16hi) and mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P)
{
  const __v16hi *__src = (const __v16hi *)__P;
  return (__m256i)__builtin_ia32_expandloadhi256_mask(__src, (__v16hi)__S, __U);
}
/* Zero-source form: calls the expandloadhi256_mask builtin with __P (as
 * const __v16hi *), _mm256_setzero_hi() and mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P)
{
  const __v16hi *__src = (const __v16hi *)__P;
  __v16hi __zero = (__v16hi)_mm256_setzero_hi();
  return (__m256i)__builtin_ia32_expandloadhi256_mask(__src, __zero, __U);
}
/* Calls the expandloadqi256_mask builtin with __P (as const __v32qi *), __S
 * (as __v32qi) and mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P)
{
  const __v32qi *__src = (const __v32qi *)__P;
  return (__m256i)__builtin_ia32_expandloadqi256_mask(__src, (__v32qi)__S, __U);
}
/* Zero-source form: calls the expandloadqi256_mask builtin with __P (as
 * const __v32qi *), _mm256_setzero_hi() (cast to __v32qi) and mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
{
  const __v32qi *__src = (const __v32qi *)__P;
  __v32qi __zero = (__v32qi)_mm256_setzero_hi();
  return (__m256i)__builtin_ia32_expandloadqi256_mask(__src, __zero, __U);
}
/* Masked form: forwards A and B (cast to __v4di), the immediate I (cast to
 * int), S (cast to __v4di) and U (cast to __mmask8) to the vpshldq256_mask
 * builtin. */
#define _mm256_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
(__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \
(__v4di)(B), \
(int)(I), \
(__v4di)(S), \
(__mmask8)(U)); })
/* Zero-masking form: the masked macro with _mm256_setzero_hi() as S. */
#define _mm256_maskz_shldi_epi64(U, A, B, I) \
_mm256_mask_shldi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm256_undefined_si256() as S and an
 * all-ones __mmask8. */
#define _mm256_shldi_epi64(A, B, I) \
_mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
/* Masked form: forwards A and B (cast to __v2di), the immediate I (cast to
 * int), S (cast to __v2di) and U (cast to __mmask8) to the vpshldq128_mask
 * builtin. */
#define _mm128_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
(__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \
(__v2di)(B), \
(int)(I), \
(__v2di)(S), \
(__mmask8)(U)); })
/* Zero-masking form: the masked macro with _mm128_setzero_hi() as S. */
#define _mm128_maskz_shldi_epi64(U, A, B, I) \
_mm128_mask_shldi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm_undefined_si128() as S and an
 * all-ones __mmask8. */
#define _mm128_shldi_epi64(A, B, I) \
_mm128_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
/* Masked form: forwards A and B (cast to __v8si), the immediate I (cast to
 * int), S (cast to __v8si) and U (cast to __mmask8) to the vpshldd256_mask
 * builtin. */
#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
(__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \
(__v8si)(B), \
(int)(I), \
(__v8si)(S), \
(__mmask8)(U)); })
/* Zero-masking form: the masked macro with _mm256_setzero_hi() as S. */
#define _mm256_maskz_shldi_epi32(U, A, B, I) \
_mm256_mask_shldi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm256_undefined_si256() as S and an
 * all-ones __mmask8. */
#define _mm256_shldi_epi32(A, B, I) \
_mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
/* Masked form: forwards A and B (cast to __v4si), the immediate I (cast to
 * int), S (cast to __v4si) and U (cast to __mmask8) to the vpshldd128_mask
 * builtin. */
#define _mm128_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
(__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \
(__v4si)(B), \
(int)(I), \
(__v4si)(S), \
(__mmask8)(U)); })
/* Zero-masking form: the masked macro with _mm128_setzero_hi() as S. */
#define _mm128_maskz_shldi_epi32(U, A, B, I) \
_mm128_mask_shldi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm_undefined_si128() as S and an
 * all-ones __mmask8. */
#define _mm128_shldi_epi32(A, B, I) \
_mm128_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
/* Masked form: forwards A and B (cast to __v16hi), the immediate I (cast to
 * int), S (cast to __v16hi) and U (cast to __mmask16) to the
 * vpshldw256_mask builtin. */
#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
(__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \
(__v16hi)(B), \
(int)(I), \
(__v16hi)(S), \
(__mmask16)(U)); })
/* Zero-masking form: the masked macro with _mm256_setzero_hi() as S. */
#define _mm256_maskz_shldi_epi16(U, A, B, I) \
_mm256_mask_shldi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm256_undefined_si256() as S and an
 * all-ones __mmask16. The mask must be 16 bits wide: _mm256_mask_shldi_epi16
 * casts its U argument to __mmask16, so an all-ones __mmask8 would arrive as
 * 0x00FF and only the low eight mask bits would be set. */
#define _mm256_shldi_epi16(A, B, I) \
_mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask16)(-1), (A), (B), (I))
/* Masked form: forwards A and B (cast to __v8hi), the immediate I (cast to
 * int), S (cast to __v8hi) and U (cast to __mmask8) to the vpshldw128_mask
 * builtin. */
#define _mm128_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
(__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \
(__v8hi)(B), \
(int)(I), \
(__v8hi)(S), \
(__mmask8)(U)); })
/* Zero-masking form: the masked macro with _mm128_setzero_hi() as S. */
#define _mm128_maskz_shldi_epi16(U, A, B, I) \
_mm128_mask_shldi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm_undefined_si128() as S and an
 * all-ones __mmask8. */
#define _mm128_shldi_epi16(A, B, I) \
_mm128_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
/* Masked form: forwards A and B (cast to __v4di), the immediate I (cast to
 * int), S (cast to __v4di) and U (cast to __mmask8) to the vpshrdq256_mask
 * builtin. */
#define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
(__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \
(__v4di)(B), \
(int)(I), \
(__v4di)(S), \
(__mmask8)(U)); })
/* Zero-masking form: the masked macro with _mm256_setzero_hi() as S. */
#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
_mm256_mask_shrdi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm256_undefined_si256() as S and an
 * all-ones __mmask8. */
#define _mm256_shrdi_epi64(A, B, I) \
_mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
/* Masked form: forwards A and B (cast to __v2di), the immediate I (cast to
 * int), S (cast to __v2di) and U (cast to __mmask8) to the vpshrdq128_mask
 * builtin. */
#define _mm128_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
(__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \
(__v2di)(B), \
(int)(I), \
(__v2di)(S), \
(__mmask8)(U)); })
/* Zero-masking form: the masked macro with _mm128_setzero_hi() as S. */
#define _mm128_maskz_shrdi_epi64(U, A, B, I) \
_mm128_mask_shrdi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm_undefined_si128() as S and an
 * all-ones __mmask8. */
#define _mm128_shrdi_epi64(A, B, I) \
_mm128_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
/* Masked form: forwards A and B (cast to __v8si), the immediate I (cast to
 * int), S (cast to __v8si) and U (cast to __mmask8) to the vpshrdd256_mask
 * builtin. */
#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
(__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \
(__v8si)(B), \
(int)(I), \
(__v8si)(S), \
(__mmask8)(U)); })
/* Zero-masking form: the masked macro with _mm256_setzero_hi() as S. */
#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
_mm256_mask_shrdi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm256_undefined_si256() as S and an
 * all-ones __mmask8. */
#define _mm256_shrdi_epi32(A, B, I) \
_mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
/* Masked form: forwards A and B (cast to __v4si), the immediate I (cast to
 * int), S (cast to __v4si) and U (cast to __mmask8) to the vpshrdd128_mask
 * builtin. */
#define _mm128_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
(__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \
(__v4si)(B), \
(int)(I), \
(__v4si)(S), \
(__mmask8)(U)); })
/* Zero-masking form: the masked macro with _mm128_setzero_hi() as S. */
#define _mm128_maskz_shrdi_epi32(U, A, B, I) \
_mm128_mask_shrdi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm_undefined_si128() as S and an
 * all-ones __mmask8. */
#define _mm128_shrdi_epi32(A, B, I) \
_mm128_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
/* Masked form: forwards A and B (cast to __v16hi), the immediate I (cast to
 * int), S (cast to __v16hi) and U (cast to __mmask16) to the
 * vpshrdw256_mask builtin. */
#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
(__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \
(__v16hi)(B), \
(int)(I), \
(__v16hi)(S), \
(__mmask16)(U)); })
/* Zero-masking form: the masked macro with _mm256_setzero_hi() as S. */
#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
_mm256_mask_shrdi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm256_undefined_si256() as S and an
 * all-ones __mmask16. The mask must be 16 bits wide: _mm256_mask_shrdi_epi16
 * casts its U argument to __mmask16, so an all-ones __mmask8 would arrive as
 * 0x00FF and only the low eight mask bits would be set. */
#define _mm256_shrdi_epi16(A, B, I) \
_mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask16)(-1), (A), (B), (I))
/* Masked form: forwards A and B (cast to __v8hi), the immediate I (cast to
 * int), S (cast to __v8hi) and U (cast to __mmask8) to the vpshrdw128_mask
 * builtin. */
#define _mm128_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
(__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \
(__v8hi)(B), \
(int)(I), \
(__v8hi)(S), \
(__mmask8)(U)); })
/* Zero-masking form: the masked macro with _mm128_setzero_hi() as S. */
#define _mm128_maskz_shrdi_epi16(U, A, B, I) \
_mm128_mask_shrdi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I))
/* Unmasked form: the masked macro with _mm_undefined_si128() as S and an
 * all-ones __mmask8. */
#define _mm128_shrdi_epi16(A, B, I) \
_mm128_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
/* Passes __S, __A and __B (each cast to __v4di) together with the mask __U
 * to the vpshldvq256_mask builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshldvq256_mask(
      (__v4di)__S, (__v4di)__A, (__v4di)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v4di) and the mask __U to the
 * vpshldvq256_maskz builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshldvq256_maskz(
      (__v4di)__S, (__v4di)__A, (__v4di)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshldvq256_mask builtin on __S, __A, __B
 * (as __v4di) with an all-ones __mmask8. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  return (__m256i)__builtin_ia32_vpshldvq256_mask(
      (__v4di)__S, (__v4di)__A, (__v4di)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v2di) together with the mask __U
 * to the vpshldvq128_mask builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshldvq128_mask(
      (__v2di)__S, (__v2di)__A, (__v2di)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v2di) and the mask __U to the
 * vpshldvq128_maskz builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshldvq128_maskz(
      (__v2di)__S, (__v2di)__A, (__v2di)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshldvq128_mask builtin on __S, __A, __B
 * (as __v2di) with an all-ones __mmask8. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  return (__m128i)__builtin_ia32_vpshldvq128_mask(
      (__v2di)__S, (__v2di)__A, (__v2di)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v8si) together with the mask __U
 * to the vpshldvd256_mask builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshldvd256_mask(
      (__v8si)__S, (__v8si)__A, (__v8si)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v8si) and the mask __U to the
 * vpshldvd256_maskz builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshldvd256_maskz(
      (__v8si)__S, (__v8si)__A, (__v8si)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshldvd256_mask builtin on __S, __A, __B
 * (as __v8si) with an all-ones __mmask8. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  return (__m256i)__builtin_ia32_vpshldvd256_mask(
      (__v8si)__S, (__v8si)__A, (__v8si)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v4si) together with the mask __U
 * to the vpshldvd128_mask builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshldvd128_mask(
      (__v4si)__S, (__v4si)__A, (__v4si)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v4si) and the mask __U to the
 * vpshldvd128_maskz builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshldvd128_maskz(
      (__v4si)__S, (__v4si)__A, (__v4si)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshldvd128_mask builtin on __S, __A, __B
 * (as __v4si) with an all-ones __mmask8. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  return (__m128i)__builtin_ia32_vpshldvd128_mask(
      (__v4si)__S, (__v4si)__A, (__v4si)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v16hi) together with the mask __U
 * to the vpshldvw256_mask builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshldvw256_mask(
      (__v16hi)__S, (__v16hi)__A, (__v16hi)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v16hi) and the mask __U to the
 * vpshldvw256_maskz builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshldvw256_maskz(
      (__v16hi)__S, (__v16hi)__A, (__v16hi)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshldvw256_mask builtin on __S, __A, __B
 * (as __v16hi) with an all-ones __mmask16. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask16 __all = (__mmask16)-1;
  return (__m256i)__builtin_ia32_vpshldvw256_mask(
      (__v16hi)__S, (__v16hi)__A, (__v16hi)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v8hi) together with the mask __U
 * to the vpshldvw128_mask builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshldvw128_mask(
      (__v8hi)__S, (__v8hi)__A, (__v8hi)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v8hi) and the mask __U to the
 * vpshldvw128_maskz builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshldvw128_maskz(
      (__v8hi)__S, (__v8hi)__A, (__v8hi)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshldvw128_mask builtin on __S, __A, __B
 * (as __v8hi) with an all-ones __mmask8. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  return (__m128i)__builtin_ia32_vpshldvw128_mask(
      (__v8hi)__S, (__v8hi)__A, (__v8hi)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v4di) together with the mask __U
 * to the vpshrdvq256_mask builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshrdvq256_mask(
      (__v4di)__S, (__v4di)__A, (__v4di)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v4di) and the mask __U to the
 * vpshrdvq256_maskz builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshrdvq256_maskz(
      (__v4di)__S, (__v4di)__A, (__v4di)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshrdvq256_mask builtin on __S, __A, __B
 * (as __v4di) with an all-ones __mmask8. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  return (__m256i)__builtin_ia32_vpshrdvq256_mask(
      (__v4di)__S, (__v4di)__A, (__v4di)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v2di) together with the mask __U
 * to the vpshrdvq128_mask builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshrdvq128_mask(
      (__v2di)__S, (__v2di)__A, (__v2di)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v2di) and the mask __U to the
 * vpshrdvq128_maskz builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshrdvq128_maskz(
      (__v2di)__S, (__v2di)__A, (__v2di)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshrdvq128_mask builtin on __S, __A, __B
 * (as __v2di) with an all-ones __mmask8. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  return (__m128i)__builtin_ia32_vpshrdvq128_mask(
      (__v2di)__S, (__v2di)__A, (__v2di)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v8si) together with the mask __U
 * to the vpshrdvd256_mask builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshrdvd256_mask(
      (__v8si)__S, (__v8si)__A, (__v8si)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v8si) and the mask __U to the
 * vpshrdvd256_maskz builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshrdvd256_maskz(
      (__v8si)__S, (__v8si)__A, (__v8si)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshrdvd256_mask builtin on __S, __A, __B
 * (as __v8si) with an all-ones __mmask8. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  return (__m256i)__builtin_ia32_vpshrdvd256_mask(
      (__v8si)__S, (__v8si)__A, (__v8si)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v4si) together with the mask __U
 * to the vpshrdvd128_mask builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshrdvd128_mask(
      (__v4si)__S, (__v4si)__A, (__v4si)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v4si) and the mask __U to the
 * vpshrdvd128_maskz builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshrdvd128_maskz(
      (__v4si)__S, (__v4si)__A, (__v4si)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshrdvd128_mask builtin on __S, __A, __B
 * (as __v4si) with an all-ones __mmask8. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  return (__m128i)__builtin_ia32_vpshrdvd128_mask(
      (__v4si)__S, (__v4si)__A, (__v4si)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v16hi) together with the mask __U
 * to the vpshrdvw256_mask builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshrdvw256_mask(
      (__v16hi)__S, (__v16hi)__A, (__v16hi)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v16hi) and the mask __U to the
 * vpshrdvw256_maskz builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __m256i __res = (__m256i)__builtin_ia32_vpshrdvw256_maskz(
      (__v16hi)__S, (__v16hi)__A, (__v16hi)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshrdvw256_mask builtin on __S, __A, __B
 * (as __v16hi) with an all-ones __mmask16. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask16 __all = (__mmask16)-1;
  return (__m256i)__builtin_ia32_vpshrdvw256_mask(
      (__v16hi)__S, (__v16hi)__A, (__v16hi)__B, __all);
}
/* Passes __S, __A and __B (each cast to __v8hi) together with the mask __U
 * to the vpshrdvw128_mask builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshrdvw128_mask(
      (__v8hi)__S, (__v8hi)__A, (__v8hi)__B, __U);
  return __res;
}
/* Hands __S, __A, __B (as __v8hi) and the mask __U to the
 * vpshrdvw128_maskz builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __m128i __res = (__m128i)__builtin_ia32_vpshrdvw128_maskz(
      (__v8hi)__S, (__v8hi)__A, (__v8hi)__B, __U);
  return __res;
}
/* Unmasked variant: calls the vpshrdvw128_mask builtin on __S, __A, __B
 * (as __v8hi) with an all-ones __mmask8. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  return (__m128i)__builtin_ia32_vpshrdvw128_mask(
      (__v8hi)__S, (__v8hi)__A, (__v8hi)__B, __all);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -0,0 +1,254 @@
/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VLVNNIINTRIN_H
#define __AVX512VLVNNIINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni")))
/* `_mask` form of the 256-bit vpdpbusd wrapper: casts __S, __A and __B to
 * __v8si and forwards them with the mask __U to
 * __builtin_ia32_vpdpbusd256_mask, returning the result as __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpbusd256_mask((__v8si)__S, (__v8si)__A,
                                                  (__v8si)__B, (__mmask8)__U);
}
/* `_maskz` form of the 256-bit vpdpbusd wrapper: casts __S, __A and __B to
 * __v8si and forwards them with the mask __U to
 * __builtin_ia32_vpdpbusd256_maskz, returning the result as __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpbusd256_maskz((__v8si)__S, (__v8si)__A,
                                                   (__v8si)__B, (__mmask8)__U);
}
/* Unmasked 256-bit vpdpbusd wrapper: calls __builtin_ia32_vpdpbusd256_mask
 * with __S, __A and __B cast to __v8si and a constant all-ones mask,
 * (__mmask8)-1. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpbusd256_mask((__v8si)__S, (__v8si)__A,
                                                  (__v8si)__B, (__mmask8)-1);
}
/* `_mask` form of the 256-bit vpdpbusds wrapper: casts __S, __A and __B to
 * __v8si and forwards them with the mask __U to
 * __builtin_ia32_vpdpbusds256_mask, returning the result as __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpbusds256_mask((__v8si)__S, (__v8si)__A,
                                                   (__v8si)__B, (__mmask8)__U);
}
/* `_maskz` form of the 256-bit vpdpbusds wrapper: casts __S, __A and __B to
 * __v8si and forwards them with the mask __U to
 * __builtin_ia32_vpdpbusds256_maskz, returning the result as __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpbusds256_maskz((__v8si)__S, (__v8si)__A,
                                                    (__v8si)__B, (__mmask8)__U);
}
/* Unmasked 256-bit vpdpbusds wrapper: calls __builtin_ia32_vpdpbusds256_mask
 * with __S, __A and __B cast to __v8si and a constant all-ones mask,
 * (__mmask8)-1. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpbusds256_mask((__v8si)__S, (__v8si)__A,
                                                   (__v8si)__B, (__mmask8)-1);
}
/* `_mask` form of the 256-bit vpdpwssd wrapper: casts __S, __A and __B to
 * __v8si and forwards them with the mask __U to
 * __builtin_ia32_vpdpwssd256_mask, returning the result as __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpwssd256_mask((__v8si)__S, (__v8si)__A,
                                                  (__v8si)__B, (__mmask8)__U);
}
/* `_maskz` form of the 256-bit vpdpwssd wrapper: casts __S, __A and __B to
 * __v8si and forwards them with the mask __U to
 * __builtin_ia32_vpdpwssd256_maskz, returning the result as __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpwssd256_maskz((__v8si)__S, (__v8si)__A,
                                                   (__v8si)__B, (__mmask8)__U);
}
/* Unmasked 256-bit vpdpwssd wrapper: calls __builtin_ia32_vpdpwssd256_mask
 * with __S, __A and __B cast to __v8si and a constant all-ones mask,
 * (__mmask8)-1. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpwssd256_mask((__v8si)__S, (__v8si)__A,
                                                  (__v8si)__B, (__mmask8)-1);
}
/* `_mask` form of the 256-bit vpdpwssds wrapper: casts __S, __A and __B to
 * __v8si and forwards them with the mask __U to
 * __builtin_ia32_vpdpwssds256_mask, returning the result as __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpwssds256_mask((__v8si)__S, (__v8si)__A,
                                                   (__v8si)__B, (__mmask8)__U);
}
/* `_maskz` form of the 256-bit vpdpwssds wrapper: casts __S, __A and __B to
 * __v8si and forwards them with the mask __U to
 * __builtin_ia32_vpdpwssds256_maskz, returning the result as __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpwssds256_maskz((__v8si)__S, (__v8si)__A,
                                                    (__v8si)__B, (__mmask8)__U);
}
/* Unmasked 256-bit vpdpwssds wrapper: calls __builtin_ia32_vpdpwssds256_mask
 * with __S, __A and __B cast to __v8si and a constant all-ones mask,
 * (__mmask8)-1. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vpdpwssds256_mask((__v8si)__S, (__v8si)__A,
                                                   (__v8si)__B, (__mmask8)-1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) -1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) -1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) -1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) -1);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -0,0 +1,146 @@
/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VNNIINTRIN_H
#define __AVX512VNNIINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni")))
/* `_mask` form of the 512-bit vpdpbusd wrapper: casts __S, __A and __B to
 * __v16si and forwards them with the mask __U to
 * __builtin_ia32_vpdpbusd512_mask, returning the result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbusd512_mask((__v16si)__S, (__v16si)__A,
                                                  (__v16si)__B, (__mmask16)__U);
}
/* `_maskz` form of the 512-bit vpdpbusd wrapper: casts __S, __A and __B to
 * __v16si and forwards them with the mask __U to
 * __builtin_ia32_vpdpbusd512_maskz, returning the result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbusd512_maskz((__v16si)__S, (__v16si)__A,
                                                   (__v16si)__B, (__mmask16)__U);
}
/* Unmasked 512-bit vpdpbusd wrapper: calls __builtin_ia32_vpdpbusd512_mask
 * with __S, __A and __B cast to __v16si and a constant all-ones mask,
 * (__mmask16)-1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbusd512_mask((__v16si)__S, (__v16si)__A,
                                                  (__v16si)__B, (__mmask16)-1);
}
/* `_mask` form of the 512-bit vpdpbusds wrapper: casts __S, __A and __B to
 * __v16si and forwards them with the mask __U to
 * __builtin_ia32_vpdpbusds512_mask, returning the result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbusds512_mask((__v16si)__S, (__v16si)__A,
                                                   (__v16si)__B, (__mmask16)__U);
}
/* `_maskz` form of the 512-bit vpdpbusds wrapper: casts __S, __A and __B to
 * __v16si and forwards them with the mask __U to
 * __builtin_ia32_vpdpbusds512_maskz, returning the result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbusds512_maskz((__v16si)__S, (__v16si)__A,
                                                    (__v16si)__B, (__mmask16)__U);
}
/* Unmasked 512-bit vpdpbusds wrapper: calls __builtin_ia32_vpdpbusds512_mask
 * with __S, __A and __B cast to __v16si and a constant all-ones mask,
 * (__mmask16)-1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpbusds512_mask((__v16si)__S, (__v16si)__A,
                                                   (__v16si)__B, (__mmask16)-1);
}
/* `_mask` form of the 512-bit vpdpwssd wrapper: casts __S, __A and __B to
 * __v16si and forwards them with the mask __U to
 * __builtin_ia32_vpdpwssd512_mask, returning the result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpwssd512_mask((__v16si)__S, (__v16si)__A,
                                                  (__v16si)__B, (__mmask16)__U);
}
/* `_maskz` form of the 512-bit vpdpwssd wrapper: casts __S, __A and __B to
 * __v16si and forwards them with the mask __U to
 * __builtin_ia32_vpdpwssd512_maskz, returning the result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpwssd512_maskz((__v16si)__S, (__v16si)__A,
                                                   (__v16si)__B, (__mmask16)__U);
}
/* Unmasked 512-bit vpdpwssd wrapper: calls __builtin_ia32_vpdpwssd512_mask
 * with __S, __A and __B cast to __v16si and a constant all-ones mask,
 * (__mmask16)-1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpwssd512_mask((__v16si)__S, (__v16si)__A,
                                                  (__v16si)__B, (__mmask16)-1);
}
/* `_mask` form of the 512-bit vpdpwssds wrapper: casts __S, __A and __B to
 * __v16si and forwards them with the mask __U to
 * __builtin_ia32_vpdpwssds512_mask, returning the result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpwssds512_mask((__v16si)__S, (__v16si)__A,
                                                   (__v16si)__B, (__mmask16)__U);
}
/* `_maskz` form of the 512-bit vpdpwssds wrapper: casts __S, __A and __B to
 * __v16si and forwards them with the mask __U to
 * __builtin_ia32_vpdpwssds512_maskz, returning the result as __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpwssds512_maskz((__v16si)__S, (__v16si)__A,
                                                    (__v16si)__B, (__mmask16)__U);
}
/* Unmasked 512-bit vpdpwssds wrapper: calls __builtin_ia32_vpdpwssds512_mask
 * with __S, __A and __B cast to __v16si and a constant all-ones mask,
 * (__mmask16)-1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vpdpwssds512_mask((__v16si)__S, (__v16si)__A,
                                                   (__v16si)__B, (__mmask16)-1);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -0,0 +1,99 @@
/*===------------- avx512vpopcntdqvlintrin.h - AVX512VPOPCNTDQ + VL intrinsics
*------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error \
"Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VPOPCNTDQVLINTRIN_H
#define __AVX512VPOPCNTDQVLINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl")))
/* Returns the result of __builtin_ia32_vpopcntq_128 applied to __A (cast to
 * __v2di), converted back to __m128i. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_popcnt_epi64(__m128i __A)
{
  return (__m128i) __builtin_ia32_vpopcntq_128((__v2di) __A);
}
/* Masked variant: passes the mask __U, the result of _mm_popcnt_epi64(__A)
 * and the vector __W (both as __v2di) to __builtin_ia32_selectq_128.
 * NOTE(review): presumably the popcount is taken where a mask bit is set and
 * __W elsewhere -- confirm against the select builtin's definition. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A)
{
  return (__m128i) __builtin_ia32_selectq_128((__mmask8) __U,
                                              (__v2di) _mm_popcnt_epi64(__A),
                                              (__v2di) __W);
}
/* Delegates to _mm_mask_popcnt_epi64 with an all-zero vector
 * (_mm_setzero_si128()) supplied as the __W argument. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A)
{
  return _mm_mask_popcnt_epi64((__m128i) _mm_setzero_si128(), __U, __A);
}
/* Returns the result of __builtin_ia32_vpopcntd_128 applied to __A (cast to
 * __v4si), converted back to __m128i. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_popcnt_epi32(__m128i __A)
{
  return (__m128i) __builtin_ia32_vpopcntd_128((__v4si) __A);
}
/* Masked variant: passes the mask __U, the result of _mm_popcnt_epi32(__A)
 * and the vector __W (both as __v4si) to __builtin_ia32_selectd_128.
 * NOTE(review): presumably the popcount is taken where a mask bit is set and
 * __W elsewhere -- confirm against the select builtin's definition. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A)
{
  return (__m128i) __builtin_ia32_selectd_128((__mmask8) __U,
                                              (__v4si) _mm_popcnt_epi32(__A),
                                              (__v4si) __W);
}
/* Delegates to _mm_mask_popcnt_epi32 with an all-zero vector
 * (_mm_setzero_si128()) supplied as the __W argument. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A)
{
  return _mm_mask_popcnt_epi32((__m128i) _mm_setzero_si128(), __U, __A);
}
/* Returns the result of __builtin_ia32_vpopcntq_256 applied to __A (cast to
 * __v4di), converted back to __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_popcnt_epi64(__m256i __A)
{
  return (__m256i) __builtin_ia32_vpopcntq_256((__v4di) __A);
}
/* Masked variant: passes the mask __U, the result of
 * _mm256_popcnt_epi64(__A) and the vector __W (both as __v4di) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): presumably the popcount is taken where a mask bit is set and
 * __W elsewhere -- confirm against the select builtin's definition. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A)
{
  return (__m256i) __builtin_ia32_selectq_256((__mmask8) __U,
                                              (__v4di) _mm256_popcnt_epi64(__A),
                                              (__v4di) __W);
}
/* Delegates to _mm256_mask_popcnt_epi64 with an all-zero vector
 * (_mm256_setzero_si256()) supplied as the __W argument. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A)
{
  return _mm256_mask_popcnt_epi64((__m256i) _mm256_setzero_si256(), __U, __A);
}
/* Returns the result of __builtin_ia32_vpopcntd_256 applied to __A (cast to
 * __v8si), converted back to __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_popcnt_epi32(__m256i __A)
{
  return (__m256i) __builtin_ia32_vpopcntd_256((__v8si) __A);
}
/* Masked variant: passes the mask __U, the result of
 * _mm256_popcnt_epi32(__A) and the vector __W (both as __v8si) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): presumably the popcount is taken where a mask bit is set and
 * __W elsewhere -- confirm against the select builtin's definition. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A)
{
  return (__m256i) __builtin_ia32_selectd_256((__mmask8) __U,
                                              (__v8si) _mm256_popcnt_epi32(__A),
                                              (__v8si) __W);
}
/* Delegates to _mm256_mask_popcnt_epi32 with an all-zero vector
 * (_mm256_setzero_si256()) supplied as the __W argument. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A)
{
  return _mm256_mask_popcnt_epi32((__m256i) _mm256_setzero_si256(), __U, __A);
}
#undef __DEFAULT_FN_ATTRS
#endif

93
c_headers/cetintrin.h Normal file
View File

@ -0,0 +1,93 @@
/*===---- cetintrin.h - CET intrinsic ------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <cetintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __CETINTRIN_H
#define __CETINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("shstk")))
/* Passes __a to __builtin_ia32_incsspd; nothing is returned. */
static __inline__ void __DEFAULT_FN_ATTRS
_incsspd(int __a)
{
  __builtin_ia32_incsspd(__a);
}
#ifdef __x86_64__
/* Passes __a to __builtin_ia32_incsspq; nothing is returned. */
static __inline__ void __DEFAULT_FN_ATTRS
_incsspq(unsigned long long __a)
{
  __builtin_ia32_incsspq(__a);
}
#endif /* __x86_64__ */
/* Returns whatever __builtin_ia32_rdsspd yields for the argument __a. */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_rdsspd(unsigned int __a)
{
  return __builtin_ia32_rdsspd(__a);
}
#ifdef __x86_64__
/* Returns whatever __builtin_ia32_rdsspq yields for the argument __a. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_rdsspq(unsigned long long __a)
{
  return __builtin_ia32_rdsspq(__a);
}
#endif /* __x86_64__ */
/* Invokes __builtin_ia32_saveprevssp; takes no arguments and returns nothing.
 * The parameter list is spelled `(void)` so that the definition is a real
 * prototype when this header is compiled as C (an empty `()` leaves the
 * parameters unspecified there). */
static __inline__ void __DEFAULT_FN_ATTRS _saveprevssp(void) {
  __builtin_ia32_saveprevssp();
}
/* Passes the pointer __p to __builtin_ia32_rstorssp; nothing is returned. */
static __inline__ void __DEFAULT_FN_ATTRS
_rstorssp(void * __p)
{
  __builtin_ia32_rstorssp(__p);
}
/* Passes the value __a and the pointer __p to __builtin_ia32_wrssd; nothing
 * is returned. */
static __inline__ void __DEFAULT_FN_ATTRS
_wrssd(unsigned int __a, void * __p)
{
  __builtin_ia32_wrssd(__a, __p);
}
#ifdef __x86_64__
/* Passes the value __a and the pointer __p to __builtin_ia32_wrssq; nothing
 * is returned. */
static __inline__ void __DEFAULT_FN_ATTRS
_wrssq(unsigned long long __a, void * __p)
{
  __builtin_ia32_wrssq(__a, __p);
}
#endif /* __x86_64__ */
/* Passes the value __a and the pointer __p to __builtin_ia32_wrussd; nothing
 * is returned. */
static __inline__ void __DEFAULT_FN_ATTRS
_wrussd(unsigned int __a, void * __p)
{
  __builtin_ia32_wrussd(__a, __p);
}
#ifdef __x86_64__
/* Passes the value __a and the pointer __p to __builtin_ia32_wrussq; nothing
 * is returned. */
static __inline__ void __DEFAULT_FN_ATTRS
_wrussq(unsigned long long __a, void * __p)
{
  __builtin_ia32_wrussq(__a, __p);
}
#endif /* __x86_64__ */
/* Invokes __builtin_ia32_setssbsy; takes no arguments and returns nothing.
 * The parameter list is spelled `(void)` so that the definition is a real
 * prototype when this header is compiled as C (an empty `()` leaves the
 * parameters unspecified there). */
static __inline__ void __DEFAULT_FN_ATTRS _setssbsy(void) {
  __builtin_ia32_setssbsy();
}
/* Passes the pointer __p to __builtin_ia32_clrssbsy; nothing is returned. */
static __inline__ void __DEFAULT_FN_ATTRS
_clrssbsy(void * __p)
{
  __builtin_ia32_clrssbsy(__p);
}
#undef __DEFAULT_FN_ATTRS
#endif /* __CETINTRIN_H */

View File

@ -32,7 +32,7 @@
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clflushopt")))
/* Passes the address __m to __builtin_ia32_clflushopt. The parameter is
 * `void const *`, so any object pointer (including the `char *` accepted
 * previously) can be supplied without a cast. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm_clflushopt(void const * __m) {
  __builtin_ia32_clflushopt(__m);
}

52
c_headers/clwbintrin.h Normal file
View File

@ -0,0 +1,52 @@
/*===---- clwbintrin.h - CLWB intrinsic ------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <clwbintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __CLWBINTRIN_H
#define __CLWBINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clwb")))
/// \brief Requests a write-back of the cache line that contains the linear
///    address given in \a __p: if that line is modified, it is written back
///    to memory from any level of the cache hierarchy in the cache coherence
///    domain.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> CLWB </c> instruction.
///
/// \param __p
///    A pointer to the memory location used to identify the cache line to be
///    written back.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_clwb(void const *__p)
{
  __builtin_ia32_clwb(__p);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@ -173,16 +173,24 @@
#define bit_AVX512VL 0x80000000
/* Features in %ecx for leaf 7 sub-leaf 0 */
#define bit_PREFTCHWT1 0x00000001
#define bit_AVX512VBMI 0x00000002
#define bit_PKU 0x00000004
#define bit_OSPKE 0x00000010
#define bit_PREFTCHWT1 0x00000001
#define bit_AVX512VBMI 0x00000002
#define bit_PKU 0x00000004
#define bit_OSPKE 0x00000010
#define bit_AVX512VBMI2 0x00000040
#define bit_SHSTK 0x00000080
#define bit_GFNI 0x00000100
#define bit_VAES 0x00000200
#define bit_VPCLMULQDQ 0x00000400
#define bit_AVX512VNNI 0x00000800
#define bit_AVX512BITALG 0x00001000
#define bit_AVX512VPOPCNTDQ 0x00004000
#define bit_RDPID 0x00400000
#define bit_RDPID 0x00400000
/* Features in %edx for leaf 7 sub-leaf 0 */
#define bit_AVX5124VNNIW 0x00000004
#define bit_AVX5124FMAPS 0x00000008
#define bit_IBT 0x00100000
/* Features in %eax for leaf 13 sub-leaf 1 */
#define bit_XSAVEOPT 0x00000001
@ -192,6 +200,7 @@
/* Features in %ecx for leaf 0x80000001 */
#define bit_LAHF_LM 0x00000001
#define bit_ABM 0x00000020
#define bit_LZCNT bit_ABM /* for gcc compat */
#define bit_SSE4a 0x00000040
#define bit_PRFCHW 0x00000100
#define bit_XOP 0x00000800

View File

@ -80,7 +80,7 @@ min(const __T &__a, const __T &__b, __Cmp __cmp) {
/// Returns a reference to the smaller of \a __a and \a __b as ordered by
/// operator<. When \a __a is not less than \a __b (including when the two
/// compare equivalent), \a __b is returned.
template <class __T>
inline __device__ const __T &
min(const __T &__a, const __T &__b) {
  return __a < __b ? __a : __b;
}
#ifdef _LIBCPP_END_NAMESPACE_STD

View File

@ -26,7 +26,6 @@
#include_next <new>
// Device overrides for placement new and delete.
#pragma push_macro("CUDA_NOEXCEPT")
#if __cplusplus >= 201103L
#define CUDA_NOEXCEPT noexcept
@ -34,6 +33,55 @@
#define CUDA_NOEXCEPT
#endif
// Device overrides for non-placement new and delete.
// Device-side operator new: a zero-byte request is bumped to one byte, then
// the size is handed to ::malloc and its result is returned as-is.
__device__ inline void *operator new(__SIZE_TYPE__ size) {
  return ::malloc(size == 0 ? 1 : size);
}
// nothrow_t overload of device operator new: the tag argument is ignored and
// the request is forwarded to the plain ::operator new(size).
__device__ inline void *
operator new(__SIZE_TYPE__ size, const std::nothrow_t &) CUDA_NOEXCEPT {
  void *const mem = ::operator new(size);
  return mem;
}
// Array form of device operator new: forwards to the scalar ::operator new
// with the same size.
__device__ inline void *operator new[](__SIZE_TYPE__ size) {
  void *const mem = ::operator new(size);
  return mem;
}
// nothrow_t overload of device operator new[]: the tag argument is ignored
// and the request is forwarded to the scalar ::operator new(size). Marked
// CUDA_NOEXCEPT like the other nothrow overloads in this header.
__device__ inline void *operator new[](__SIZE_TYPE__ size,
                                       const std::nothrow_t &) CUDA_NOEXCEPT {
  return ::operator new(size);
}
// Device-side operator delete: a null pointer is ignored; anything else is
// released with ::free.
__device__ inline void operator delete(void* ptr) CUDA_NOEXCEPT {
  if (!ptr) {
    return;
  }
  ::free(ptr);
}
// nothrow_t overload of device operator delete: ignores the tag and forwards
// the pointer to the plain ::operator delete.
__device__ inline void
operator delete(void *ptr, const std::nothrow_t &) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
// Array form of device operator delete: forwards the pointer to the scalar
// ::operator delete.
__device__ inline void
operator delete[](void* ptr) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
// nothrow_t overload of device operator delete[]: ignores the tag and
// forwards the pointer to the scalar ::operator delete.
__device__ inline void
operator delete[](void *ptr, const std::nothrow_t &) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
// Sized delete, C++14 only.
#if __cplusplus >= 201402L
// Sized device operator delete: the size argument is not used; the pointer is
// forwarded to the unsized ::operator delete. Declared `inline`, like the
// other overloads here, because this definition lives in a header.
__device__ inline void operator delete(void *ptr,
                                       __SIZE_TYPE__ size) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
// Sized device operator delete[]: the size argument is not used; the pointer
// is forwarded to the unsized scalar ::operator delete. Declared `inline`,
// like the other overloads here, because this definition lives in a header.
__device__ inline void operator delete[](void *ptr,
                                         __SIZE_TYPE__ size) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
#endif
// Device overrides for placement new and delete.
// Placement form of device operator new: allocates nothing and returns the
// caller-supplied address unchanged.
__device__ inline void *
operator new(__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT { return __ptr; }
@ -42,6 +90,7 @@ __device__ inline void *operator new[](__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT
}
// Placement form of device operator delete: empty body, does nothing.
__device__ inline void
operator delete(void *, void *) CUDA_NOEXCEPT {
}
// Placement form of device operator delete[]: empty body, does nothing.
__device__ inline void
operator delete[](void *, void *) CUDA_NOEXCEPT {
}
#pragma pop_macro("CUDA_NOEXCEPT")
#endif // include guard

View File

@ -217,8 +217,8 @@ _mm_div_pd(__m128d __a, __m128d __b)
/// \brief Calculates the square root of the lower double-precision value of
/// the second operand and returns it in the lower 64 bits of the result.
/// The upper 64 bits of the result are copied from the upper double-
/// precision value of the first operand.
/// The upper 64 bits of the result are copied from the upper
/// double-precision value of the first operand.
///
/// \headerfile <x86intrin.h>
///
@ -260,8 +260,8 @@ _mm_sqrt_pd(__m128d __a)
/// \brief Compares lower 64-bit double-precision values of both operands, and
/// returns the lesser of the pair of values in the lower 64-bits of the
/// result. The upper 64 bits of the result are copied from the upper double-
/// precision value of the first operand.
/// result. The upper 64 bits of the result are copied from the upper
/// double-precision value of the first operand.
///
/// \headerfile <x86intrin.h>
///
@ -304,8 +304,8 @@ _mm_min_pd(__m128d __a, __m128d __b)
/// \brief Compares lower 64-bit double-precision values of both operands, and
/// returns the greater of the pair of values in the lower 64-bits of the
/// result. The upper 64 bits of the result are copied from the upper double-
/// precision value of the first operand.
/// result. The upper 64 bits of the result are copied from the upper
/// double-precision value of the first operand.
///
/// \headerfile <x86intrin.h>
///
@ -983,8 +983,10 @@ _mm_cmpnge_sd(__m128d __a, __m128d __b)
}
/// \brief Compares the lower double-precision floating-point values in each of
/// the two 128-bit floating-point vectors of [2 x double] for equality. The
/// comparison yields 0 for false, 1 for true.
/// the two 128-bit floating-point vectors of [2 x double] for equality.
///
/// The comparison yields 0 for false, 1 for true. If either of the two
/// lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
@ -996,7 +998,8 @@ _mm_cmpnge_sd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comieq_sd(__m128d __a, __m128d __b)
{
@ -1008,7 +1011,8 @@ _mm_comieq_sd(__m128d __a, __m128d __b)
/// the value in the first parameter is less than the corresponding value in
/// the second parameter.
///
/// The comparison yields 0 for false, 1 for true.
/// The comparison yields 0 for false, 1 for true. If either of the two
/// lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
@ -1020,7 +1024,8 @@ _mm_comieq_sd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comilt_sd(__m128d __a, __m128d __b)
{
@ -1032,7 +1037,8 @@ _mm_comilt_sd(__m128d __a, __m128d __b)
/// the value in the first parameter is less than or equal to the
/// corresponding value in the second parameter.
///
/// The comparison yields 0 for false, 1 for true.
/// The comparison yields 0 for false, 1 for true. If either of the two
/// lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
@ -1044,7 +1050,8 @@ _mm_comilt_sd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comile_sd(__m128d __a, __m128d __b)
{
@ -1056,7 +1063,8 @@ _mm_comile_sd(__m128d __a, __m128d __b)
/// the value in the first parameter is greater than the corresponding value
/// in the second parameter.
///
/// The comparison yields 0 for false, 1 for true.
/// The comparison yields 0 for false, 1 for true. If either of the two
/// lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
@ -1068,7 +1076,8 @@ _mm_comile_sd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comigt_sd(__m128d __a, __m128d __b)
{
@ -1080,7 +1089,8 @@ _mm_comigt_sd(__m128d __a, __m128d __b)
/// the value in the first parameter is greater than or equal to the
/// corresponding value in the second parameter.
///
/// The comparison yields 0 for false, 1 for true.
/// The comparison yields 0 for false, 1 for true. If either of the two
/// lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
@ -1092,7 +1102,8 @@ _mm_comigt_sd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comige_sd(__m128d __a, __m128d __b)
{
@ -1104,7 +1115,8 @@ _mm_comige_sd(__m128d __a, __m128d __b)
/// the value in the first parameter is unequal to the corresponding value in
/// the second parameter.
///
/// The comparison yields 0 for false, 1 for true.
/// The comparison yields 0 for false, 1 for true. If either of the two
/// lower double-precision values is NaN, 1 is returned.
///
/// \headerfile <x86intrin.h>
///
@ -1116,7 +1128,8 @@ _mm_comige_sd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 1 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comineq_sd(__m128d __a, __m128d __b)
{
@ -1127,7 +1140,7 @@ _mm_comineq_sd(__m128d __a, __m128d __b)
/// the two 128-bit floating-point vectors of [2 x double] for equality. The
/// comparison yields 0 for false, 1 for true.
///
/// If either of the two lower double-precision values is NaN, 1 is returned.
/// If either of the two lower double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
@ -1140,7 +1153,7 @@ _mm_comineq_sd(__m128d __a, __m128d __b)
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 1 is returned.
/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomieq_sd(__m128d __a, __m128d __b)
{
@ -1153,7 +1166,7 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b)
/// the second parameter.
///
/// The comparison yields 0 for false, 1 for true. If either of the two lower
/// double-precision values is NaN, 1 is returned.
/// double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
@ -1166,7 +1179,7 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b)
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 1 is returned.
/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomilt_sd(__m128d __a, __m128d __b)
{
@ -1179,7 +1192,7 @@ _mm_ucomilt_sd(__m128d __a, __m128d __b)
/// corresponding value in the second parameter.
///
/// The comparison yields 0 for false, 1 for true. If either of the two lower
/// double-precision values is NaN, 1 is returned.
/// double-precision values is NaN, 0 is returned.
///
/// \headerfile <x86intrin.h>
///
@ -1192,7 +1205,7 @@ _mm_ucomilt_sd(__m128d __a, __m128d __b)
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results. If either of the two
/// lower double-precision values is NaN, 1 is returned.
/// lower double-precision values is NaN, 0 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomile_sd(__m128d __a, __m128d __b)
{
@ -1257,7 +1270,7 @@ _mm_ucomige_sd(__m128d __a, __m128d __b)
/// the second parameter.
///
/// The comparison yields 0 for false, 1 for true. If either of the two lower
/// double-precision values is NaN, 0 is returned.
/// double-precision values is NaN, 1 is returned.
///
/// \headerfile <x86intrin.h>
///
@ -1270,7 +1283,7 @@ _mm_ucomige_sd(__m128d __a, __m128d __b)
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result. If either of the two
/// lower double-precision values is NaN, 0 is returned.
/// lower double-precision values is NaN, 1 is returned.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomineq_sd(__m128d __a, __m128d __b)
{
@ -1935,14 +1948,15 @@ _mm_store_pd(double *__dp, __m128d __a)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c>VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
/// This intrinsic corresponds to the
/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
///
/// \param __dp
/// A pointer to a memory location that can store two double-precision
/// values.
/// \param __a
/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
/// of the values in \a dp.
/// of the values in \a __dp.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_store1_pd(double *__dp, __m128d __a)
{
@ -1950,18 +1964,20 @@ _mm_store1_pd(double *__dp, __m128d __a)
_mm_store_pd(__dp, __a);
}
/// \brief Stores a 128-bit vector of [2 x double] into an aligned memory
/// location.
/// \brief Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
/// the upper and lower 64 bits of a memory location.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
/// This intrinsic corresponds to the
/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
///
/// \param __dp
/// A pointer to a 128-bit memory location. The address of the memory
/// location has to be 16-byte aligned.
/// A pointer to a memory location that can store two double-precision
/// values.
/// \param __a
/// A 128-bit vector of [2 x double] containing the values to be stored.
/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
/// of the values in \a __dp.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_store_pd1(double *__dp, __m128d __a)
{
@ -2258,7 +2274,11 @@ _mm_adds_epu16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_avg_epu8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
return (__m128i)__builtin_convertvector(
((__builtin_convertvector((__v16qu)__a, __v16hu) +
__builtin_convertvector((__v16qu)__b, __v16hu)) + 1)
>> 1, __v16qu);
}
/// \brief Computes the rounded averages of corresponding elements of two
@ -2278,7 +2298,11 @@ _mm_avg_epu8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_avg_epu16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
return (__m128i)__builtin_convertvector(
((__builtin_convertvector((__v8hu)__a, __v8su) +
__builtin_convertvector((__v8hu)__b, __v8su)) + 1)
>> 1, __v8hu);
}
/// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16]
@ -3838,8 +3862,7 @@ _mm_set1_epi8(char __b)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
/// instruction.
/// This intrinsic does not correspond to a specific instruction.
///
/// \param __q0
/// A 64-bit integral value used to initialize the lower 64 bits of the
@ -4010,7 +4033,7 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
/// specified unaligned memory location. When a mask bit is 1, the
/// corresponding byte is written, otherwise it is not written.
///
/// To minimize caching, the date is flagged as non-temporal (unlikely to be
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon). Exception and trap behavior for elements not selected
/// for storage to memory are implementation dependent.
///
@ -4524,8 +4547,8 @@ _mm_unpackhi_epi32(__m128i __a, __m128i __b)
return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
}
/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
/// of [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
/// \brief Unpacks the high-order 64-bit elements from two 128-bit vectors of
/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
///
/// \headerfile <x86intrin.h>
///
@ -4657,7 +4680,7 @@ _mm_unpacklo_epi64(__m128i __a, __m128i __b)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
/// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction.
///
/// \param __a
/// A 128-bit integer vector operand. The lower 64 bits are moved to the
@ -4674,7 +4697,7 @@ _mm_movepi64_pi64(__m128i __a)
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVQ / MOVQ / MOVD </c> instruction.
/// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction.
///
/// \param __a
/// A 64-bit value.
@ -4704,8 +4727,8 @@ _mm_move_epi64(__m128i __a)
return __builtin_shufflevector((__v2di)__a, (__m128i){ 0 }, 0, 2);
}
/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
/// of [2 x double] and interleaves them into a 128-bit vector of [2 x
/// \brief Unpacks the high-order 64-bit elements from two 128-bit vectors of
/// [2 x double] and interleaves them into a 128-bit vector of [2 x
/// double].
///
/// \headerfile <x86intrin.h>
@ -4725,7 +4748,7 @@ _mm_unpackhi_pd(__m128d __a, __m128d __b)
return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
}
/// \brief Unpacks the low-order (even-indexed) values from two 128-bit vectors
/// \brief Unpacks the low-order 64-bit elements from two 128-bit vectors
/// of [2 x double] and interleaves them into a 128-bit vector of [2 x
/// double].
///
@ -4784,9 +4807,9 @@ _mm_movemask_pd(__m128d __a)
/// A 128-bit vector of [2 x double].
/// \param i
/// An 8-bit immediate value. The least significant two bits specify which
/// elements to copy from a and b: \n
/// Bit[0] = 0: lower element of a copied to lower element of result. \n
/// Bit[0] = 1: upper element of a copied to lower element of result. \n
/// elements to copy from \a a and \a b: \n
/// Bit[0] = 0: lower element of \a a copied to lower element of result. \n
/// Bit[0] = 1: upper element of \a a copied to lower element of result. \n
/// Bit[1] = 0: lower element of \a b copied to upper element of result. \n
/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.

View File

@ -143,4 +143,18 @@
# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
#endif
/* FLT16_* characteristics macros. They are only exposed when the including
 * translation unit defines __STDC_WANT_IEC_60559_TYPES_EXT__; each one simply
 * forwards to the compiler-predefined __FLT16_*__ macro of the same name. */
#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
# define FLT16_MANT_DIG __FLT16_MANT_DIG__
# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__
# define FLT16_DIG __FLT16_DIG__
# define FLT16_MIN_EXP __FLT16_MIN_EXP__
# define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__
# define FLT16_MAX_EXP __FLT16_MAX_EXP__
# define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__
# define FLT16_MAX __FLT16_MAX__
# define FLT16_EPSILON __FLT16_EPSILON__
# define FLT16_MIN __FLT16_MIN__
# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__
#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */
#endif /* __FLOAT_H */

View File

@ -60,73 +60,73 @@ _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@ -144,13 +144,13 @@ _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
@ -168,37 +168,37 @@ _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
@ -216,13 +216,13 @@ _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
#undef __DEFAULT_FN_ATTRS

View File

@ -46,85 +46,85 @@ _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
@ -142,13 +142,13 @@ _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
@ -166,37 +166,37 @@ _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
@ -214,13 +214,13 @@ _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
#undef __DEFAULT_FN_ATTRS

202
c_headers/gfniintrin.h Normal file
View File

@ -0,0 +1,202 @@
/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __GFNIINTRIN_H
#define __GFNIINTRIN_H
/* 128-bit wrappers around the vgf2p8affineinvqb builtin. They are written as
 * macros so that I is handed to the builtin as spelled by the caller (only
 * cast to char).
 * NOTE(review): presumably the builtin requires I to be a compile-time
 * immediate - confirm against the builtin definition. */
/* Unmasked form: passes A and B, viewed as __v16qi, plus I to
 * __builtin_ia32_vgf2p8affineinvqb_v16qi and yields an __m128i. */
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \
(__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), \
(char)(I)); })
/* Merge-masked form: blends the unmasked result with S under the __mmask16
 * value U through __builtin_ia32_selectb_128.
 * NOTE(review): presumably set mask bits take the computed byte and clear
 * bits keep the byte from S - confirm against the select builtin. */
#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \
(__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
(__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \
(__v16qi)(__m128i)(S)); })
/* Zero-masked form: the merge-masked form with an all-zero vector
 * (_mm_setzero_si128) supplied as S. */
#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \
(__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \
U, A, B, I); })
/* 256-bit wrappers around the vgf2p8affineinvqb builtin; I is forwarded as
 * spelled by the caller, cast to char. */
/* Unmasked form: passes A and B, viewed as __v32qi, plus I to
 * __builtin_ia32_vgf2p8affineinvqb_v32qi and yields an __m256i. */
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \
(__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
(__v32qi)(__m256i)(B), \
(char)(I)); })
/* Merge-masked form: blends the unmasked result with S under the __mmask32
 * value U through __builtin_ia32_selectb_256.
 * NOTE(review): presumably set mask bits take the computed byte and clear
 * bits keep the byte from S - confirm against the select builtin. */
#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \
(__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
(__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \
(__v32qi)(__m256i)(S)); })
/* Zero-masked form: the merge-masked form with an all-zero vector
 * (_mm256_setzero_si256) supplied as S. */
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \
(__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
U, A, B, I); })
/* 512-bit wrappers around the vgf2p8affineinvqb builtin; I is forwarded as
 * spelled by the caller, cast to char. */
/* Unmasked form: passes A and B, viewed as __v64qi, plus I to
 * __builtin_ia32_vgf2p8affineinvqb_v64qi and yields an __m512i. */
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \
(__v64qi)(__m512i)(B), \
(char)(I)); })
/* Merge-masked form: blends the unmasked result with S under the __mmask64
 * value U through __builtin_ia32_selectb_512.
 * NOTE(review): presumably set mask bits take the computed byte and clear
 * bits keep the byte from S - confirm against the select builtin. */
#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
(__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \
(__v64qi)(__m512i)(S)); })
/* Zero-masked form: the merge-masked form with the vector returned by
 * _mm512_setzero_qi() supplied as S. */
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \
(__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_qi(), \
U, A, B, I); })
/* 128-bit wrappers around the vgf2p8affineqb builtin (the non-inverting
 * counterpart of the affineinv macros); I is forwarded as spelled by the
 * caller, cast to char. */
/* Unmasked form: passes A and B, viewed as __v16qi, plus I to
 * __builtin_ia32_vgf2p8affineqb_v16qi and yields an __m128i. */
#define _mm_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \
(__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), \
(char)(I)); })
/* Merge-masked form: blends the unmasked result with S under the __mmask16
 * value U through __builtin_ia32_selectb_128.
 * NOTE(review): presumably set mask bits take the computed byte and clear
 * bits keep the byte from S - confirm against the select builtin. */
#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \
(__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
(__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
(__v16qi)(__m128i)(S)); })
/* Zero-masked form: the merge-masked form with an all-zero vector
 * (_mm_setzero_si128) supplied as S. */
#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \
(__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \
U, A, B, I); })
/* 256-bit wrappers around the vgf2p8affineqb builtin; I is forwarded as
 * spelled by the caller, cast to char. */
/* Unmasked form: passes A and B, viewed as __v32qi, plus I to
 * __builtin_ia32_vgf2p8affineqb_v32qi and yields an __m256i. */
#define _mm256_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \
(__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
(__v32qi)(__m256i)(B), \
(char)(I)); })
/* Merge-masked form: blends the unmasked result with S under the __mmask32
 * value U through __builtin_ia32_selectb_256.
 * NOTE(review): presumably set mask bits take the computed byte and clear
 * bits keep the byte from S - confirm against the select builtin. */
#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \
(__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
(__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \
(__v32qi)(__m256i)(S)); })
/* Zero-masked form: the merge-masked form with an all-zero vector
 * (_mm256_setzero_si256) supplied as S. */
#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \
(__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \
U, A, B, I); })
/* 512-bit wrappers around the vgf2p8affineqb builtin; I is forwarded as
 * spelled by the caller, cast to char. */
/* Unmasked form: passes A and B, viewed as __v64qi, plus I to
 * __builtin_ia32_vgf2p8affineqb_v64qi and yields an __m512i. */
#define _mm512_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
(__v64qi)(__m512i)(B), \
(char)(I)); })
/* Merge-masked form: blends the unmasked result with S under the __mmask64
 * value U through __builtin_ia32_selectb_512.
 * NOTE(review): presumably set mask bits take the computed byte and clear
 * bits keep the byte from S - confirm against the select builtin. */
#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
(__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \
(__v64qi)(__m512i)(S)); })
/* Zero-masked form: the merge-masked form with the vector returned by
 * _mm512_setzero_qi() supplied as S. */
#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \
(__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_qi(), \
U, A, B, I); })
/* Default attributes for simple form (no masking). */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni")))
/* Default attributes for ZMM forms. */
#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni")))
/* Default attributes for VLX forms. */
#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni")))
/* 128-bit unmasked form: hands both operands, viewed as __v16qi, to
 * __builtin_ia32_vgf2p8mulb_v16qi and returns the result as an __m128i.
 * NOTE(review): going by the builtin name this is a per-byte GF(2^8)
 * multiply - confirm against the ISA reference. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
{
  __v16qi __lhs = (__v16qi)__A;
  __v16qi __rhs = (__v16qi)__B;
  return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi(__lhs, __rhs);
}
/* 128-bit merge-masked form: computes _mm_gf2p8mul_epi8(__A, __B) and blends
 * it with __S under mask __U through __builtin_ia32_selectb_128.
 * NOTE(review): presumably set mask bits take the computed byte and clear
 * bits keep the byte from __S - confirm against the select builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS_VL
_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)
{
  __v16qi __product = (__v16qi)_mm_gf2p8mul_epi8(__A, __B);
  __v16qi __fallback = (__v16qi)__S;
  return (__m128i)__builtin_ia32_selectb_128(__U, __product, __fallback);
}
/* 128-bit zero-masked form: delegates to _mm_mask_gf2p8mul_epi8 with an
 * all-zero vector (_mm_setzero_si128) as the pass-through operand. */
static __inline__ __m128i __DEFAULT_FN_ATTRS_VL
_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)
{
  __m128i __zero = (__m128i)_mm_setzero_si128();
  return _mm_mask_gf2p8mul_epi8(__zero, __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,
(__v32qi) __B);
}
/* 256-bit merge-masked form: computes _mm256_gf2p8mul_epi8(__A, __B) and
 * blends it with __S under mask __U through __builtin_ia32_selectb_256.
 * NOTE(review): presumably set mask bits take the computed byte and clear
 * bits keep the byte from __S - confirm against the select builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS_VL
_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)
{
  __v32qi __product = (__v32qi)_mm256_gf2p8mul_epi8(__A, __B);
  __v32qi __fallback = (__v32qi)__S;
  return (__m256i)__builtin_ia32_selectb_256(__U, __product, __fallback);
}
/* 256-bit zero-masked form: delegates to _mm256_mask_gf2p8mul_epi8 with an
 * all-zero vector (_mm256_setzero_si256) as the pass-through operand. */
static __inline__ __m256i __DEFAULT_FN_ATTRS_VL
_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)
{
  __m256i __zero = (__m256i)_mm256_setzero_si256();
  return _mm256_mask_gf2p8mul_epi8(__zero, __U, __A, __B);
}
/* 512-bit unmasked form: hands both operands, viewed as __v64qi, to
 * __builtin_ia32_vgf2p8mulb_v64qi and returns the result as an __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
{
  __v64qi __lhs = (__v64qi)__A;
  __v64qi __rhs = (__v64qi)__B;
  return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi(__lhs, __rhs);
}
/* 512-bit merge-masked form: computes _mm512_gf2p8mul_epi8(__A, __B) and
 * blends it with __S under mask __U through __builtin_ia32_selectb_512.
 * NOTE(review): presumably set mask bits take the computed byte and clear
 * bits keep the byte from __S - confirm against the select builtin. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)
{
  __v64qi __product = (__v64qi)_mm512_gf2p8mul_epi8(__A, __B);
  __v64qi __fallback = (__v64qi)__S;
  return (__m512i)__builtin_ia32_selectb_512(__U, __product, __fallback);
}
/* 512-bit zero-masked form: delegates to _mm512_mask_gf2p8mul_epi8 with the
 * vector returned by _mm512_setzero_qi() as the pass-through operand. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)
{
  __m512i __zero = (__m512i)_mm512_setzero_qi();
  return _mm512_mask_gf2p8mul_epi8(__zero, __U, __A, __B);
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_F
#undef __DEFAULT_FN_ATTRS_VL
#endif // __GFNIINTRIN_H

View File

@ -58,6 +58,10 @@
#include <clflushoptintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
#include <clwbintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
#include <avxintrin.h>
#endif
@ -114,6 +118,10 @@ _mm256_cvtph_ps(__m128i __a)
}
#endif /* __AVX2__ */
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
#include <vpclmulqdqintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
#endif
@ -142,6 +150,10 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512bwintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif
@ -150,10 +162,29 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512vpopcntdqintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
#include <avx512vpopcntdqvlintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
@ -191,6 +222,15 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512vbmivlintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
#include <avx512vbmi2intrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VBMI2__) && defined(__AVX512VL__))
#include <avx512vlvbmi2intrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
#include <avx512pfintrin.h>
#endif
@ -199,6 +239,14 @@ _mm256_cvtph_ps(__m128i __a)
#include <pkuintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
#include <vaesintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
#include <gfniintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
@ -315,6 +363,10 @@ _writegsbase_u64(unsigned long long __V)
#include <xsavesintrin.h>
#endif
/* <cetintrin.h>: always included unless _MSC_VER is defined and modules are
 * not in use; then only when __SHSTK__ is defined. */
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
#include <cetintrin.h>
#endif
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
 * whereas others are also available at all times. For that reason the header
 * is included unconditionally, with no feature-macro gate around it. */
#include <adxintrin.h>

View File

@ -38,6 +38,10 @@
#include <armintr.h>
#endif
/* Include <arm64intr.h> only when _M_ARM64 is defined.
 * NOTE(review): _M_ARM64 is presumably the MSVC-style target macro for
 * 64-bit ARM -- confirm against the compiler's predefined macros. */
#if defined(_M_ARM64)
#include <arm64intr.h>
#endif
/* For the definition of jmp_buf. */
#if __STDC_HOSTED__
#include <setjmp.h>
@ -828,7 +832,7 @@ _InterlockedCompareExchange_nf(long volatile *_Destination,
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
return _Comparand;
}
static __inline__ short __DEFAULT_FN_ATTRS
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_rel(long volatile *_Destination,
long _Exchange, long _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,

View File

@ -11381,6 +11381,8 @@ half16 __ovld __cnfn bitselect(half16 a, half16 b, half16 c);
* For each component of a vector type,
* result[i] = if MSB of c[i] is set ? b[i] : a[i].
* For a scalar type, result = c ? b : a.
* b and a must have the same type.
* c must have the same number of elements and bits as a.
*/
char __ovld __cnfn select(char a, char b, char c);
uchar __ovld __cnfn select(uchar a, uchar b, char c);
@ -11394,60 +11396,7 @@ char8 __ovld __cnfn select(char8 a, char8 b, char8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, char8 c);
char16 __ovld __cnfn select(char16 a, char16 b, char16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, char16 c);
short __ovld __cnfn select(short a, short b, char c);
ushort __ovld __cnfn select(ushort a, ushort b, char c);
short2 __ovld __cnfn select(short2 a, short2 b, char2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, char2 c);
short3 __ovld __cnfn select(short3 a, short3 b, char3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, char3 c);
short4 __ovld __cnfn select(short4 a, short4 b, char4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, char4 c);
short8 __ovld __cnfn select(short8 a, short8 b, char8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, char8 c);
short16 __ovld __cnfn select(short16 a, short16 b, char16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, char16 c);
int __ovld __cnfn select(int a, int b, char c);
uint __ovld __cnfn select(uint a, uint b, char c);
int2 __ovld __cnfn select(int2 a, int2 b, char2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, char2 c);
int3 __ovld __cnfn select(int3 a, int3 b, char3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, char3 c);
int4 __ovld __cnfn select(int4 a, int4 b, char4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, char4 c);
int8 __ovld __cnfn select(int8 a, int8 b, char8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, char8 c);
int16 __ovld __cnfn select(int16 a, int16 b, char16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, char16 c);
long __ovld __cnfn select(long a, long b, char c);
ulong __ovld __cnfn select(ulong a, ulong b, char c);
long2 __ovld __cnfn select(long2 a, long2 b, char2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, char2 c);
long3 __ovld __cnfn select(long3 a, long3 b, char3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, char3 c);
long4 __ovld __cnfn select(long4 a, long4 b, char4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, char4 c);
long8 __ovld __cnfn select(long8 a, long8 b, char8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, char8 c);
long16 __ovld __cnfn select(long16 a, long16 b, char16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, char16 c);
float __ovld __cnfn select(float a, float b, char c);
float2 __ovld __cnfn select(float2 a, float2 b, char2 c);
float3 __ovld __cnfn select(float3 a, float3 b, char3 c);
float4 __ovld __cnfn select(float4 a, float4 b, char4 c);
float8 __ovld __cnfn select(float8 a, float8 b, char8 c);
float16 __ovld __cnfn select(float16 a, float16 b, char16 c);
char __ovld __cnfn select(char a, char b, short c);
uchar __ovld __cnfn select(uchar a, uchar b, short c);
char2 __ovld __cnfn select(char2 a, char2 b, short2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, short2 c);
char3 __ovld __cnfn select(char3 a, char3 b, short3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, short3 c);
char4 __ovld __cnfn select(char4 a, char4 b, short4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, short4 c);
char8 __ovld __cnfn select(char8 a, char8 b, short8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, short8 c);
char16 __ovld __cnfn select(char16 a, char16 b, short16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, short16 c);
short __ovld __cnfn select(short a, short b, short c);
ushort __ovld __cnfn select(ushort a, ushort b, short c);
short2 __ovld __cnfn select(short2 a, short2 b, short2 c);
@ -11460,60 +11409,7 @@ short8 __ovld __cnfn select(short8 a, short8 b, short8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, short8 c);
short16 __ovld __cnfn select(short16 a, short16 b, short16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, short16 c);
int __ovld __cnfn select(int a, int b, short c);
uint __ovld __cnfn select(uint a, uint b, short c);
int2 __ovld __cnfn select(int2 a, int2 b, short2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, short2 c);
int3 __ovld __cnfn select(int3 a, int3 b, short3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, short3 c);
int4 __ovld __cnfn select(int4 a, int4 b, short4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, short4 c);
int8 __ovld __cnfn select(int8 a, int8 b, short8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, short8 c);
int16 __ovld __cnfn select(int16 a, int16 b, short16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, short16 c);
long __ovld __cnfn select(long a, long b, short c);
ulong __ovld __cnfn select(ulong a, ulong b, short c);
long2 __ovld __cnfn select(long2 a, long2 b, short2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, short2 c);
long3 __ovld __cnfn select(long3 a, long3 b, short3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, short3 c);
long4 __ovld __cnfn select(long4 a, long4 b, short4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, short4 c);
long8 __ovld __cnfn select(long8 a, long8 b, short8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, short8 c);
long16 __ovld __cnfn select(long16 a, long16 b, short16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, short16 c);
float __ovld __cnfn select(float a, float b, short c);
float2 __ovld __cnfn select(float2 a, float2 b, short2 c);
float3 __ovld __cnfn select(float3 a, float3 b, short3 c);
float4 __ovld __cnfn select(float4 a, float4 b, short4 c);
float8 __ovld __cnfn select(float8 a, float8 b, short8 c);
float16 __ovld __cnfn select(float16 a, float16 b, short16 c);
char __ovld __cnfn select(char a, char b, int c);
uchar __ovld __cnfn select(uchar a, uchar b, int c);
char2 __ovld __cnfn select(char2 a, char2 b, int2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, int2 c);
char3 __ovld __cnfn select(char3 a, char3 b, int3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, int3 c);
char4 __ovld __cnfn select(char4 a, char4 b, int4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, int4 c);
char8 __ovld __cnfn select(char8 a, char8 b, int8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, int8 c);
char16 __ovld __cnfn select(char16 a, char16 b, int16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, int16 c);
short __ovld __cnfn select(short a, short b, int c);
ushort __ovld __cnfn select(ushort a, ushort b, int c);
short2 __ovld __cnfn select(short2 a, short2 b, int2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, int2 c);
short3 __ovld __cnfn select(short3 a, short3 b, int3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, int3 c);
short4 __ovld __cnfn select(short4 a, short4 b, int4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, int4 c);
short8 __ovld __cnfn select(short8 a, short8 b, int8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, int8 c);
short16 __ovld __cnfn select(short16 a, short16 b, int16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, int16 c);
int __ovld __cnfn select(int a, int b, int c);
uint __ovld __cnfn select(uint a, uint b, int c);
int2 __ovld __cnfn select(int2 a, int2 b, int2 c);
@ -11526,60 +11422,13 @@ int8 __ovld __cnfn select(int8 a, int8 b, int8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, int8 c);
int16 __ovld __cnfn select(int16 a, int16 b, int16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, int16 c);
long __ovld __cnfn select(long a, long b, int c);
ulong __ovld __cnfn select(ulong a, ulong b, int c);
long2 __ovld __cnfn select(long2 a, long2 b, int2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, int2 c);
long3 __ovld __cnfn select(long3 a, long3 b, int3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, int3 c);
long4 __ovld __cnfn select(long4 a, long4 b, int4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, int4 c);
long8 __ovld __cnfn select(long8 a, long8 b, int8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, int8 c);
long16 __ovld __cnfn select(long16 a, long16 b, int16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, int16 c);
float __ovld __cnfn select(float a, float b, int c);
float2 __ovld __cnfn select(float2 a, float2 b, int2 c);
float3 __ovld __cnfn select(float3 a, float3 b, int3 c);
float4 __ovld __cnfn select(float4 a, float4 b, int4 c);
float8 __ovld __cnfn select(float8 a, float8 b, int8 c);
float16 __ovld __cnfn select(float16 a, float16 b, int16 c);
char __ovld __cnfn select(char a, char b, long c);
uchar __ovld __cnfn select(uchar a, uchar b, long c);
char2 __ovld __cnfn select(char2 a, char2 b, long2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, long2 c);
char3 __ovld __cnfn select(char3 a, char3 b, long3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, long3 c);
char4 __ovld __cnfn select(char4 a, char4 b, long4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, long4 c);
char8 __ovld __cnfn select(char8 a, char8 b, long8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, long8 c);
char16 __ovld __cnfn select(char16 a, char16 b, long16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, long16 c);
short __ovld __cnfn select(short a, short b, long c);
ushort __ovld __cnfn select(ushort a, ushort b, long c);
short2 __ovld __cnfn select(short2 a, short2 b, long2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, long2 c);
short3 __ovld __cnfn select(short3 a, short3 b, long3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, long3 c);
short4 __ovld __cnfn select(short4 a, short4 b, long4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, long4 c);
short8 __ovld __cnfn select(short8 a, short8 b, long8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, long8 c);
short16 __ovld __cnfn select(short16 a, short16 b, long16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, long16 c);
int __ovld __cnfn select(int a, int b, long c);
uint __ovld __cnfn select(uint a, uint b, long c);
int2 __ovld __cnfn select(int2 a, int2 b, long2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, long2 c);
int3 __ovld __cnfn select(int3 a, int3 b, long3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, long3 c);
int4 __ovld __cnfn select(int4 a, int4 b, long4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, long4 c);
int8 __ovld __cnfn select(int8 a, int8 b, long8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, long8 c);
int16 __ovld __cnfn select(int16 a, int16 b, long16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, long16 c);
long __ovld __cnfn select(long a, long b, long c);
ulong __ovld __cnfn select(ulong a, ulong b, long c);
long2 __ovld __cnfn select(long2 a, long2 b, long2 c);
@ -11592,12 +11441,7 @@ long8 __ovld __cnfn select(long8 a, long8 b, long8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, long8 c);
long16 __ovld __cnfn select(long16 a, long16 b, long16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, long16 c);
float __ovld __cnfn select(float a, float b, long c);
float2 __ovld __cnfn select(float2 a, float2 b, long2 c);
float3 __ovld __cnfn select(float3 a, float3 b, long3 c);
float4 __ovld __cnfn select(float4 a, float4 b, long4 c);
float8 __ovld __cnfn select(float8 a, float8 b, long8 c);
float16 __ovld __cnfn select(float16 a, float16 b, long16 c);
char __ovld __cnfn select(char a, char b, uchar c);
uchar __ovld __cnfn select(uchar a, uchar b, uchar c);
char2 __ovld __cnfn select(char2 a, char2 b, uchar2 c);
@ -11610,60 +11454,7 @@ char8 __ovld __cnfn select(char8 a, char8 b, uchar8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uchar8 c);
char16 __ovld __cnfn select(char16 a, char16 b, uchar16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uchar16 c);
short __ovld __cnfn select(short a, short b, uchar c);
ushort __ovld __cnfn select(ushort a, ushort b, uchar c);
short2 __ovld __cnfn select(short2 a, short2 b, uchar2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, uchar2 c);
short3 __ovld __cnfn select(short3 a, short3 b, uchar3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, uchar3 c);
short4 __ovld __cnfn select(short4 a, short4 b, uchar4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, uchar4 c);
short8 __ovld __cnfn select(short8 a, short8 b, uchar8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, uchar8 c);
short16 __ovld __cnfn select(short16 a, short16 b, uchar16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, uchar16 c);
int __ovld __cnfn select(int a, int b, uchar c);
uint __ovld __cnfn select(uint a, uint b, uchar c);
int2 __ovld __cnfn select(int2 a, int2 b, uchar2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, uchar2 c);
int3 __ovld __cnfn select(int3 a, int3 b, uchar3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, uchar3 c);
int4 __ovld __cnfn select(int4 a, int4 b, uchar4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, uchar4 c);
int8 __ovld __cnfn select(int8 a, int8 b, uchar8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, uchar8 c);
int16 __ovld __cnfn select(int16 a, int16 b, uchar16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, uchar16 c);
long __ovld __cnfn select(long a, long b, uchar c);
ulong __ovld __cnfn select(ulong a, ulong b, uchar c);
long2 __ovld __cnfn select(long2 a, long2 b, uchar2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, uchar2 c);
long3 __ovld __cnfn select(long3 a, long3 b, uchar3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, uchar3 c);
long4 __ovld __cnfn select(long4 a, long4 b, uchar4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, uchar4 c);
long8 __ovld __cnfn select(long8 a, long8 b, uchar8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, uchar8 c);
long16 __ovld __cnfn select(long16 a, long16 b, uchar16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, uchar16 c);
float __ovld __cnfn select(float a, float b, uchar c);
float2 __ovld __cnfn select(float2 a, float2 b, uchar2 c);
float3 __ovld __cnfn select(float3 a, float3 b, uchar3 c);
float4 __ovld __cnfn select(float4 a, float4 b, uchar4 c);
float8 __ovld __cnfn select(float8 a, float8 b, uchar8 c);
float16 __ovld __cnfn select(float16 a, float16 b, uchar16 c);
char __ovld __cnfn select(char a, char b, ushort c);
uchar __ovld __cnfn select(uchar a, uchar b, ushort c);
char2 __ovld __cnfn select(char2 a, char2 b, ushort2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, ushort2 c);
char3 __ovld __cnfn select(char3 a, char3 b, ushort3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, ushort3 c);
char4 __ovld __cnfn select(char4 a, char4 b, ushort4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, ushort4 c);
char8 __ovld __cnfn select(char8 a, char8 b, ushort8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, ushort8 c);
char16 __ovld __cnfn select(char16 a, char16 b, ushort16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, ushort16 c);
short __ovld __cnfn select(short a, short b, ushort c);
ushort __ovld __cnfn select(ushort a, ushort b, ushort c);
short2 __ovld __cnfn select(short2 a, short2 b, ushort2 c);
@ -11676,60 +11467,7 @@ short8 __ovld __cnfn select(short8 a, short8 b, ushort8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ushort8 c);
short16 __ovld __cnfn select(short16 a, short16 b, ushort16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ushort16 c);
int __ovld __cnfn select(int a, int b, ushort c);
uint __ovld __cnfn select(uint a, uint b, ushort c);
int2 __ovld __cnfn select(int2 a, int2 b, ushort2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, ushort2 c);
int3 __ovld __cnfn select(int3 a, int3 b, ushort3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, ushort3 c);
int4 __ovld __cnfn select(int4 a, int4 b, ushort4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, ushort4 c);
int8 __ovld __cnfn select(int8 a, int8 b, ushort8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, ushort8 c);
int16 __ovld __cnfn select(int16 a, int16 b, ushort16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, ushort16 c);
long __ovld __cnfn select(long a, long b, ushort c);
ulong __ovld __cnfn select(ulong a, ulong b, ushort c);
long2 __ovld __cnfn select(long2 a, long2 b, ushort2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, ushort2 c);
long3 __ovld __cnfn select(long3 a, long3 b, ushort3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, ushort3 c);
long4 __ovld __cnfn select(long4 a, long4 b, ushort4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, ushort4 c);
long8 __ovld __cnfn select(long8 a, long8 b, ushort8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ushort8 c);
long16 __ovld __cnfn select(long16 a, long16 b, ushort16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ushort16 c);
float __ovld __cnfn select(float a, float b, ushort c);
float2 __ovld __cnfn select(float2 a, float2 b, ushort2 c);
float3 __ovld __cnfn select(float3 a, float3 b, ushort3 c);
float4 __ovld __cnfn select(float4 a, float4 b, ushort4 c);
float8 __ovld __cnfn select(float8 a, float8 b, ushort8 c);
float16 __ovld __cnfn select(float16 a, float16 b, ushort16 c);
char __ovld __cnfn select(char a, char b, uint c);
uchar __ovld __cnfn select(uchar a, uchar b, uint c);
char2 __ovld __cnfn select(char2 a, char2 b, uint2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, uint2 c);
char3 __ovld __cnfn select(char3 a, char3 b, uint3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, uint3 c);
char4 __ovld __cnfn select(char4 a, char4 b, uint4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, uint4 c);
char8 __ovld __cnfn select(char8 a, char8 b, uint8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uint8 c);
char16 __ovld __cnfn select(char16 a, char16 b, uint16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uint16 c);
short __ovld __cnfn select(short a, short b, uint c);
ushort __ovld __cnfn select(ushort a, ushort b, uint c);
short2 __ovld __cnfn select(short2 a, short2 b, uint2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, uint2 c);
short3 __ovld __cnfn select(short3 a, short3 b, uint3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, uint3 c);
short4 __ovld __cnfn select(short4 a, short4 b, uint4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, uint4 c);
short8 __ovld __cnfn select(short8 a, short8 b, uint8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, uint8 c);
short16 __ovld __cnfn select(short16 a, short16 b, uint16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, uint16 c);
int __ovld __cnfn select(int a, int b, uint c);
uint __ovld __cnfn select(uint a, uint b, uint c);
int2 __ovld __cnfn select(int2 a, int2 b, uint2 c);
@ -11742,60 +11480,13 @@ int8 __ovld __cnfn select(int8 a, int8 b, uint8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, uint8 c);
int16 __ovld __cnfn select(int16 a, int16 b, uint16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, uint16 c);
long __ovld __cnfn select(long a, long b, uint c);
ulong __ovld __cnfn select(ulong a, ulong b, uint c);
long2 __ovld __cnfn select(long2 a, long2 b, uint2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, uint2 c);
long3 __ovld __cnfn select(long3 a, long3 b, uint3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, uint3 c);
long4 __ovld __cnfn select(long4 a, long4 b, uint4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, uint4 c);
long8 __ovld __cnfn select(long8 a, long8 b, uint8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, uint8 c);
long16 __ovld __cnfn select(long16 a, long16 b, uint16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, uint16 c);
float __ovld __cnfn select(float a, float b, uint c);
float2 __ovld __cnfn select(float2 a, float2 b, uint2 c);
float3 __ovld __cnfn select(float3 a, float3 b, uint3 c);
float4 __ovld __cnfn select(float4 a, float4 b, uint4 c);
float8 __ovld __cnfn select(float8 a, float8 b, uint8 c);
float16 __ovld __cnfn select(float16 a, float16 b, uint16 c);
char __ovld __cnfn select(char a, char b, ulong c);
uchar __ovld __cnfn select(uchar a, uchar b, ulong c);
char2 __ovld __cnfn select(char2 a, char2 b, ulong2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, ulong2 c);
char3 __ovld __cnfn select(char3 a, char3 b, ulong3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, ulong3 c);
char4 __ovld __cnfn select(char4 a, char4 b, ulong4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, ulong4 c);
char8 __ovld __cnfn select(char8 a, char8 b, ulong8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, ulong8 c);
char16 __ovld __cnfn select(char16 a, char16 b, ulong16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, ulong16 c);
short __ovld __cnfn select(short a, short b, ulong c);
ushort __ovld __cnfn select(ushort a, ushort b, ulong c);
short2 __ovld __cnfn select(short2 a, short2 b, ulong2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, ulong2 c);
short3 __ovld __cnfn select(short3 a, short3 b, ulong3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, ulong3 c);
short4 __ovld __cnfn select(short4 a, short4 b, ulong4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, ulong4 c);
short8 __ovld __cnfn select(short8 a, short8 b, ulong8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ulong8 c);
short16 __ovld __cnfn select(short16 a, short16 b, ulong16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ulong16 c);
int __ovld __cnfn select(int a, int b, ulong c);
uint __ovld __cnfn select(uint a, uint b, ulong c);
int2 __ovld __cnfn select(int2 a, int2 b, ulong2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, ulong2 c);
int3 __ovld __cnfn select(int3 a, int3 b, ulong3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, ulong3 c);
int4 __ovld __cnfn select(int4 a, int4 b, ulong4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, ulong4 c);
int8 __ovld __cnfn select(int8 a, int8 b, ulong8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, ulong8 c);
int16 __ovld __cnfn select(int16 a, int16 b, ulong16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, ulong16 c);
long __ovld __cnfn select(long a, long b, ulong c);
ulong __ovld __cnfn select(ulong a, ulong b, ulong c);
long2 __ovld __cnfn select(long2 a, long2 b, ulong2 c);
@ -11808,12 +11499,7 @@ long8 __ovld __cnfn select(long8 a, long8 b, ulong8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c);
long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c);
float __ovld __cnfn select(float a, float b, ulong c);
float2 __ovld __cnfn select(float2 a, float2 b, ulong2 c);
float3 __ovld __cnfn select(float3 a, float3 b, ulong3 c);
float4 __ovld __cnfn select(float4 a, float4 b, ulong4 c);
float8 __ovld __cnfn select(float8 a, float8 b, ulong8 c);
float16 __ovld __cnfn select(float16 a, float16 b, ulong16 c);
#ifdef cl_khr_fp64
double __ovld __cnfn select(double a, double b, long c);
double2 __ovld __cnfn select(double2 a, double2 b, long2 c);
@ -13141,13 +12827,14 @@ void __ovld __conv barrier(cl_mem_fence_flags flags);
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
// Memory scope values, used for example as the `scope` argument of
// work_group_barrier(). Each enumerator takes its value from the matching
// __OPENCL_MEMORY_SCOPE_* macro rather than from implicit 0..4 numbering.
// NOTE(review): those macros are not defined in the visible part of this
// header; presumably the compiler predefines them -- confirm.
typedef enum memory_scope {
  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
  // The sub-group scope only exists when a sub-group extension is enabled.
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;
void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
@ -13952,11 +13639,11 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v
// Memory-order enumerators. Each one is bound explicitly to the compiler's
// __ATOMIC_* constant so the values stay aligned with what clang uses in
// EmitAtomicExpr(); implicit numbering (0, 1, 2, ...) is not relied upon.
typedef enum memory_order
{
  memory_order_relaxed = __ATOMIC_RELAXED,
  memory_order_acquire = __ATOMIC_ACQUIRE,
  memory_order_release = __ATOMIC_RELEASE,
  memory_order_acq_rel = __ATOMIC_ACQ_REL,
  memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics
@ -16199,6 +15886,313 @@ double __ovld __conv sub_group_scan_inclusive_max(double x);
#endif //cl_khr_subgroups cl_intel_subgroups
#if defined(cl_intel_subgroups)
// Intel-Specific Sub Group Functions
// intel_sub_group_shuffle: overloads for float, int and uint (scalar and
// 2/3/4/8/16-element vectors) plus scalar long and ulong. Each overload
// returns the same type as its first argument x.
// NOTE(review): c presumably identifies the sub-group work-item whose x is
// returned -- confirm against the cl_intel_subgroups extension spec.
float __ovld __conv intel_sub_group_shuffle( float x, uint c );
float2 __ovld __conv intel_sub_group_shuffle( float2 x, uint c );
float3 __ovld __conv intel_sub_group_shuffle( float3 x, uint c );
float4 __ovld __conv intel_sub_group_shuffle( float4 x, uint c );
float8 __ovld __conv intel_sub_group_shuffle( float8 x, uint c );
float16 __ovld __conv intel_sub_group_shuffle( float16 x, uint c );
int __ovld __conv intel_sub_group_shuffle( int x, uint c );
int2 __ovld __conv intel_sub_group_shuffle( int2 x, uint c );
int3 __ovld __conv intel_sub_group_shuffle( int3 x, uint c );
int4 __ovld __conv intel_sub_group_shuffle( int4 x, uint c );
int8 __ovld __conv intel_sub_group_shuffle( int8 x, uint c );
int16 __ovld __conv intel_sub_group_shuffle( int16 x, uint c );
uint __ovld __conv intel_sub_group_shuffle( uint x, uint c );
uint2 __ovld __conv intel_sub_group_shuffle( uint2 x, uint c );
uint3 __ovld __conv intel_sub_group_shuffle( uint3 x, uint c );
uint4 __ovld __conv intel_sub_group_shuffle( uint4 x, uint c );
uint8 __ovld __conv intel_sub_group_shuffle( uint8 x, uint c );
uint16 __ovld __conv intel_sub_group_shuffle( uint16 x, uint c );
long __ovld __conv intel_sub_group_shuffle( long x, uint c );
ulong __ovld __conv intel_sub_group_shuffle( ulong x, uint c );
// intel_sub_group_shuffle_down: overloads for float, int and uint (scalar and
// 2/3/4/8/16-element vectors) plus scalar long and ulong. Every overload takes
// two values of the same type, named (cur, next), and a uint c, and returns
// that type.
// NOTE(review): c is presumably the shuffle distance, reading from `cur` and
// spilling into `next` -- confirm against the cl_intel_subgroups spec.
float __ovld __conv intel_sub_group_shuffle_down( float cur, float next, uint c );
float2 __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint c );
float3 __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint c );
float4 __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint c );
float8 __ovld __conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint c );
float16 __ovld __conv intel_sub_group_shuffle_down( float16 cur, float16 next, uint c );
int __ovld __conv intel_sub_group_shuffle_down( int cur, int next, uint c );
int2 __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint c );
int3 __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint c );
int4 __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint c );
int8 __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint c );
int16 __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint c );
uint __ovld __conv intel_sub_group_shuffle_down( uint cur, uint next, uint c );
uint2 __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint c );
uint3 __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint c );
uint4 __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint c );
uint8 __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint c );
uint16 __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint c );
// The long/ulong overloads use the same (cur, next) parameter names as the
// rest of the shuffle_down family; (prev, cur) belongs to shuffle_up.
long __ovld __conv intel_sub_group_shuffle_down( long cur, long next, uint c );
ulong __ovld __conv intel_sub_group_shuffle_down( ulong cur, ulong next, uint c );
// intel_sub_group_shuffle_up: overloads for float, int and uint (scalar and
// 2/3/4/8/16-element vectors) plus scalar long and ulong. Every overload takes
// two values of the same type, named (prev, cur), and a uint c, and returns
// that type.
// NOTE(review): c is presumably the shuffle distance, reading from `cur` and
// spilling into `prev` -- confirm against the cl_intel_subgroups spec.
float __ovld __conv intel_sub_group_shuffle_up( float prev, float cur, uint c );
float2 __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint c );
float3 __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint c );
float4 __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint c );
float8 __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint c );
float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint c );
int __ovld __conv intel_sub_group_shuffle_up( int prev, int cur, uint c );
int2 __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint c );
int3 __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint c );
int4 __ovld __conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint c );
int8 __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint c );
int16 __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint c );
uint __ovld __conv intel_sub_group_shuffle_up( uint prev, uint cur, uint c );
uint2 __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint c );
uint3 __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint c );
uint4 __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint c );
uint8 __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint c );
uint16 __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint c );
long __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint c );
ulong __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint c );
// intel_sub_group_shuffle_xor: overloads for float, int and uint (scalar and
// 2/3/4/8/16-element vectors) plus scalar long and ulong. Each overload
// returns the same type as its first argument x.
// NOTE(review): c is presumably XOR-ed with the caller's sub-group local id to
// pick the source work-item -- confirm against the cl_intel_subgroups spec.
float __ovld __conv intel_sub_group_shuffle_xor( float x, uint c );
float2 __ovld __conv intel_sub_group_shuffle_xor( float2 x, uint c );
float3 __ovld __conv intel_sub_group_shuffle_xor( float3 x, uint c );
float4 __ovld __conv intel_sub_group_shuffle_xor( float4 x, uint c );
float8 __ovld __conv intel_sub_group_shuffle_xor( float8 x, uint c );
float16 __ovld __conv intel_sub_group_shuffle_xor( float16 x, uint c );
int __ovld __conv intel_sub_group_shuffle_xor( int x, uint c );
int2 __ovld __conv intel_sub_group_shuffle_xor( int2 x, uint c );
int3 __ovld __conv intel_sub_group_shuffle_xor( int3 x, uint c );
int4 __ovld __conv intel_sub_group_shuffle_xor( int4 x, uint c );
int8 __ovld __conv intel_sub_group_shuffle_xor( int8 x, uint c );
int16 __ovld __conv intel_sub_group_shuffle_xor( int16 x, uint c );
uint __ovld __conv intel_sub_group_shuffle_xor( uint x, uint c );
uint2 __ovld __conv intel_sub_group_shuffle_xor( uint2 x, uint c );
uint3 __ovld __conv intel_sub_group_shuffle_xor( uint3 x, uint c );
uint4 __ovld __conv intel_sub_group_shuffle_xor( uint4 x, uint c );
uint8 __ovld __conv intel_sub_group_shuffle_xor( uint8 x, uint c );
uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );
long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );
ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );
// intel_sub_group_block_read{,2,4,8}: return uint, uint2, uint4 or uint8
// respectively. Three source kinds are declared: a read_only image2d_t with an
// int2 coordinate, a read_write image2d_t with an int2 coordinate, and a
// const __global uint pointer.
uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );
uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );
uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );
uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );
// The read_write image overloads are declared only for OpenCL C 2.0 and later.
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);
uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);
uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);
uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
// Pointer-based overloads reading from __global memory.
uint __ovld __conv intel_sub_group_block_read( const __global uint* p );
uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p );
uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p );
uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p );
// Sub-group block writes of 1, 2, 4 or 8 uints (the numeric suffix matches
// the width of the data argument). The destination is either a 2D image
// addressed by an int2 coord or a pointer to __global uint.
// The read_write image overloads are only declared for OpenCL C 2.0 and later.
void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);
void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);
void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);
void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);
void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);
void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);
void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
// Pointer forms: write to __global memory instead of an image.
void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );
void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );
void __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data );
void __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data );
#ifdef cl_khr_fp16
// half overloads of the four sub-group shuffles; only declared when
// cl_khr_fp16 is defined.
// Operand names match the other declarations in this header: shuffle_down
// takes (cur, next) and shuffle_up takes (prev, cur). Prototype parameter
// names are documentation only, so the rename does not affect callers.
half __ovld __conv intel_sub_group_shuffle( half x, uint c );
half __ovld __conv intel_sub_group_shuffle_down( half cur, half next, uint c );
half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c );
half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c );
#endif
#if defined(cl_khr_fp64)
// double overloads of the four sub-group shuffles; only declared when
// cl_khr_fp64 is defined.
// Operand names match the other declarations in this header: shuffle_down
// takes (cur, next) and shuffle_up takes (prev, cur). Prototype parameter
// names are documentation only, so the rename does not affect callers.
double __ovld __conv intel_sub_group_shuffle( double x, uint c );
double __ovld __conv intel_sub_group_shuffle_down( double cur, double next, uint c );
double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c );
double __ovld __conv intel_sub_group_shuffle_xor( double x, uint c );
#endif
#endif //cl_intel_subgroups
#if defined(cl_intel_subgroups_short)
// intel_sub_group_broadcast overloads for short and ushort: scalar and
// 2/3/4/8-wide vector forms (no 16-wide form is declared here). Each takes a
// value x and a uint sub_group_local_id and returns the same type as x.
short __ovld __conv intel_sub_group_broadcast( short x, uint sub_group_local_id );
short2 __ovld __conv intel_sub_group_broadcast( short2 x, uint sub_group_local_id );
short3 __ovld __conv intel_sub_group_broadcast( short3 x, uint sub_group_local_id );
short4 __ovld __conv intel_sub_group_broadcast( short4 x, uint sub_group_local_id );
short8 __ovld __conv intel_sub_group_broadcast( short8 x, uint sub_group_local_id );
ushort __ovld __conv intel_sub_group_broadcast( ushort x, uint sub_group_local_id );
ushort2 __ovld __conv intel_sub_group_broadcast( ushort2 x, uint sub_group_local_id );
ushort3 __ovld __conv intel_sub_group_broadcast( ushort3 x, uint sub_group_local_id );
ushort4 __ovld __conv intel_sub_group_broadcast( ushort4 x, uint sub_group_local_id );
ushort8 __ovld __conv intel_sub_group_broadcast( ushort8 x, uint sub_group_local_id );
// intel_sub_group_shuffle overloads for short and ushort: scalar and
// 2/3/4/8/16-wide vector forms. Each takes a value x and a uint c and returns
// the same type as x.
// NOTE(review): presumably c is the sub-group local id of the work-item whose
// x is returned -- confirm against the cl_intel_subgroups_short spec.
short __ovld __conv intel_sub_group_shuffle( short x, uint c );
short2 __ovld __conv intel_sub_group_shuffle( short2 x, uint c );
short3 __ovld __conv intel_sub_group_shuffle( short3 x, uint c );
short4 __ovld __conv intel_sub_group_shuffle( short4 x, uint c );
short8 __ovld __conv intel_sub_group_shuffle( short8 x, uint c );
short16 __ovld __conv intel_sub_group_shuffle( short16 x, uint c);
ushort __ovld __conv intel_sub_group_shuffle( ushort x, uint c );
ushort2 __ovld __conv intel_sub_group_shuffle( ushort2 x, uint c );
ushort3 __ovld __conv intel_sub_group_shuffle( ushort3 x, uint c );
ushort4 __ovld __conv intel_sub_group_shuffle( ushort4 x, uint c );
ushort8 __ovld __conv intel_sub_group_shuffle( ushort8 x, uint c );
ushort16 __ovld __conv intel_sub_group_shuffle( ushort16 x, uint c );
// intel_sub_group_shuffle_down overloads for short and ushort: scalar and
// 2/3/4/8/16-wide vector forms. Each takes two values of the same type
// (cur, next) and a uint c, and returns that type.
// NOTE(review): presumably c is the lane delta, with `next` supplying data
// once the shift runs past the end of `cur` -- confirm against the spec.
short __ovld __conv intel_sub_group_shuffle_down( short cur, short next, uint c );
short2 __ovld __conv intel_sub_group_shuffle_down( short2 cur, short2 next, uint c );
short3 __ovld __conv intel_sub_group_shuffle_down( short3 cur, short3 next, uint c );
short4 __ovld __conv intel_sub_group_shuffle_down( short4 cur, short4 next, uint c );
short8 __ovld __conv intel_sub_group_shuffle_down( short8 cur, short8 next, uint c );
short16 __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint c );
ushort __ovld __conv intel_sub_group_shuffle_down( ushort cur, ushort next, uint c );
ushort2 __ovld __conv intel_sub_group_shuffle_down( ushort2 cur, ushort2 next, uint c );
ushort3 __ovld __conv intel_sub_group_shuffle_down( ushort3 cur, ushort3 next, uint c );
ushort4 __ovld __conv intel_sub_group_shuffle_down( ushort4 cur, ushort4 next, uint c );
ushort8 __ovld __conv intel_sub_group_shuffle_down( ushort8 cur, ushort8 next, uint c );
ushort16 __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint c );
// intel_sub_group_shuffle_up overloads for short and ushort: scalar and
// 2/3/4/8/16-wide vector forms. Each takes two values of the same type and a
// uint c, and returns that type.
// The operands are named (prev, cur) to match every other
// intel_sub_group_shuffle_up declaration in this header; (cur, next) is the
// shuffle_down naming. Prototype parameter names are documentation only, so
// overload resolution and callers are unaffected.
short __ovld __conv intel_sub_group_shuffle_up( short prev, short cur, uint c );
short2 __ovld __conv intel_sub_group_shuffle_up( short2 prev, short2 cur, uint c );
short3 __ovld __conv intel_sub_group_shuffle_up( short3 prev, short3 cur, uint c );
short4 __ovld __conv intel_sub_group_shuffle_up( short4 prev, short4 cur, uint c );
short8 __ovld __conv intel_sub_group_shuffle_up( short8 prev, short8 cur, uint c );
short16 __ovld __conv intel_sub_group_shuffle_up( short16 prev, short16 cur, uint c );
ushort __ovld __conv intel_sub_group_shuffle_up( ushort prev, ushort cur, uint c );
ushort2 __ovld __conv intel_sub_group_shuffle_up( ushort2 prev, ushort2 cur, uint c );
ushort3 __ovld __conv intel_sub_group_shuffle_up( ushort3 prev, ushort3 cur, uint c );
ushort4 __ovld __conv intel_sub_group_shuffle_up( ushort4 prev, ushort4 cur, uint c );
ushort8 __ovld __conv intel_sub_group_shuffle_up( ushort8 prev, ushort8 cur, uint c );
ushort16 __ovld __conv intel_sub_group_shuffle_up( ushort16 prev, ushort16 cur, uint c );
// intel_sub_group_shuffle_xor overloads for short and ushort: scalar and
// 2/3/4/8/16-wide vector forms. Each takes a value x and a uint c and returns
// the same type as x.
short __ovld __conv intel_sub_group_shuffle_xor( short x, uint c );
short2 __ovld __conv intel_sub_group_shuffle_xor( short2 x, uint c );
short3 __ovld __conv intel_sub_group_shuffle_xor( short3 x, uint c );
short4 __ovld __conv intel_sub_group_shuffle_xor( short4 x, uint c );
short8 __ovld __conv intel_sub_group_shuffle_xor( short8 x, uint c );
short16 __ovld __conv intel_sub_group_shuffle_xor( short16 x, uint c );
ushort __ovld __conv intel_sub_group_shuffle_xor( ushort x, uint c );
ushort2 __ovld __conv intel_sub_group_shuffle_xor( ushort2 x, uint c );
ushort3 __ovld __conv intel_sub_group_shuffle_xor( ushort3 x, uint c );
ushort4 __ovld __conv intel_sub_group_shuffle_xor( ushort4 x, uint c );
ushort8 __ovld __conv intel_sub_group_shuffle_xor( ushort8 x, uint c );
ushort16 __ovld __conv intel_sub_group_shuffle_xor( ushort16 x, uint c );
// Sub-group reductions and scans for scalar short and ushort. Three families
// are declared (reduce, scan_exclusive, scan_inclusive), each with add, min
// and max variants. Every function takes one value x and returns its type.
short __ovld __conv intel_sub_group_reduce_add( short x );
ushort __ovld __conv intel_sub_group_reduce_add( ushort x );
short __ovld __conv intel_sub_group_reduce_min( short x );
ushort __ovld __conv intel_sub_group_reduce_min( ushort x );
short __ovld __conv intel_sub_group_reduce_max( short x );
ushort __ovld __conv intel_sub_group_reduce_max( ushort x );
short __ovld __conv intel_sub_group_scan_exclusive_add( short x );
ushort __ovld __conv intel_sub_group_scan_exclusive_add( ushort x );
short __ovld __conv intel_sub_group_scan_exclusive_min( short x );
ushort __ovld __conv intel_sub_group_scan_exclusive_min( ushort x );
short __ovld __conv intel_sub_group_scan_exclusive_max( short x );
ushort __ovld __conv intel_sub_group_scan_exclusive_max( ushort x );
short __ovld __conv intel_sub_group_scan_inclusive_add( short x );
ushort __ovld __conv intel_sub_group_scan_inclusive_add( ushort x );
short __ovld __conv intel_sub_group_scan_inclusive_min( short x );
ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x );
short __ovld __conv intel_sub_group_scan_inclusive_max( short x );
ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x );
// "_ui" sub-group block reads returning 1, 2, 4 or 8 uints, from a 2D image
// addressed by an int2 byte_coord or from a pointer to __global uint.
// The read_write image overloads are only declared for OpenCL C 2.0 and later.
uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );
uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );
uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );
uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );
uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );
uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );
uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
// Pointer forms: read from __global memory instead of an image.
uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );
uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );
uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );
// "_ui" sub-group block writes of 1, 2, 4 or 8 uints, to a 2D image addressed
// by an int2 byte_coord or to a pointer to __global uint.
// NOTE(review): the four image overloads directly below take a `read_only`
// image even though they are write operations, whereas the parallel
// intel_sub_group_block_write* and intel_sub_group_block_write_us*
// declarations take `write_only`. This looks like a copy/paste slip, but the
// access qualifier is part of the overload's signature, so confirm against
// the cl_intel_subgroups_short spec and the implementation before changing it.
void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );
void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );
void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );
void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );
void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );
void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );
void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
// Pointer forms: write to __global memory instead of an image.
void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );
// "_us" sub-group block reads returning 1, 2, 4 or 8 ushorts, from a 2D image
// addressed by an int2 coord or from a pointer to __global ushort.
// The read_write image overloads are only declared for OpenCL C 2.0 and later.
ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );
ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );
ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );
ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);
ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);
ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);
ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
// Pointer forms: read from __global memory instead of an image.
ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p );
ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );
ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );
ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );
// "_us" sub-group block writes of 1, 2, 4 or 8 ushorts, to a 2D image
// addressed by an int2 coord or to a pointer to __global ushort.
// The read_write image overloads are only declared for OpenCL C 2.0 and later.
void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data);
void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);
void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);
void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data);
void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);
void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);
void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
// Pointer forms: write to __global memory instead of an image.
void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data );
void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );
void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );
void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
#endif // cl_intel_subgroups_short
#ifdef cl_amd_media_ops
uint __ovld amd_bitalign(uint a, uint b, uint c);
uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);

View File

@ -115,8 +115,8 @@ _mm_hsub_ps(__m128 __a, __m128 __b)
return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
}
/// \brief Moves and duplicates high-order (odd-indexed) values from a 128-bit
/// vector of [4 x float] to float values stored in a 128-bit vector of
/// \brief Moves and duplicates odd-indexed values from a 128-bit vector
/// of [4 x float] to float values stored in a 128-bit vector of
/// [4 x float].
///
/// \headerfile <x86intrin.h>
@ -137,7 +137,7 @@ _mm_movehdup_ps(__m128 __a)
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
}
/// \brief Duplicates low-order (even-indexed) values from a 128-bit vector of
/// \brief Duplicates even-indexed values from a 128-bit vector of
/// [4 x float] to float values stored in a 128-bit vector of [4 x float].
///
/// \headerfile <x86intrin.h>

View File

@ -648,7 +648,7 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
/// input vectors are used as an input for dot product; otherwise that input
/// is treated as zero. Bits [1:0] determine which elements of the result
/// will receive a copy of the final dot product, with bit [0] corresponding
/// to the lowest element and bit [3] corresponding to the highest element of
/// to the lowest element and bit [1] corresponding to the highest element of
/// each [2 x double] vector. If a bit is set, the dot product is returned in
/// the corresponding element; otherwise that element is set to zero.
#define _mm_dp_pd(X, Y, M) __extension__ ({\
@ -866,8 +866,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 11: Copies the selected bits from \a Y to result bits [127:96]. \n
/// Bits[3:0]: If any of these bits are set, the corresponding result
/// element is cleared.
/// \returns A 128-bit vector of [4 x float] containing the copied single-
/// precision floating point elements from the operands.
/// \returns A 128-bit vector of [4 x float] containing the copied
/// single-precision floating point elements from the operands.
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
/// \brief Extracts a 32-bit integer from a 128-bit vector of [4 x float] and

View File

@ -32,12 +32,15 @@
#define true 1
#define false 0
#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)
/* Define _Bool, bool, false, true as a GNU extension. */
/* Define _Bool as a GNU extension. */
#define _Bool bool
#if __cplusplus < 201103L
/* For C++98, define bool, false, true as a GNU extension. */
#define bool bool
#define false false
#define true true
#endif
#endif
#define __bool_true_false_are_defined 1

View File

@ -76,7 +76,13 @@ typedef intptr_t _sleb128_t;
typedef uintptr_t _uleb128_t;
struct _Unwind_Context;
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
struct _Unwind_Control_Block;
typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */
#else
struct _Unwind_Exception;
typedef struct _Unwind_Exception _Unwind_Exception;
#endif
typedef enum {
_URC_NO_REASON = 0,
#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
@ -109,8 +115,42 @@ typedef enum {
} _Unwind_Action;
typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,
struct _Unwind_Exception *);
_Unwind_Exception *);
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
typedef struct _Unwind_Control_Block _Unwind_Control_Block;
typedef uint32_t _Unwind_EHT_Header;
/* Exception object used on ARM when neither SjLj nor ARM DWARF exception
 * handling is selected (see the enclosing #if); in that configuration
 * _Unwind_Exception is a typedef alias for this struct.
 * It holds the exception class, the cleanup callback, and four fixed-size
 * scratch areas described by the comments on each member. The struct is
 * declared 8-byte aligned, so the field order and sizes must not change. */
struct _Unwind_Control_Block {
uint64_t exception_class;
void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);
/* unwinder cache (private fields for the unwinder's use) */
struct {
uint32_t reserved1; /* forced unwind stop function, 0 if not forced */
uint32_t reserved2; /* personality routine */
uint32_t reserved3; /* callsite */
uint32_t reserved4; /* forced unwind stop argument */
uint32_t reserved5;
} unwinder_cache;
/* propagation barrier cache (valid after phase 1) */
struct {
uint32_t sp;
uint32_t bitpattern[5];
} barrier_cache;
/* cleanup cache (preserved over cleanup) */
struct {
uint32_t bitpattern[4];
} cleanup_cache;
/* personality cache (for personality's benefit) */
struct {
uint32_t fnstart; /* function start address */
_Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */
uint32_t additional; /* additional data */
uint32_t reserved1;
} pr_cache;
long long int : 0; /* force alignment of next item to 8-byte boundary */
} __attribute__((__aligned__(8)));
#else
struct _Unwind_Exception {
_Unwind_Exception_Class exception_class;
_Unwind_Exception_Cleanup_Fn exception_cleanup;
@ -120,23 +160,24 @@ struct _Unwind_Exception {
* aligned". GCC has interpreted this to mean "use the maximum useful
* alignment for the target"; so do we. */
} __attribute__((__aligned__));
#endif
typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,
_Unwind_Exception_Class,
struct _Unwind_Exception *,
_Unwind_Exception *,
struct _Unwind_Context *,
void *);
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *,
struct _Unwind_Context *);
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action,
_Unwind_Exception_Class,
_Unwind_Exception *,
struct _Unwind_Context *);
typedef _Unwind_Personality_Fn __personality_routine;
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
void *);
#if defined(__arm__) && !defined(__APPLE__)
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
typedef enum {
_UVRSC_CORE = 0, /* integer register */
_UVRSC_VFP = 1, /* vfp */
@ -158,14 +199,12 @@ typedef enum {
_UVRSR_FAILED = 2
} _Unwind_VRS_Result;
#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__ARM_DWARF_EH__)
typedef uint32_t _Unwind_State;
#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)
#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)
#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)
#define _US_ACTION_MASK ((_Unwind_State)3)
#define _US_FORCE_UNWIND ((_Unwind_State)8)
#endif
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
_Unwind_VRS_RegClass __regclass,
@ -224,13 +263,12 @@ _Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);
/* DWARF EH functions; currently not available on Darwin/ARM */
#if !defined(__APPLE__) || !defined(__arm__)
_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *,
_Unwind_Stop_Fn, void *);
void _Unwind_DeleteException(struct _Unwind_Exception *);
void _Unwind_Resume(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);
_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn,
void *);
void _Unwind_DeleteException(_Unwind_Exception *);
void _Unwind_Resume(_Unwind_Exception *);
_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *);
#endif
@ -241,11 +279,11 @@ typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;
void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);
void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);
_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *,
_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *);
_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *,
_Unwind_Stop_Fn, void *);
void _Unwind_SjLj_Resume(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *);
void _Unwind_SjLj_Resume(_Unwind_Exception *);
_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *);
void *_Unwind_FindEnclosingFunction(void *);

98
c_headers/vaesintrin.h Normal file
View File

@ -0,0 +1,98 @@
/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <vaesintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __VAESINTRIN_H
#define __VAESINTRIN_H
/* Default attributes for YMM forms. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes")))
/* Default attributes for ZMM forms. */
#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes")))
/* Casts both 256-bit operands to __v4di, passes them to
 * __builtin_ia32_aesenc256 and returns the builtin's result as __m256i.
 * NOTE(review): presumably one AES encryption round per 128-bit lane, with
 * __B as the round key -- confirm against the Intel intrinsics guide. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesenc_epi128(__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di)__A;
  __v4di __rhs = (__v4di)__B;
  return (__m256i)__builtin_ia32_aesenc256(__lhs, __rhs);
}
/* Casts both 512-bit operands to __v8di, passes them to
 * __builtin_ia32_aesenc512 and returns the builtin's result as __m512i.
 * NOTE(review): presumably one AES encryption round per 128-bit lane, with
 * __B as the round key -- confirm against the Intel intrinsics guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesenc_epi128(__m512i __A, __m512i __B)
{
  __v8di __lhs = (__v8di)__A;
  __v8di __rhs = (__v8di)__B;
  return (__m512i)__builtin_ia32_aesenc512(__lhs, __rhs);
}
/* Casts both 256-bit operands to __v4di, passes them to
 * __builtin_ia32_aesdec256 and returns the builtin's result as __m256i.
 * NOTE(review): presumably one AES decryption round per 128-bit lane, with
 * __B as the round key -- confirm against the Intel intrinsics guide. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesdec_epi128(__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di)__A;
  __v4di __rhs = (__v4di)__B;
  return (__m256i)__builtin_ia32_aesdec256(__lhs, __rhs);
}
/* Casts both 512-bit operands to __v8di, passes them to
 * __builtin_ia32_aesdec512 and returns the builtin's result as __m512i.
 * NOTE(review): presumably one AES decryption round per 128-bit lane, with
 * __B as the round key -- confirm against the Intel intrinsics guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesdec_epi128(__m512i __A, __m512i __B)
{
  __v8di __lhs = (__v8di)__A;
  __v8di __rhs = (__v8di)__B;
  return (__m512i)__builtin_ia32_aesdec512(__lhs, __rhs);
}
/* Casts both 256-bit operands to __v4di, passes them to
 * __builtin_ia32_aesenclast256 and returns the builtin's result as __m256i.
 * NOTE(review): presumably the final AES encryption round per 128-bit lane,
 * with __B as the round key -- confirm against the Intel intrinsics guide. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesenclast_epi128(__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di)__A;
  __v4di __rhs = (__v4di)__B;
  return (__m256i)__builtin_ia32_aesenclast256(__lhs, __rhs);
}
/* Casts both 512-bit operands to __v8di, passes them to
 * __builtin_ia32_aesenclast512 and returns the builtin's result as __m512i.
 * NOTE(review): presumably the final AES encryption round per 128-bit lane,
 * with __B as the round key -- confirm against the Intel intrinsics guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesenclast_epi128(__m512i __A, __m512i __B)
{
  __v8di __lhs = (__v8di)__A;
  __v8di __rhs = (__v8di)__B;
  return (__m512i)__builtin_ia32_aesenclast512(__lhs, __rhs);
}
/* Casts both 256-bit operands to __v4di, passes them to
 * __builtin_ia32_aesdeclast256 and returns the builtin's result as __m256i.
 * NOTE(review): presumably the final AES decryption round per 128-bit lane,
 * with __B as the round key -- confirm against the Intel intrinsics guide. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesdeclast_epi128(__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di)__A;
  __v4di __rhs = (__v4di)__B;
  return (__m256i)__builtin_ia32_aesdeclast256(__lhs, __rhs);
}
/* Casts both 512-bit operands to __v8di, passes them to
 * __builtin_ia32_aesdeclast512 and returns the builtin's result as __m512i.
 * NOTE(review): presumably the final AES decryption round per 128-bit lane,
 * with __B as the round key -- confirm against the Intel intrinsics guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesdeclast_epi128(__m512i __A, __m512i __B)
{
  __v8di __lhs = (__v8di)__A;
  __v8di __rhs = (__v8di)__B;
  return (__m512i)__builtin_ia32_aesdeclast512(__lhs, __rhs);
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_F
#endif

View File

@ -0,0 +1,42 @@
/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __VPCLMULQDQINTRIN_H
#define __VPCLMULQDQINTRIN_H
/* Expands to a call of __builtin_ia32_pclmulqdq256 on A and B (each cast to
 * __m256i, then to __v4di) with I cast to char; the result is cast to
 * __m256i. The body is a GNU statement expression, hence __extension__.
 * NOTE(review): I is presumably required to be a compile-time immediate that
 * selects which 64-bit halves are multiplied -- confirm against the Intel
 * intrinsics guide. */
#define _mm256_clmulepi64_epi128(A, B, I) __extension__ ({ \
(__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \
(__v4di)(__m256i)(B), \
(char)(I)); })
/* Expands to a call of __builtin_ia32_pclmulqdq512 on A and B (each cast to
 * __m512i, then to __v8di) with I cast to char; the result is cast to
 * __m512i. The body is a GNU statement expression, hence __extension__.
 * NOTE(review): I is presumably required to be a compile-time immediate that
 * selects which 64-bit halves are multiplied -- confirm against the Intel
 * intrinsics guide. */
#define _mm512_clmulepi64_epi128(A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \
(__v8di)(__m512i)(B), \
(char)(I)); })
#endif // __VPCLMULQDQINTRIN_H

View File

@ -2035,9 +2035,11 @@ _mm_storer_ps(float *__p, __m128 __a)
_mm_store_ps(__p, __a);
}
#define _MM_HINT_T0 3
#define _MM_HINT_T1 2
#define _MM_HINT_T2 1
#define _MM_HINT_ET0 7
#define _MM_HINT_ET1 6
#define _MM_HINT_T0 3
#define _MM_HINT_T1 2
#define _MM_HINT_T2 1
#define _MM_HINT_NTA 0
#ifndef _MSC_VER
@ -2068,7 +2070,8 @@ _mm_storer_ps(float *__p, __m128 __a)
/// be generated. \n
/// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will
/// be generated.
#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \
((sel) >> 2) & 1, (sel) & 0x3))
#endif
/// \brief Stores a 64-bit integer in the specified aligned memory location. To

View File

@ -7,3 +7,4 @@ after_build:
- '%APPVEYOR_BUILD_FOLDER%\ci\appveyor\after_build.bat'
cache:
- 'llvm+clang-5.0.1-win64-msvc-release.tar.xz'
- 'llvm+clang-6.0.0-win64-msvc-release.tar.xz'

View File

@ -7,13 +7,13 @@ SET "PATH=C:\msys64\mingw64\bin;C:\msys64\usr\bin;%PATH%"
SET "MSYSTEM=MINGW64"
SET "APPVEYOR_CACHE_ENTRY_ZIP_ARGS=-m0=Copy"
bash -lc "cd ${APPVEYOR_BUILD_FOLDER} && if [ -s ""llvm+clang-5.0.1-win64-msvc-release.tar.xz"" ]; then echo 'skipping LLVM download'; else wget 'https://s3.amazonaws.com/ziglang.org/deps/llvm%%2bclang-5.0.1-win64-msvc-release.tar.xz'; fi && tar xf llvm+clang-5.0.1-win64-msvc-release.tar.xz" || exit /b
bash -lc "cd ${APPVEYOR_BUILD_FOLDER} && if [ -s ""llvm+clang-6.0.0-win64-msvc-release.tar.xz"" ]; then echo 'skipping LLVM download'; else wget 'https://s3.amazonaws.com/ziglang.org/deps/llvm%%2bclang-6.0.0-win64-msvc-release.tar.xz'; fi && tar xf llvm+clang-6.0.0-win64-msvc-release.tar.xz" || exit /b
SET "PATH=%PREVPATH%"
SET "MSYSTEM=%PREVMSYSTEM%"
SET "ZIGBUILDDIR=%APPVEYOR_BUILD_FOLDER%\build-msvc-release"
SET "ZIGPREFIXPATH=%APPVEYOR_BUILD_FOLDER%\llvm+clang-5.0.1-win64-msvc-release"
SET "ZIGPREFIXPATH=%APPVEYOR_BUILD_FOLDER%\llvm+clang-6.0.0-win64-msvc-release"
call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64
call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x86_amd64

View File

@ -4,4 +4,4 @@ set -x
sudo apt-get remove -y llvm-*
sudo rm -rf /usr/local/*
sudo apt-get install -y clang-5.0 libclang-5.0 libclang-5.0-dev llvm-5.0 llvm-5.0-dev liblld-5.0 liblld-5.0-dev cmake wine1.6-amd64
sudo apt-get install -y clang-6.0 libclang-6.0 libclang-6.0-dev llvm-6.0 llvm-6.0-dev liblld-6.0 liblld-6.0-dev cmake wine1.6-amd64

View File

@ -26,16 +26,16 @@ if(MSVC)
else()
find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h
PATHS
/usr/lib/llvm/5/include
/usr/lib/llvm-5.0/include
/usr/lib/llvm/6/include
/usr/lib/llvm-6.0/include
/mingw64/include)
macro(FIND_AND_ADD_CLANG_LIB _libname_)
string(TOUPPER ${_libname_} _prettylibname_)
find_library(CLANG_${_prettylibname_}_LIB NAMES ${_libname_}
PATHS
/usr/lib/llvm/5/lib
/usr/lib/llvm-5.0/lib
/usr/lib/llvm/6/lib
/usr/lib/llvm-6.0/lib
/mingw64/lib
/c/msys64/mingw64/lib
c:\\msys64\\mingw64\\lib)

View File

@ -6,12 +6,12 @@
# LLD_INCLUDE_DIRS
# LLD_LIBRARIES
find_path(LLD_INCLUDE_DIRS NAMES lld/Driver/Driver.h
find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h
PATHS
/usr/lib/llvm-5.0/include
/usr/lib/llvm-6.0/include
/mingw64/include)
find_library(LLD_LIBRARY NAMES lld-5.0 lld PATHS /usr/lib/llvm-5.0/lib)
find_library(LLD_LIBRARY NAMES lld-6.0 lld PATHS /usr/lib/llvm-6.0/lib)
if(EXISTS ${LLD_LIBRARY})
set(LLD_LIBRARIES ${LLD_LIBRARY})
else()
@ -19,7 +19,7 @@ else()
string(TOUPPER ${_libname_} _prettylibname_)
find_library(LLD_${_prettylibname_}_LIB NAMES ${_libname_}
PATHS
/usr/lib/llvm-5.0/lib
/usr/lib/llvm-6.0/lib
/mingw64/lib
/c/msys64/mingw64/lib
c:/msys64/mingw64/lib)
@ -29,13 +29,14 @@ else()
endmacro(FIND_AND_ADD_LLD_LIB)
FIND_AND_ADD_LLD_LIB(lldDriver)
FIND_AND_ADD_LLD_LIB(lldMinGW)
FIND_AND_ADD_LLD_LIB(lldELF)
FIND_AND_ADD_LLD_LIB(lldCOFF)
FIND_AND_ADD_LLD_LIB(lldMachO)
FIND_AND_ADD_LLD_LIB(lldReaderWriter)
FIND_AND_ADD_LLD_LIB(lldCore)
FIND_AND_ADD_LLD_LIB(lldYAML)
FIND_AND_ADD_LLD_LIB(lldConfig)
FIND_AND_ADD_LLD_LIB(lldCommon)
endif()
include(FindPackageHandleStandardArgs)

View File

@ -8,12 +8,12 @@
# LLVM_LIBDIRS
find_program(LLVM_CONFIG_EXE
NAMES llvm-config-5.0 llvm-config
NAMES llvm-config-6.0 llvm-config
PATHS
"/mingw64/bin"
"/c/msys64/mingw64/bin"
"c:/msys64/mingw64/bin"
"C:/Libraries/llvm-5.0.0/bin")
"C:/Libraries/llvm-6.0.0/bin")
if(NOT(CMAKE_BUILD_TYPE STREQUAL "Debug"))
execute_process(
@ -62,7 +62,7 @@ execute_process(
set(LLVM_LIBRARIES ${LLVM_LIBRARIES} ${LLVM_SYSTEM_LIBS})
if(NOT LLVM_LIBRARIES)
find_library(LLVM_LIBRARIES NAMES LLVM LLVM-5.0 LLVM-5)
find_library(LLVM_LIBRARIES NAMES LLVM LLVM-6.0 LLVM-6)
endif()
link_directories("${CMAKE_PREFIX_PATH}/lib")

View File

@ -12,9 +12,10 @@
#ifdef PREFIX
#define COMMA ,
PREFIX(prefix_0, {nullptr})
PREFIX(prefix_3, {"--" COMMA nullptr})
PREFIX(prefix_2, {"/" COMMA "-" COMMA nullptr})
PREFIX(prefix_1, {"/" COMMA "-" COMMA "-?" COMMA nullptr})
PREFIX(prefix_3, {"/?" COMMA "-?" COMMA nullptr})
PREFIX(prefix_4, {"/?" COMMA "-?" COMMA nullptr})
#undef COMMA
#endif // PREFIX
@ -30,19 +31,26 @@ OPTION(prefix_0, "<input>", INPUT, Input, INVALID, INVALID, nullptr, 0, 0, nullp
OPTION(prefix_0, "<unknown>", UNKNOWN, Unknown, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "align:", align, Joined, INVALID, INVALID, nullptr, 0, 0,
"Section alignment", nullptr, nullptr)
OPTION(prefix_1, "aligncomm:", aligncomm, Joined, INVALID, INVALID, nullptr, 0, 0,
"Set common symbol alignment", nullptr, nullptr)
OPTION(prefix_1, "allowbind:no", allowbind_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable DLL binding", nullptr, nullptr)
OPTION(prefix_1, "allowbind", allowbind, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "allowbind", allowbind, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable DLL binding (default)", nullptr, nullptr)
OPTION(prefix_1, "allowisolation:no", allowisolation_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Set NO_ISOLATION bit", nullptr, nullptr)
OPTION(prefix_1, "allowisolation", allowisolation, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
"Disable DLL isolation", nullptr, nullptr)
OPTION(prefix_1, "allowisolation", allowisolation, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable DLL isolation (default)", nullptr, nullptr)
OPTION(prefix_1, "alternatename:", alternatename, Joined, INVALID, INVALID, nullptr, 0, 0,
"Define weak alias", nullptr, nullptr)
OPTION(prefix_1, "appcontainer:no", appcontainer_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Image can run outside an app container (default)", nullptr, nullptr)
OPTION(prefix_1, "appcontainer", appcontainer, Flag, INVALID, INVALID, nullptr, 0, 0,
"Image can only be run in an app container", nullptr, nullptr)
OPTION(prefix_1, "appcontainer", appcontainer, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "base:", base, Joined, INVALID, INVALID, nullptr, 0, 0,
"Base address of the program", nullptr, nullptr)
OPTION(prefix_1, "debug:dwarf", debug_dwarf, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "debug:ghash", debug_ghash, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "debugtype:", debugtype, Joined, INVALID, INVALID, nullptr, 0, 0,
"Debug Info Options", nullptr, nullptr)
OPTION(prefix_1, "debug", debug, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -60,22 +68,25 @@ OPTION(prefix_1, "dll", dll, Flag, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_1, "driver:", driver, Joined, INVALID, INVALID, nullptr, 0, 0,
"Generate a Windows NT Kernel Mode Driver", nullptr, nullptr)
OPTION(prefix_1, "dynamicbase:no", dynamicbase_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable address space layout randomization", nullptr, nullptr)
OPTION(prefix_1, "dynamicbase", dynamicbase, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
"Disable ASLR (default when /fixed)", nullptr, nullptr)
OPTION(prefix_1, "dynamicbase", dynamicbase, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable ASLR (default unless /fixed)", nullptr, nullptr)
OPTION(prefix_1, "editandcontinue", editandcontinue, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "entry:", entry, Joined, INVALID, INVALID, nullptr, 0, 0,
"Name of entry point symbol", nullptr, nullptr)
OPTION(prefix_1, "errorlimit:", errorlimit, Joined, INVALID, INVALID, nullptr, 0, 0,
"Maximum number of errors to emit before stopping (0 = no limit)", nullptr, nullptr)
OPTION(prefix_1, "errorreport:", errorreport, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "export-all-symbols", export_all_symbols, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "export:", export, Joined, INVALID, INVALID, nullptr, 0, 0,
"Export a function", nullptr, nullptr)
OPTION(prefix_1, "failifmismatch:", failifmismatch, Joined, INVALID, INVALID, nullptr, 0, 0,
"", nullptr, nullptr)
OPTION(prefix_1, "fastfail", fastfail, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "fixed:no", fixed_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable base relocations", nullptr, nullptr)
OPTION(prefix_1, "fixed", fixed, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
"Enable base relocations (default)", nullptr, nullptr)
OPTION(prefix_1, "fixed", fixed, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable base relocations", nullptr, nullptr)
OPTION(prefix_1, "force:unresolved", force_unresolved, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "force", force, Flag, INVALID, INVALID, nullptr, 0, 0,
"Allow undefined symbols when creating executables", nullptr, nullptr)
@ -85,10 +96,12 @@ OPTION(prefix_1, "heap:", heap, Joined, INVALID, INVALID, nullptr, 0, 0,
"Size of the heap", nullptr, nullptr)
OPTION(prefix_1, "help", help, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "highentropyva:no", highentropyva_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Set HIGH_ENTROPY_VA bit", nullptr, nullptr)
OPTION(prefix_1, "highentropyva", highentropyva, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
"Disable 64-bit ASLR", nullptr, nullptr)
OPTION(prefix_1, "highentropyva", highentropyva, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable 64-bit ASLR (default on 64-bit)", nullptr, nullptr)
OPTION(prefix_1, "idlout:", idlout, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "ignore:", ignore, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "ignore:", ignore, Joined, INVALID, INVALID, nullptr, 0, 0,
"Specify warning codes to ignore", nullptr, nullptr)
OPTION(prefix_1, "ignoreidl", ignoreidl, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "implib:", implib, Joined, INVALID, INVALID, nullptr, 0, 0,
"Import library name", nullptr, nullptr)
@ -97,14 +110,20 @@ OPTION(prefix_2, "include:", incl, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_1, "incremental:no", no_incremental, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "incremental", incremental, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "largeaddressaware:no", largeaddressaware_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable large addresses", nullptr, nullptr)
OPTION(prefix_1, "largeaddressaware", largeaddressaware, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
"Disable large addresses (default on 32-bit)", nullptr, nullptr)
OPTION(prefix_1, "largeaddressaware", largeaddressaware, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable large addresses (default on 64-bit)", nullptr, nullptr)
OPTION(prefix_1, "libpath:", libpath, Joined, INVALID, INVALID, nullptr, 0, 0,
"Additional library search path", nullptr, nullptr)
OPTION(prefix_1, "linkrepro:", linkrepro, Joined, INVALID, INVALID, nullptr, 0, 0,
"Dump linker invocation and input files for debugging", nullptr, nullptr)
OPTION(prefix_1, "lldltocache:", lldltocache, Joined, INVALID, INVALID, nullptr, 0, 0,
"Path to ThinLTO cached object file directory", nullptr, nullptr)
OPTION(prefix_1, "lldltocachepolicy:", lldltocachepolicy, Joined, INVALID, INVALID, nullptr, 0, 0,
"Pruning policy for the ThinLTO cache", nullptr, nullptr)
OPTION(prefix_2, "lldmap:", lldmap_file, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "lldmap", lldmap, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "lldmingw", lldmingw, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "lldsavetemps", lldsavetemps, Flag, INVALID, INVALID, nullptr, 0, 0,
"Save temporary files instead of deleting them", nullptr, nullptr)
OPTION(prefix_1, "machine:", machine, Joined, INVALID, INVALID, nullptr, 0, 0,
@ -126,28 +145,31 @@ OPTION(prefix_1, "merge:", merge, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_1, "mllvm:", mllvm, Joined, INVALID, INVALID, nullptr, 0, 0,
"Options to pass to LLVM", nullptr, nullptr)
OPTION(prefix_1, "msvclto", msvclto, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "natvis:", natvis, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "nodefaultlib:", nodefaultlib, Joined, INVALID, INVALID, nullptr, 0, 0,
"Remove a default library", nullptr, nullptr)
OPTION(prefix_1, "nodefaultlib", nodefaultlib_all, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "noentry", noentry, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "nologo", nologo, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "nopdb", nopdb, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable PDB generation for DWARF users", nullptr, nullptr)
OPTION(prefix_1, "nosymtab", nosymtab, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "nxcompat:no", nxcompat_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable data execution provention", nullptr, nullptr)
OPTION(prefix_1, "nxcompat", nxcompat, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "nxcompat", nxcompat, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable data execution prevention (default)", nullptr, nullptr)
OPTION(prefix_1, "opt:", opt, Joined, INVALID, INVALID, nullptr, 0, 0,
"Control optimizations", nullptr, nullptr)
OPTION(prefix_1, "out:", out, Joined, INVALID, INVALID, nullptr, 0, 0,
"Path to file to write output", nullptr, nullptr)
OPTION(prefix_2, "output-def:", output_def, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "pdb:", pdb, Joined, INVALID, INVALID, nullptr, 0, 0,
"PDB file path", nullptr, nullptr)
OPTION(prefix_1, "pdbaltpath:", pdbaltpath, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "profile", profile, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_3, "rsp-quoting=", rsp_quoting, Joined, INVALID, INVALID, nullptr, 0, 0,
"Quoting style for response files, 'windows' (default) or 'posix'", nullptr, nullptr)
OPTION(prefix_1, "safeseh:no", safeseh_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Produce an image with Safe Exception Handler", nullptr, nullptr)
OPTION(prefix_1, "safeseh", safeseh, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
"Don't produce an image with Safe Exception Handler", nullptr, nullptr)
OPTION(prefix_1, "safeseh", safeseh, Flag, INVALID, INVALID, nullptr, 0, 0,
"Produce an image with Safe Exception Handler (only for x86)", nullptr, nullptr)
OPTION(prefix_1, "section:", section, Joined, INVALID, INVALID, nullptr, 0, 0,
"Specify section attributes", nullptr, nullptr)
OPTION(prefix_1, "stack:", stack, Joined, INVALID, INVALID, nullptr, 0, 0,
@ -163,12 +185,27 @@ OPTION(prefix_1, "tlbid:", tlbid, Joined, INVALID, INVALID, nullptr, 0, 0, nullp
OPTION(prefix_1, "tlbout:", tlbout, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "tsaware:no", tsaware_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Create non-Terminal Server aware executable", nullptr, nullptr)
OPTION(prefix_1, "tsaware", tsaware, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "tsaware", tsaware, Flag, INVALID, INVALID, nullptr, 0, 0,
"Create Terminal Server aware executable (default)", nullptr, nullptr)
OPTION(prefix_1, "verbose:", verbose_all, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "verbose", verbose, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "version:", version, Joined, INVALID, INVALID, nullptr, 0, 0,
"Specify a version number in the PE header", nullptr, nullptr)
OPTION(prefix_1, "wx:no", wx_no, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "wx", wx, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_3, "", help_q, Flag, INVALID, help, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_3, "version", dash_dash_version, Flag, INVALID, INVALID, nullptr, 0, 0,
"Print version information", nullptr, nullptr)
OPTION(prefix_1, "wholearchive:", wholearchive_file, Joined, INVALID, INVALID, nullptr, 0, 0,
"Include all object files from this archive", nullptr, nullptr)
OPTION(prefix_1, "wholearchive", wholearchive_flag, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "WX:no", WX_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Don't treat warnings as errors", nullptr, nullptr)
OPTION(prefix_1, "WX", WX, Flag, INVALID, INVALID, nullptr, 0, 0,
"Treat warnings as errors", nullptr, nullptr)
OPTION(prefix_4, "", help_q, Flag, INVALID, help, nullptr, 0, 0, nullptr, nullptr, nullptr)
#endif // OPTION
#ifdef OPTTABLE_ARG_INIT
//////////
// Option Values
#endif // OPTTABLE_ARG_INIT

View File

@ -187,3 +187,10 @@ OPTION(prefix_1, "v", v, Flag, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_1, "Z", Z, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not search standard directories for libraries or frameworks", nullptr, nullptr)
#endif // OPTION
#ifdef OPTTABLE_ARG_INIT
//////////
// Option Values
#endif // OPTTABLE_ARG_INIT

View File

@ -52,19 +52,23 @@ OPTION(prefix_2, "build-id", build_id, Flag, INVALID, INVALID, nullptr, 0, 0,
"Generate build ID note", nullptr, nullptr)
OPTION(prefix_2, "b", alias_format_b, Separate, INVALID, format, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "call_shared", alias_Bdynamic_call_shared, Flag, INVALID, Bdynamic, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "chroot", chroot, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "color-diagnostics=", color_diagnostics_eq, Joined, INVALID, INVALID, nullptr, 0, 0,
"Use colors in diagnostics", nullptr, nullptr)
OPTION(prefix_2, "color-diagnostics", color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
"Use colors in diagnostics", nullptr, nullptr)
OPTION(prefix_2, "compress-debug-sections=", compress_debug_sections, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "compress-debug-sections=", compress_debug_sections_eq, Joined, INVALID, compress_debug_sections, nullptr, 0, 0,
"Compress DWARF debug sections", nullptr, nullptr)
OPTION(prefix_3, "cref", cref, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "compress-debug-sections", compress_debug_sections, Separate, INVALID, INVALID, nullptr, 0, 0,
"Compress DWARF debug sections", nullptr, nullptr)
OPTION(prefix_2, "cref", cref, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "dc", alias_define_common_dc, Flag, INVALID, define_common, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "define-common", define_common, Flag, INVALID, INVALID, nullptr, 0, 0,
"Assign space to common symbols", nullptr, nullptr)
OPTION(prefix_2, "defsym=", defsym, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "defsym=", defsym_eq, Joined, INVALID, defsym, nullptr, 0, 0,
"Define a symbol alias", nullptr, nullptr)
OPTION(prefix_2, "defsym", defsym, Separate, INVALID, INVALID, nullptr, 0, 0,
"Define a symbol alias", nullptr, nullptr)
OPTION(prefix_2, "defsym", alias_defsym, Separate, INVALID, defsym, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "demangle", demangle, Flag, INVALID, INVALID, nullptr, 0, 0,
"Demangle symbol names", nullptr, nullptr)
OPTION(prefix_2, "detect-odr-violations", detect_odr_violations, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
@ -81,7 +85,8 @@ OPTION(prefix_2, "dn", alias_Bstatic_dn, Flag, INVALID, Bstatic, nullptr, 0, 0,
OPTION(prefix_2, "dp", alias_define_common_dp, Flag, INVALID, define_common, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "dynamic-linker", dynamic_linker, Separate, INVALID, INVALID, nullptr, 0, 0,
"Which dynamic linker to use", nullptr, nullptr)
OPTION(prefix_2, "dynamic-list=", alias_dynamic_list, Joined, INVALID, dynamic_list, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "dynamic-list=", dynamic_list_eq, Joined, INVALID, dynamic_list, nullptr, 0, 0,
"Read a list of dynamic symbols", nullptr, nullptr)
OPTION(prefix_2, "dynamic-list", dynamic_list, Separate, INVALID, INVALID, nullptr, 0, 0,
"Read a list of dynamic symbols", nullptr, nullptr)
OPTION(prefix_2, "dy", alias_Bdynamic_dy, Flag, INVALID, Bdynamic, nullptr, 0, 0, nullptr, nullptr, nullptr)
@ -97,18 +102,22 @@ OPTION(prefix_2, "enable-new-dtags", enable_new_dtags, Flag, INVALID, INVALID, n
OPTION(prefix_2, "end-group", end_group, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "end-lib", end_lib, Flag, INVALID, INVALID, nullptr, 0, 0,
"End a grouping of objects that should be treated as if they were together in an archive", nullptr, nullptr)
OPTION(prefix_2, "entry=", alias_entry_entry, Joined, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "entry=", entry_eq, Joined, INVALID, entry, nullptr, 0, 0,
"Name of entry point symbol", "<entry>", nullptr)
OPTION(prefix_2, "entry", entry, Separate, INVALID, INVALID, nullptr, 0, 0,
"Name of entry point symbol", "<entry>", nullptr)
OPTION(prefix_2, "error-limit=", alias_error_limit, Joined, INVALID, error_limit, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "error-limit=", error_limit_eq, Joined, INVALID, error_limit, nullptr, 0, 0,
"Maximum number of errors to emit before stopping (0 = no limit)", nullptr, nullptr)
OPTION(prefix_2, "error-limit", error_limit, Separate, INVALID, INVALID, nullptr, 0, 0,
"Maximum number of errors to emit before stopping (0 = no limit)", nullptr, nullptr)
OPTION(prefix_2, "error-unresolved-symbols", error_unresolved_symbols, Flag, INVALID, INVALID, nullptr, 0, 0,
"Report unresolved symbols as errors", nullptr, nullptr)
OPTION(prefix_2, "exclude-libs=", alias_exclude_libs, Joined, INVALID, exclude_libs, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "exclude-libs=", exclude_libs_eq, Joined, INVALID, exclude_libs, nullptr, 0, 0,
"Exclude static libraries from automatic export", nullptr, nullptr)
OPTION(prefix_2, "exclude-libs", exclude_libs, Separate, INVALID, INVALID, nullptr, 0, 0,
"Exclude static libraries from automatic export", nullptr, nullptr)
OPTION(prefix_2, "export-dynamic-symbol=", alias_export_dynamic_symbol, Joined, INVALID, export_dynamic_symbol, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "export-dynamic-symbol=", export_dynamic_symbol_eq, Joined, INVALID, export_dynamic_symbol, nullptr, 0, 0,
"Put a symbol in the dynamic symbol table", nullptr, nullptr)
OPTION(prefix_2, "export-dynamic-symbol", export_dynamic_symbol, Separate, INVALID, INVALID, nullptr, 0, 0,
"Put a symbol in the dynamic symbol table", nullptr, nullptr)
OPTION(prefix_2, "export-dynamic", export_dynamic, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -117,12 +126,19 @@ OPTION(prefix_1, "E", alias_export_dynamic_E, Flag, INVALID, export_dynamic, nul
OPTION(prefix_1, "e", alias_entry_e, JoinedOrSeparate, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "fatal-warnings", fatal_warnings, Flag, INVALID, INVALID, nullptr, 0, 0,
"Treat warnings as errors", nullptr, nullptr)
OPTION(prefix_2, "filter=", filter, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "filter=", filter_eq, Joined, INVALID, filter, nullptr, 0, 0,
"Set DT_FILTER field to the specified name", nullptr, nullptr)
OPTION(prefix_2, "fini=", alias_fini_fini, Joined, INVALID, fini, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "filter", filter, Separate, INVALID, INVALID, nullptr, 0, 0,
"Set DT_FILTER field to the specified name", nullptr, nullptr)
OPTION(prefix_2, "fini=", fini_eq, Joined, INVALID, fini, nullptr, 0, 0,
"Specify a finalizer function", "<symbol>", nullptr)
OPTION(prefix_2, "fini", fini, Separate, INVALID, INVALID, nullptr, 0, 0,
"Specify a finalizer function", "<symbol>", nullptr)
OPTION(prefix_2, "format=", format, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "fix-cortex-a53-843419", fix_cortex_a53_843419, Flag, INVALID, INVALID, nullptr, 0, 0,
"Apply fixes for AArch64 Cortex-A53 erratum 843419", nullptr, nullptr)
OPTION(prefix_2, "format=", format_eq, Joined, INVALID, format, nullptr, 0, 0,
"Change the input format of the inputs following this option", "<input-format>", nullptr)
OPTION(prefix_2, "format", format, Separate, INVALID, INVALID, nullptr, 0, 0,
"Change the input format of the inputs following this option", "<input-format>", nullptr)
OPTION(prefix_2, "full-shutdown", full_shutdown, Flag, INVALID, INVALID, nullptr, 0, 0,
"Perform a full shutdown instead of calling _exit", nullptr, nullptr)
@ -134,23 +150,36 @@ OPTION(prefix_2, "gdb-index", gdb_index, Flag, INVALID, INVALID, nullptr, 0, 0,
"Generate .gdb_index section", nullptr, nullptr)
OPTION(prefix_1, "G", G, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "g", g, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "hash-style=", alias_hash_style_hash_style, Joined, INVALID, hash_style, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "hash-style=", hash_style_eq, Joined, INVALID, hash_style, nullptr, 0, 0,
"Specify hash style (sysv, gnu or both)", nullptr, nullptr)
OPTION(prefix_2, "hash-style", hash_style, Separate, INVALID, INVALID, nullptr, 0, 0,
"Specify hash style (sysv, gnu or both)", nullptr, nullptr)
OPTION(prefix_2, "help", help, Flag, INVALID, INVALID, nullptr, 0, 0,
"Print option help", nullptr, nullptr)
OPTION(prefix_1, "h", alias_soname_h, JoinedOrSeparate, INVALID, soname, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "icf-data", icf_data, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable ICF to also fold identical read only data", nullptr, nullptr)
OPTION(prefix_2, "icf=all", icf_all, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable identical code folding", nullptr, nullptr)
OPTION(prefix_2, "icf=none", icf_none, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable identical code folding", nullptr, nullptr)
OPTION(prefix_2, "image-base=", image_base, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "image-base=", image_base_eq, Joined, INVALID, image_base, nullptr, 0, 0,
"Set the base address", nullptr, nullptr)
OPTION(prefix_2, "init=", alias_init_init, Joined, INVALID, init, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "image-base", image_base, Separate, INVALID, INVALID, nullptr, 0, 0,
"Set the base address", nullptr, nullptr)
OPTION(prefix_2, "init=", init_eq, Joined, INVALID, init, nullptr, 0, 0,
"Specify an initializer function", "<symbol>", nullptr)
OPTION(prefix_2, "init", init, Separate, INVALID, INVALID, nullptr, 0, 0,
"Specify an initializer function", "<symbol>", nullptr)
OPTION(prefix_2, "library-path=", alias_L__library_path, Joined, INVALID, L, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "library=", alias_l__library, Joined, INVALID, l, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "library-path=", library_path_eq, Joined, INVALID, library_path, nullptr, 0, 0,
"Add a directory to the library search path", "<dir>", nullptr)
OPTION(prefix_2, "library-path", library_path, Separate, INVALID, INVALID, nullptr, 0, 0,
"Add a directory to the library search path", "<dir>", nullptr)
OPTION(prefix_2, "library=", library_eq, Joined, INVALID, library, nullptr, 0, 0,
"Root name of library to use", "<libName>", nullptr)
OPTION(prefix_2, "library", library, Separate, INVALID, INVALID, nullptr, 0, 0,
"Root name of library to use", "<libName>", nullptr)
OPTION(prefix_2, "long-plt", long_plt, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "lto-aa-pipeline=", lto_aa_pipeline, Joined, INVALID, INVALID, nullptr, 0, 0,
"AA pipeline to run during LTO. Used in conjunction with -lto-newpm-passes", nullptr, nullptr)
OPTION(prefix_2, "lto-newpm-passes=", lto_newpm_passes, Joined, INVALID, INVALID, nullptr, 0, 0,
@ -159,13 +188,14 @@ OPTION(prefix_2, "lto-O", lto_O, Joined, INVALID, INVALID, nullptr, 0, 0,
"Optimization level for LTO", "<opt-level>", nullptr)
OPTION(prefix_2, "lto-partitions=", lto_partitions, Joined, INVALID, INVALID, nullptr, 0, 0,
"Number of LTO codegen partitions", nullptr, nullptr)
OPTION(prefix_1, "L", L, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Add a directory to the library search path", "<dir>", nullptr)
OPTION(prefix_1, "l", l, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Root name of library to use", "<libName>", nullptr)
OPTION(prefix_2, "Map=", alias_Map_eq, Joined, INVALID, Map, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "Map", Map, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_1, "L", alias_library_path, JoinedOrSeparate, INVALID, library_path, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "l", alias_library, JoinedOrSeparate, INVALID, library, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "Map=", Map_eq, Joined, INVALID, Map, nullptr, 0, 0,
"Print a link map to the specified file", nullptr, nullptr)
OPTION(prefix_2, "Map", Map, Separate, INVALID, INVALID, nullptr, 0, 0,
"Print a link map to the specified file", nullptr, nullptr)
OPTION(prefix_2, "merge-exidx-entries", merge_exidx_entries, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable merging .ARM.exidx entries", nullptr, nullptr)
OPTION(prefix_2, "mllvm", mllvm, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "M", alias_print_map_M, Flag, INVALID, print_map, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "m", m, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
@ -176,21 +206,32 @@ OPTION(prefix_2, "no-as-needed", no_as_needed, Flag, INVALID, INVALID, nullptr,
"Always DT_NEEDED for shared libraries", nullptr, nullptr)
OPTION(prefix_2, "no-color-diagnostics", no_color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not use colors in diagnostics", nullptr, nullptr)
OPTION(prefix_2, "no-copy-dt-needed-entries", no_copy_dt_needed_entries, Flag, INVALID, no_add_needed, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "no-copy-dt-needed-entries", no_copy_dt_needed_entries, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "no-ctors-in-init-array", no_ctors_in_init_array, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "no-define-common", no_define_common, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not assign space to common symbols", nullptr, nullptr)
OPTION(prefix_2, "no-demangle", no_demangle, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not demangle symbol names", nullptr, nullptr)
OPTION(prefix_2, "no-dynamic-linker", no_dynamic_linker, Flag, INVALID, INVALID, nullptr, 0, 0,
"Inhibit output of .interp section", nullptr, nullptr)
OPTION(prefix_2, "no-eh-frame-hdr", no_eh_frame_hdr, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not create .eh_frame_hdr section", nullptr, nullptr)
OPTION(prefix_2, "no-export-dynamic", no_export_dynamic, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "no-fatal-warnings", no_fatal_warnings, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "no-gc-sections", no_gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable garbage collection of unused sections", nullptr, nullptr)
OPTION(prefix_2, "no-gdb-index", no_gdb_index, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not generate .gdb_index section", nullptr, nullptr)
OPTION(prefix_2, "no-gnu-unique", no_gnu_unique, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable STB_GNU_UNIQUE symbol binding", nullptr, nullptr)
OPTION(prefix_2, "no-keep-memory", no_keep_memory, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "no-merge-exidx-entries", no_merge_exidx_entries, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable merging .ARM.exidx entries", nullptr, nullptr)
OPTION(prefix_2, "no-mmap-output-file", no_mmap_output_file, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_3, "no-omagic", no_omagic, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not set the text data sections to be writable", "<magic>", nullptr)
OPTION(prefix_2, "no-print-gc-sections", no_print_gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not list removed unused sections", nullptr, nullptr)
OPTION(prefix_2, "no-rosegment", no_rosegment, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not put read-only non-executable sections in their own segment", nullptr, nullptr)
OPTION(prefix_2, "no-threads", no_threads, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -219,17 +260,25 @@ OPTION(prefix_3, "opt-remarks-filename", opt_remarks_filename, Separate, INVALID
"YAML output file for optimization remarks", nullptr, nullptr)
OPTION(prefix_3, "opt-remarks-with-hotness", opt_remarks_with_hotness, Flag, INVALID, INVALID, nullptr, 0, 0,
"Include hotness informations in the optimization remarks file", nullptr, nullptr)
OPTION(prefix_2, "orphan-handling=", orphan_handling_eq, Joined, INVALID, orphan_handling, nullptr, 0, 0,
"Control how orphan sections are handled when linker script used", nullptr, nullptr)
OPTION(prefix_2, "orphan-handling", orphan_handling, Separate, INVALID, INVALID, nullptr, 0, 0,
"Control how orphan sections are handled when linker script used", nullptr, nullptr)
OPTION(prefix_3, "output=", alias_o_output, Joined, INVALID, o, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_3, "output", alias_o_output2, Separate, INVALID, o, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "O", O, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_1, "O", O, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Optimize output file size", nullptr, nullptr)
OPTION(prefix_1, "o", o, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Path to file to write output", "<path>", nullptr)
OPTION(prefix_2, "pack-dyn-relocs=", pack_dyn_relocs_eq, Joined, INVALID, INVALID, nullptr, 0, 0,
"Pack dynamic relocations in the given format (none or android)", "<format>", nullptr)
OPTION(prefix_2, "pic-executable", alias_pie_pic_executable, Flag, INVALID, pie, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "pie", pie, Flag, INVALID, INVALID, nullptr, 0, 0,
"Create a position independent executable", nullptr, nullptr)
OPTION(prefix_2, "plugin-opt=", plugin_opt_eq, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "plugin-opt", plugin_opt, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "plugin-opt=", plugin_opt_eq, Joined, INVALID, plugin_opt, nullptr, 0, 0,
"specifies LTO options for compatibility with GNU linkers", nullptr, nullptr)
OPTION(prefix_2, "plugin-opt", plugin_opt, Separate, INVALID, INVALID, nullptr, 0, 0,
"specifies LTO options for compatibility with GNU linkers", nullptr, nullptr)
OPTION(prefix_2, "plugin=", plugin_eq, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "plugin", plugin, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "print-gc-sections", print_gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -240,34 +289,42 @@ OPTION(prefix_2, "Qy", Qy, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullp
OPTION(prefix_1, "q", alias_emit_relocs, Flag, INVALID, emit_relocs, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "relocatable", relocatable, Flag, INVALID, INVALID, nullptr, 0, 0,
"Create relocatable object file", nullptr, nullptr)
OPTION(prefix_2, "reproduce=", alias_reproduce_eq, Joined, INVALID, reproduce, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "reproduce=", reproduce_eq, Joined, INVALID, reproduce, nullptr, 0, 0,
"Dump linker invocation and input files for debugging", nullptr, nullptr)
OPTION(prefix_2, "reproduce", reproduce, Separate, INVALID, INVALID, nullptr, 0, 0,
"Dump linker invocation and input files for debugging", nullptr, nullptr)
OPTION(prefix_2, "retain-symbols-file=", retain_symbols_file, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "retain-symbols-file=", retain_symbols_file_eq, Joined, INVALID, retain_symbols_file, nullptr, 0, 0,
"Retain only the symbols listed in the file", "<file>", nullptr)
OPTION(prefix_2, "retain-symbols-file", retain_symbols_file, Separate, INVALID, INVALID, nullptr, 0, 0,
"Retain only the symbols listed in the file", "<file>", nullptr)
OPTION(prefix_2, "retain-symbols-file", alias_retain_symbols_file, Separate, INVALID, retain_symbols_file, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "rpath-link=", rpath_link_eq, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "rpath-link", rpath_link, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "rpath=", alias_rpath_rpath, Joined, INVALID, rpath, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "rpath=", rpath_eq, Joined, INVALID, rpath, nullptr, 0, 0,
"Add a DT_RUNPATH to the output", nullptr, nullptr)
OPTION(prefix_2, "rpath", rpath, Separate, INVALID, INVALID, nullptr, 0, 0,
"Add a DT_RUNPATH to the output", nullptr, nullptr)
OPTION(prefix_2, "rsp-quoting=", rsp_quoting, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "rsp-quoting=", rsp_quoting_eq, Joined, INVALID, rsp_quoting, nullptr, 0, 0,
"Quoting style for response files. Values supported: windows|posix", nullptr, nullptr)
OPTION(prefix_2, "rsp-quoting", rsp_quoting, Separate, INVALID, INVALID, nullptr, 0, 0,
"Quoting style for response files. Values supported: windows|posix", nullptr, nullptr)
OPTION(prefix_1, "R", alias_rpath_R, JoinedOrSeparate, INVALID, rpath, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "r", alias_relocatable_r, Flag, INVALID, relocatable, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "save-temps", save_temps, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "script=", alias_script, Joined, INVALID, script, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "script=", script_eq, Joined, INVALID, script, nullptr, 0, 0,
"Read linker script", nullptr, nullptr)
OPTION(prefix_2, "script", script, Separate, INVALID, INVALID, nullptr, 0, 0,
"Read linker script", nullptr, nullptr)
OPTION(prefix_2, "section-start", section_start, Separate, INVALID, INVALID, nullptr, 0, 0,
"Set address of section", "<address>", nullptr)
OPTION(prefix_2, "shared", shared, Flag, INVALID, INVALID, nullptr, 0, 0,
"Build a shared object", nullptr, nullptr)
OPTION(prefix_2, "soname=", soname, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "soname=", soname_eq, Joined, INVALID, soname, nullptr, 0, 0,
"Set DT_SONAME", nullptr, nullptr)
OPTION(prefix_2, "soname", soname, Separate, INVALID, INVALID, nullptr, 0, 0,
"Set DT_SONAME", nullptr, nullptr)
OPTION(prefix_2, "soname", alias_soname_soname, Separate, INVALID, soname, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "sort-common", sort_common, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "sort-section=", alias_sort_section, Joined, INVALID, sort_section, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "sort-section=", sort_section_eq, Joined, INVALID, sort_section, nullptr, 0, 0,
"Specifies sections sorting rule when linkerscript is used", nullptr, nullptr)
OPTION(prefix_2, "sort-section", sort_section, Separate, INVALID, INVALID, nullptr, 0, 0,
"Specifies sections sorting rule when linkerscript is used", nullptr, nullptr)
OPTION(prefix_2, "start-group", start_group, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
@ -281,7 +338,9 @@ OPTION(prefix_2, "strip-debug", strip_debug, Flag, INVALID, INVALID, nullptr, 0,
"Strip debugging information", nullptr, nullptr)
OPTION(prefix_2, "symbol-ordering-file", symbol_ordering_file, Separate, INVALID, INVALID, nullptr, 0, 0,
"Layout sections in the order specified by symbol file", nullptr, nullptr)
OPTION(prefix_2, "sysroot=", sysroot, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "sysroot=", sysroot_eq, Joined, INVALID, sysroot, nullptr, 0, 0,
"Set the system root", nullptr, nullptr)
OPTION(prefix_2, "sysroot", sysroot, Separate, INVALID, INVALID, nullptr, 0, 0,
"Set the system root", nullptr, nullptr)
OPTION(prefix_1, "S", alias_strip_debug_S, Flag, INVALID, strip_debug, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "s", alias_strip_all, Flag, INVALID, strip_all, nullptr, 0, 0, nullptr, nullptr, nullptr)
@ -289,12 +348,16 @@ OPTION(prefix_2, "target1-abs", target1_abs, Flag, INVALID, INVALID, nullptr, 0,
"Interpret R_ARM_TARGET1 as R_ARM_ABS32", nullptr, nullptr)
OPTION(prefix_2, "target1-rel", target1_rel, Flag, INVALID, INVALID, nullptr, 0, 0,
"Interpret R_ARM_TARGET1 as R_ARM_REL32", nullptr, nullptr)
OPTION(prefix_2, "target2=", target2, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "target2=", target2_eq, Joined, INVALID, target2, nullptr, 0, 0,
"Interpret R_ARM_TARGET2 as <type>, where <type> is one of rel, abs, or got-rel", "<type>", nullptr)
OPTION(prefix_2, "Tbss=", alias_Tbss, Joined, INVALID, Tbss, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "target2", target2, Separate, INVALID, INVALID, nullptr, 0, 0,
"Interpret R_ARM_TARGET2 as <type>, where <type> is one of rel, abs, or got-rel", "<type>", nullptr)
OPTION(prefix_2, "Tbss=", Tbss_eq, Joined, INVALID, Tbss, nullptr, 0, 0,
"Same as --section-start with .bss as the sectionname", nullptr, nullptr)
OPTION(prefix_2, "Tbss", Tbss, Separate, INVALID, INVALID, nullptr, 0, 0,
"Same as --section-start with .bss as the sectionname", nullptr, nullptr)
OPTION(prefix_2, "Tdata=", alias_Tdata, Joined, INVALID, Tdata, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "Tdata=", Tdata_eq, Joined, INVALID, Tdata, nullptr, 0, 0,
"Same as --section-start with .data as the sectionname", nullptr, nullptr)
OPTION(prefix_2, "Tdata", Tdata, Separate, INVALID, INVALID, nullptr, 0, 0,
"Same as --section-start with .data as the sectionname", nullptr, nullptr)
OPTION(prefix_2, "thinlto-cache-dir=", thinlto_cache_dir, Joined, INVALID, INVALID, nullptr, 0, 0,
@ -305,27 +368,33 @@ OPTION(prefix_2, "thinlto-jobs=", thinlto_jobs, Joined, INVALID, INVALID, nullpt
"Number of ThinLTO jobs", nullptr, nullptr)
OPTION(prefix_2, "threads", threads, Flag, INVALID, INVALID, nullptr, 0, 0,
"Run the linker multi-threaded", nullptr, nullptr)
OPTION(prefix_2, "trace-symbol=", trace_trace_symbol_eq, Joined, INVALID, trace_symbol, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "trace-symbol=", trace_symbol_eq, Joined, INVALID, trace_symbol, nullptr, 0, 0,
"Trace references to symbols", nullptr, nullptr)
OPTION(prefix_2, "trace-symbol", trace_symbol, Separate, INVALID, INVALID, nullptr, 0, 0,
"Trace references to symbols", nullptr, nullptr)
OPTION(prefix_2, "trace", trace, Flag, INVALID, INVALID, nullptr, 0, 0,
"Print the names of the input files", nullptr, nullptr)
OPTION(prefix_2, "Ttext-segment=", alias_Ttext_segment_eq, Joined, INVALID, Ttext, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "Ttext-segment", alias_Ttext_segment, Separate, INVALID, Ttext, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "Ttext=", alias_Ttext, Joined, INVALID, Ttext, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "Ttext=", Ttext_eq, Joined, INVALID, Ttext, nullptr, 0, 0,
"Same as --section-start with .text as the sectionname", nullptr, nullptr)
OPTION(prefix_2, "Ttext", Ttext, Separate, INVALID, INVALID, nullptr, 0, 0,
"Same as --section-start with .text as the sectionname", nullptr, nullptr)
OPTION(prefix_1, "T", alias_script_T, JoinedOrSeparate, INVALID, script, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "t", alias_trace, Flag, INVALID, trace, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "undefined=", alias_undefined_eq, Joined, INVALID, undefined, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "undefined=", undefined_eq, Joined, INVALID, undefined, nullptr, 0, 0,
"Force undefined symbol during linking", nullptr, nullptr)
OPTION(prefix_2, "undefined", undefined, Separate, INVALID, INVALID, nullptr, 0, 0,
"Force undefined symbol during linking", nullptr, nullptr)
OPTION(prefix_2, "unresolved-symbols=", unresolved_symbols, Joined, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "unresolved-symbols=", unresolved_symbols_eq, Joined, INVALID, unresolved_symbols, nullptr, 0, 0,
"Determine how to handle unresolved symbols", nullptr, nullptr)
OPTION(prefix_2, "unresolved-symbols", unresolved_symbols, Separate, INVALID, INVALID, nullptr, 0, 0,
"Determine how to handle unresolved symbols", nullptr, nullptr)
OPTION(prefix_1, "u", alias_undefined_u, JoinedOrSeparate, INVALID, undefined, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "verbose", verbose, Flag, INVALID, INVALID, nullptr, 0, 0,
"Verbose mode", nullptr, nullptr)
OPTION(prefix_2, "version-script=", alias_version_script_eq, Joined, INVALID, version_script, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "version-script=", version_script_eq, Joined, INVALID, version_script, nullptr, 0, 0,
"Read a version script", nullptr, nullptr)
OPTION(prefix_2, "version-script", version_script, Separate, INVALID, INVALID, nullptr, 0, 0,
"Read a version script", nullptr, nullptr)
OPTION(prefix_2, "version", version, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -336,12 +405,14 @@ OPTION(prefix_1, "v", v, Flag, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "warn-common", warn_common, Flag, INVALID, INVALID, nullptr, 0, 0,
"Warn about duplicate common symbols", nullptr, nullptr)
OPTION(prefix_2, "warn-execstack", warn_execstack, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "warn-once", warn_once, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "warn-shared-textrel", warn_shared_textrel, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "warn-unresolved-symbols", warn_unresolved_symbols, Flag, INVALID, INVALID, nullptr, 0, 0,
"Report unresolved symbols as warnings", nullptr, nullptr)
OPTION(prefix_2, "whole-archive", whole_archive, Flag, INVALID, INVALID, nullptr, 0, 0,
"Force load of all members in a static library", nullptr, nullptr)
OPTION(prefix_2, "wrap=", alias_wrap_wrap, Joined, INVALID, wrap, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "wrap=", wrap_eq, Joined, INVALID, wrap, nullptr, 0, 0,
"Use wrapper functions for symbol", "<symbol>", nullptr)
OPTION(prefix_2, "wrap", wrap, Separate, INVALID, INVALID, nullptr, 0, 0,
"Use wrapper functions for symbol", "<symbol>", nullptr)
OPTION(prefix_1, "X", alias_discard_locals_X, Flag, INVALID, discard_locals, nullptr, 0, 0, nullptr, nullptr, nullptr)
@ -350,3 +421,10 @@ OPTION(prefix_1, "y", alias_trace_symbol_y, JoinedOrSeparate, INVALID, trace_sym
OPTION(prefix_1, "z", z, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Linker option extensions", "<option>", nullptr)
#endif // OPTION
#ifdef OPTTABLE_ARG_INIT
//////////
// Option Values
#endif // OPTTABLE_ARG_INIT

114
deps/lld-prebuilt/MinGW/Options.inc vendored Normal file
View File

@ -0,0 +1,114 @@
/*===- TableGen'erated file -------------------------------------*- C++ -*-===*\
|* *|
|* Option Parsing Definitions *|
|* *|
|* Automatically generated file, do not edit! *|
|* *|
\*===----------------------------------------------------------------------===*/
/////////
// Prefixes
#ifdef PREFIX
// Each PREFIX(name, list) entry names one nullptr-terminated set of option
// prefixes; OPTION entries in this file refer to these sets by name.
//   prefix_0 - empty set (no prefix)
//   prefix_1 - "-" only
//   prefix_3 - "--" only
//   prefix_2 - either "--" or "-"
// COMMA stands in for a literal ',' so that each brace-enclosed list reaches
// the including file's PREFIX macro as a single macro argument.
#define COMMA ,
PREFIX(prefix_0, {nullptr})
PREFIX(prefix_1, {"-" COMMA nullptr})
PREFIX(prefix_3, {"--" COMMA nullptr})
PREFIX(prefix_2, {"--" COMMA "-" COMMA nullptr})
#undef COMMA
#endif // PREFIX
/////////
// Groups
#ifdef OPTION
//////////
// Options
// One OPTION(...) entry per command-line option; the including file defines
// OPTION to pick out whichever fields it needs.
// The twelve arguments appear to be, in order: prefix set, spelling,
// identifier, option kind (Flag / Joined / Separate / JoinedOrSeparate /
// Input / Unknown), group, aliased option, alias arguments, flags, extra
// parameter, help text, meta-variable name, value list.
// TODO confirm the field order against the LLVM OptParser headers for the
// LLVM version this file was generated with.
// INVALID in the group/alias slots presumably means "none"; an entry whose
// alias slot names another identifier (e.g. alias_entry_e -> entry) is an
// alternate spelling of that option.
OPTION(prefix_0, "<input>", INPUT, Input, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_0, "<unknown>", UNKNOWN, Unknown, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "###", _HASH_HASH_HASH, Flag, INVALID, INVALID, nullptr, 0, 0,
"Print (but do not run) the commands to run for this compilation", nullptr, nullptr)
OPTION(prefix_2, "Bdynamic", Bdynamic, Flag, INVALID, INVALID, nullptr, 0, 0,
"Link against shared libraries", nullptr, nullptr)
OPTION(prefix_2, "Bstatic", Bstatic, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not link against shared libraries", nullptr, nullptr)
OPTION(prefix_2, "build-id", build_id, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "disable-auto-image-base", disable_auto_image_base, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "dynamicbase", dynamicbase, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable ASLR", nullptr, nullptr)
OPTION(prefix_2, "enable-auto-image-base", enable_auto_image_base, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "enable-auto-import", enable_auto_import, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "entry", entry, Separate, INVALID, INVALID, nullptr, 0, 0,
"Name of entry point symbol", "<entry>", nullptr)
OPTION(prefix_2, "export-all-symbols", export_all_symbols, Flag, INVALID, INVALID, nullptr, 0, 0,
"Export all symbols even if a def file or dllexport attributes are used", nullptr, nullptr)
OPTION(prefix_1, "e", alias_entry_e, JoinedOrSeparate, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_3, "full-shutdown", full_shutdown, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "gc-sections", gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
"Remove unused sections", nullptr, nullptr)
OPTION(prefix_2, "high-entropy-va", high_entropy_va, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable 64-bit ASLR", nullptr, nullptr)
OPTION(prefix_2, "icf=", icf, Joined, INVALID, INVALID, nullptr, 0, 0,
"Identical code folding", nullptr, nullptr)
OPTION(prefix_2, "image-base", image_base, Separate, INVALID, INVALID, nullptr, 0, 0,
"Base address of the program", nullptr, nullptr)
OPTION(prefix_3, "large-address-aware", large_address_aware, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable large addresses", nullptr, nullptr)
OPTION(prefix_1, "L", L, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Add a directory to the library search path", "<dir>", nullptr)
OPTION(prefix_1, "l", l, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Root name of library to use", "<libName>", nullptr)
OPTION(prefix_2, "major-image-version", major_image_version, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "minor-image-version", minor_image_version, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "mllvm", mllvm, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "m", m, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Set target emulation", nullptr, nullptr)
OPTION(prefix_2, "no-gc-sections", no_gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
"Don't remove unused sections", nullptr, nullptr)
OPTION(prefix_2, "no-seh", no_seh, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "no-whole-archive", no_whole_archive, Flag, INVALID, INVALID, nullptr, 0, 0,
"No longer include all object files for following archives", nullptr, nullptr)
OPTION(prefix_2, "nxcompat", nxcompat, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable data execution prevention", nullptr, nullptr)
OPTION(prefix_3, "out-implib=", out_implib_eq, Joined, INVALID, out_implib, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_3, "out-implib", out_implib, Separate, INVALID, INVALID, nullptr, 0, 0,
"Import library name", nullptr, nullptr)
OPTION(prefix_2, "output-def", output_def, Separate, INVALID, INVALID, nullptr, 0, 0,
"Output def file", nullptr, nullptr)
OPTION(prefix_1, "O", O, Joined, INVALID, INVALID, nullptr, 0, 0,
"Optimize output file size", nullptr, nullptr)
OPTION(prefix_1, "o", o, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Path to file to write output", "<path>", nullptr)
OPTION(prefix_2, "pic-executable", pic_executable, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "shared", shared, Flag, INVALID, INVALID, nullptr, 0, 0,
"Build a shared object", nullptr, nullptr)
OPTION(prefix_2, "stack", stack, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "strip-all", strip_all, Flag, INVALID, INVALID, nullptr, 0, 0,
"Omit all symbol information from the output binary", nullptr, nullptr)
OPTION(prefix_2, "subsystem", subs, Separate, INVALID, INVALID, nullptr, 0, 0,
"Specify subsystem", nullptr, nullptr)
// NOTE(review): unlike "icf=" and "Xlink=", this Joined option is spelled
// without a trailing '=', so for "--sysroot=<dir>" the joined value would
// presumably start with '=' -- confirm how the consumer treats it.
OPTION(prefix_2, "sysroot", sysroot, Joined, INVALID, INVALID, nullptr, 0, 0,
"Sysroot", nullptr, nullptr)
OPTION(prefix_1, "s", alias_strip_s, Flag, INVALID, strip_all, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "tsaware", tsaware, Flag, INVALID, INVALID, nullptr, 0, 0,
"Create Terminal Server aware executable", nullptr, nullptr)
OPTION(prefix_2, "verbose", verbose, Flag, INVALID, INVALID, nullptr, 0, 0,
"Verbose mode", nullptr, nullptr)
OPTION(prefix_2, "version", version, Flag, INVALID, INVALID, nullptr, 0, 0,
"Display the version number and exit", nullptr, nullptr)
OPTION(prefix_1, "v", v, Flag, INVALID, INVALID, nullptr, 0, 0,
"Display the version number", nullptr, nullptr)
OPTION(prefix_2, "whole-archive", whole_archive, Flag, INVALID, INVALID, nullptr, 0, 0,
"Include all object files for following archives", nullptr, nullptr)
OPTION(prefix_2, "Xlink=", Xlink, Joined, INVALID, INVALID, nullptr, 0, 0,
"Pass <arg> to the COFF linker", "<arg>", nullptr)
#endif // OPTION
#ifdef OPTTABLE_ARG_INIT
//////////
// Option Values
#endif // OPTTABLE_ARG_INIT

View File

@ -1,6 +1,6 @@
#define LLD_VERSION 5.0.1
#define LLD_VERSION_STRING "5.0.1"
#define LLD_VERSION_MAJOR 5
#define LLD_VERSION 6.0.0
#define LLD_VERSION_STRING "6.0.0"
#define LLD_VERSION_MAJOR 6
#define LLD_VERSION_MINOR 0
#define LLD_REVISION_STRING ""
#define LLD_REPOSITORY_STRING ""

106
deps/lld-prebuilt/wasm/Options.inc vendored Normal file
View File

@ -0,0 +1,106 @@
/*===- TableGen'erated file -------------------------------------*- C++ -*-===*\
|* *|
|* Option Parsing Definitions *|
|* *|
|* Automatically generated file, do not edit! *|
|* *|
\*===----------------------------------------------------------------------===*/
/////////
// Prefixes
#ifdef PREFIX
// Each PREFIX(name, list) entry names one nullptr-terminated set of option
// prefixes; OPTION entries in this file refer to these sets by name.
//   prefix_0 - empty set (no prefix)
//   prefix_2 - "-" only
//   prefix_1 - either "--" or "-"
// COMMA stands in for a literal ',' so that each brace-enclosed list reaches
// the including file's PREFIX macro as a single macro argument.
#define COMMA ,
PREFIX(prefix_0, {nullptr})
PREFIX(prefix_2, {"-" COMMA nullptr})
PREFIX(prefix_1, {"--" COMMA "-" COMMA nullptr})
#undef COMMA
#endif // PREFIX
/////////
// Groups
#ifdef OPTION
//////////
// Options
// One OPTION(...) entry per command-line option; the including file defines
// OPTION to pick out whichever fields it needs.
// The twelve arguments appear to be, in order: prefix set, spelling,
// identifier, option kind (Flag / Joined / Separate / JoinedOrSeparate /
// Input / Unknown), group, aliased option, alias arguments, flags, extra
// parameter, help text, meta-variable name, value list.
// TODO confirm the field order against the LLVM OptParser headers for the
// LLVM version this file was generated with.
// INVALID in the group/alias slots presumably means "none"; an entry whose
// alias slot names another identifier (e.g. alias_entry_e -> entry) is an
// alternate spelling of that option.
OPTION(prefix_0, "<input>", INPUT, Input, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_0, "<unknown>", UNKNOWN, Unknown, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "allow-undefined-file=", allow_undefined_file, Joined, INVALID, INVALID, nullptr, 0, 0,
"Allow symbols listed in <file> to be undefined in linked binary", nullptr, nullptr)
// NOTE(review): this separate-argument spelling uses prefix_2 ("-" only),
// whereas the joined spelling above and the other long options in this table
// use prefix_1 ("--" or "-") -- confirm that is intended.
OPTION(prefix_2, "allow-undefined-file", allow_undefined_file_s, Separate, INVALID, allow_undefined_file, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "allow-undefined", allow_undefined, Flag, INVALID, INVALID, nullptr, 0, 0,
"Allow undefined symbols in linked binary", nullptr, nullptr)
OPTION(prefix_1, "check-signatures", check_signatures, Flag, INVALID, INVALID, nullptr, 0, 0,
"Check function signatures", nullptr, nullptr)
OPTION(prefix_1, "color-diagnostics=", color_diagnostics_eq, Joined, INVALID, INVALID, nullptr, 0, 0,
"Use colors in diagnostics", nullptr, nullptr)
OPTION(prefix_1, "color-diagnostics", color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
"Use colors in diagnostics", nullptr, nullptr)
OPTION(prefix_1, "emit-relocs", emit_relocs, Flag, INVALID, INVALID, nullptr, 0, 0,
"Generate relocations in output", nullptr, nullptr)
OPTION(prefix_1, "entry=", alias_entry_entry, Joined, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "entry", entry, Separate, INVALID, INVALID, nullptr, 0, 0,
"Name of entry point symbol", "<entry>", nullptr)
OPTION(prefix_1, "error-limit=", error_limit, Joined, INVALID, INVALID, nullptr, 0, 0,
"Maximum number of errors to emit before stopping (0 = no limit)", nullptr, nullptr)
OPTION(prefix_2, "e", alias_entry_e, JoinedOrSeparate, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "global-base=", global_base, Joined, INVALID, INVALID, nullptr, 0, 0,
"Where to start to place global data", nullptr, nullptr)
OPTION(prefix_1, "help", help, Flag, INVALID, INVALID, nullptr, 0, 0,
"Print option help", nullptr, nullptr)
OPTION(prefix_1, "import-memory", import_memory, Flag, INVALID, INVALID, nullptr, 0, 0,
"Import memory from the environment", nullptr, nullptr)
OPTION(prefix_1, "initial-memory=", initial_memory, Joined, INVALID, INVALID, nullptr, 0, 0,
"Initial size of the linear memory", nullptr, nullptr)
// NOTE(review): "-i" is declared as a Flag yet aliases the Joined option
// initial_memory; a Flag presumably carries no value, so it is unclear how a
// size would be supplied through this spelling -- confirm upstream intent.
OPTION(prefix_2, "i", alias_initial_memory_i, Flag, INVALID, initial_memory, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "L", L, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Add a directory to the library search path", "<dir>", nullptr)
OPTION(prefix_2, "l", l, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Root name of library to use", "<libName>", nullptr)
OPTION(prefix_1, "max-memory=", max_memory, Joined, INVALID, INVALID, nullptr, 0, 0,
"Maximum size of the linear memory", nullptr, nullptr)
OPTION(prefix_1, "mllvm", mllvm, Separate, INVALID, INVALID, nullptr, 0, 0,
"Options to pass to LLVM", nullptr, nullptr)
// NOTE(review): same pattern as "-i": a Flag aliasing the Joined option
// max_memory -- confirm upstream intent.
OPTION(prefix_2, "m", alias_max_memory_m, Flag, INVALID, max_memory, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "no-check-signatures", no_check_signatures, Flag, INVALID, INVALID, nullptr, 0, 0,
"Don't check function signatures", nullptr, nullptr)
OPTION(prefix_1, "no-color-diagnostics", no_color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not use colors in diagnostics", nullptr, nullptr)
OPTION(prefix_1, "no-entry", no_entry, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not output any entry point", nullptr, nullptr)
OPTION(prefix_1, "no-threads", no_threads, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not run the linker multi-threaded", nullptr, nullptr)
OPTION(prefix_2, "o", o, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Path to file to write output", "<path>", nullptr)
OPTION(prefix_1, "relocatable", relocatable, Flag, INVALID, INVALID, nullptr, 0, 0,
"Create relocatable object file", nullptr, nullptr)
OPTION(prefix_2, "r", alias_relocatable_r, Flag, INVALID, relocatable, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "strip-all", strip_all, Flag, INVALID, INVALID, nullptr, 0, 0,
"Strip all symbols", nullptr, nullptr)
OPTION(prefix_1, "strip-debug", strip_debug, Flag, INVALID, INVALID, nullptr, 0, 0,
"Strip debugging information", nullptr, nullptr)
OPTION(prefix_1, "threads", threads, Flag, INVALID, INVALID, nullptr, 0, 0,
"Run the linker multi-threaded", nullptr, nullptr)
OPTION(prefix_1, "undefined=", undefined_eq, Joined, INVALID, undefined, nullptr, 0, 0,
"Force undefined symbol during linking", nullptr, nullptr)
OPTION(prefix_1, "undefined", undefined, Separate, INVALID, INVALID, nullptr, 0, 0,
"Force undefined symbol during linking", nullptr, nullptr)
OPTION(prefix_2, "u", alias_undefined_u, JoinedOrSeparate, INVALID, undefined, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "verbose", verbose, Flag, INVALID, INVALID, nullptr, 0, 0,
"Verbose mode", nullptr, nullptr)
OPTION(prefix_1, "version", version, Flag, INVALID, INVALID, nullptr, 0, 0,
"Display the version number and exit", nullptr, nullptr)
OPTION(prefix_2, "v", v, Flag, INVALID, INVALID, nullptr, 0, 0,
"Display the version number", nullptr, nullptr)
OPTION(prefix_2, "z", z, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Linker option extensions", "<option>", nullptr)
#endif // OPTION
#ifdef OPTTABLE_ARG_INIT
//////////
// Option Values
#endif // OPTTABLE_ARG_INIT

2
deps/lld/.arcconfig vendored
View File

@ -1,4 +1,4 @@
{
"project_id" : "lld",
"repository.callsign" : "LLD",
"conduit_uri" : "https://reviews.llvm.org/"
}

View File

@ -160,8 +160,8 @@ endif ()
# Configure the Version.inc file.
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/include/lld/Config/Version.inc.in
${CMAKE_CURRENT_BINARY_DIR}/include/lld/Config/Version.inc)
${CMAKE_CURRENT_SOURCE_DIR}/include/lld/Common/Version.inc.in
${CMAKE_CURRENT_BINARY_DIR}/include/lld/Common/Version.inc)
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR)
@ -210,6 +210,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
)
endif()
add_subdirectory(Common)
add_subdirectory(lib)
add_subdirectory(tools/lld)
@ -221,4 +222,5 @@ endif()
add_subdirectory(docs)
add_subdirectory(COFF)
add_subdirectory(ELF)
add_subdirectory(MinGW)
add_subdirectory(wasm)

View File

@ -17,3 +17,6 @@ N: Lang Hames, Nick Kledzik
E: lhames@gmail.com, kledzik@apple.com
D: Mach-O backend
N: Sam Clegg
E: sbc@chromium.org
D: WebAssembly backend (wasm/*)

View File

@ -11,12 +11,12 @@ add_lld_library(lldCOFF
DLL.cpp
Driver.cpp
DriverUtils.cpp
Error.cpp
ICF.cpp
InputFiles.cpp
LTO.cpp
MapFile.cpp
MarkLive.cpp
MinGW.cpp
PDB.cpp
Strings.cpp
SymbolTable.cpp
@ -26,22 +26,20 @@ add_lld_library(lldCOFF
LINK_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
BinaryFormat
BitReader
Core
DebugInfoCodeView
DebugInfoMSF
DebugInfoPDB
LTO
LibDriver
Object
LTO
MC
MCDisassembler
Target
Object
Option
Support
WindowsManifest
LINK_LIBS
lldCore
lldCommon
${LLVM_PTHREAD_LIB}
DEPENDS

View File

@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "Chunks.h"
#include "Error.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "Writer.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFF.h"
@ -29,17 +29,14 @@ using llvm::support::ulittle32_t;
namespace lld {
namespace coff {
SectionChunk::SectionChunk(ObjectFile *F, const coff_section *H)
SectionChunk::SectionChunk(ObjFile *F, const coff_section *H)
: Chunk(SectionKind), Repl(this), Header(H), File(F),
Relocs(File->getCOFFObj()->getRelocations(Header)),
NumRelocs(std::distance(Relocs.begin(), Relocs.end())) {
// Initialize SectionName.
File->getCOFFObj()->getSectionName(Header, SectionName);
Align = Header->getAlignment();
// Chunks may be discarded during comdat merging.
Discarded = false;
Alignment = Header->getAlignment();
// If linker GC is disabled, every chunk starts out alive. If linker GC is
// enabled, treat non-comdat sections as roots. Generally optimized object
@ -62,7 +59,10 @@ static void applySecRel(const SectionChunk *Sec, uint8_t *Off,
fatal("SECREL relocation cannot be applied to absolute symbols");
}
uint64_t SecRel = S - OS->getRVA();
assert(SecRel < INT32_MAX && "overflow in SECREL relocation");
if (SecRel > UINT32_MAX) {
error("overflow in SECREL relocation in section: " + Sec->getSectionName());
return;
}
add32(Off, SecRel);
}
@ -119,7 +119,7 @@ static uint16_t readMOV(uint8_t *Off) {
return Imm;
}
static void applyMOV32T(uint8_t *Off, uint32_t V) {
void applyMOV32T(uint8_t *Off, uint32_t V) {
uint16_t ImmW = readMOV(Off); // read MOVW operand
uint16_t ImmT = readMOV(Off + 4); // read MOVT operand
uint32_t Imm = ImmW | (ImmT << 16);
@ -129,6 +129,8 @@ static void applyMOV32T(uint8_t *Off, uint32_t V) {
}
static void applyBranch20T(uint8_t *Off, int32_t V) {
if (!isInt<21>(V))
fatal("relocation out of range");
uint32_t S = V < 0 ? 1 : 0;
uint32_t J1 = (V >> 19) & 1;
uint32_t J2 = (V >> 18) & 1;
@ -136,7 +138,7 @@ static void applyBranch20T(uint8_t *Off, int32_t V) {
or16(Off + 2, (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff));
}
static void applyBranch24T(uint8_t *Off, int32_t V) {
void applyBranch24T(uint8_t *Off, int32_t V) {
if (!isInt<25>(V))
fatal("relocation out of range");
uint32_t S = V < 0 ? 1 : 0;
@ -167,36 +169,61 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS,
}
}
static void applyArm64Addr(uint8_t *Off, uint64_t Imm) {
// Interpret the existing immediate value as a byte offset to the
// target symbol, then update the instruction with the immediate as
// the page offset from the current instruction to the target.
static void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P) {
uint32_t Orig = read32le(Off);
uint64_t Imm = ((Orig >> 29) & 0x3) | ((Orig >> 3) & 0x1FFFFC);
S += Imm;
Imm = (S >> 12) - (P >> 12);
uint32_t ImmLo = (Imm & 0x3) << 29;
uint32_t ImmHi = (Imm & 0x1FFFFC) << 3;
uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3);
write32le(Off, (read32le(Off) & ~Mask) | ImmLo | ImmHi);
write32le(Off, (Orig & ~Mask) | ImmLo | ImmHi);
}
// Update the immediate field in a AARCH64 ldr, str, and add instruction.
static void applyArm64Imm(uint8_t *Off, uint64_t Imm) {
// Optionally limit the range of the written immediate by one or more bits
// (RangeLimit).
static void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit) {
uint32_t Orig = read32le(Off);
Imm += (Orig >> 10) & 0xFFF;
Orig &= ~(0xFFF << 10);
write32le(Off, Orig | ((Imm & 0xFFF) << 10));
write32le(Off, Orig | ((Imm & (0xFFF >> RangeLimit)) << 10));
}
// Add the 12 bit page offset to the existing immediate.
// Ldr/str instructions store the opcode immediate scaled
// by the load/store size (giving a larger range for larger
// loads/stores). The immediate is always (both before and after
// fixing up the relocation) stored scaled similarly.
// Even if larger loads/stores have a larger range, limit the
// effective offset to 12 bit, since it is intended to be a
// page offset.
static void applyArm64Ldr(uint8_t *Off, uint64_t Imm) {
int Size = read32le(Off) >> 30;
Imm >>= Size;
applyArm64Imm(Off, Imm);
uint32_t Orig = read32le(Off);
uint32_t Size = Orig >> 30;
// 0x04000000 indicates SIMD/FP registers
// 0x00800000 indicates 128 bit
if ((Orig & 0x4800000) == 0x4800000)
Size += 4;
if ((Imm & ((1 << Size) - 1)) != 0)
fatal("misaligned ldr/str offset");
applyArm64Imm(Off, Imm >> Size, Size);
}
void SectionChunk::applyRelARM64(uint8_t *Off, uint16_t Type, OutputSection *OS,
uint64_t S, uint64_t P) const {
switch (Type) {
case IMAGE_REL_ARM64_PAGEBASE_REL21: applyArm64Addr(Off, (S >> 12) - (P >> 12)); break;
case IMAGE_REL_ARM64_PAGEOFFSET_12A: applyArm64Imm(Off, S & 0xfff); break;
case IMAGE_REL_ARM64_PAGEBASE_REL21: applyArm64Addr(Off, S, P); break;
case IMAGE_REL_ARM64_PAGEOFFSET_12A: applyArm64Imm(Off, S & 0xfff, 0); break;
case IMAGE_REL_ARM64_PAGEOFFSET_12L: applyArm64Ldr(Off, S & 0xfff); break;
case IMAGE_REL_ARM64_BRANCH26: or32(Off, ((S - P) & 0x0FFFFFFC) >> 2); break;
case IMAGE_REL_ARM64_ADDR32: add32(Off, S + Config->ImageBase); break;
case IMAGE_REL_ARM64_ADDR32NB: add32(Off, S); break;
case IMAGE_REL_ARM64_ADDR64: add64(Off, S + Config->ImageBase); break;
case IMAGE_REL_ARM64_SECREL: applySecRel(this, Off, OS, S); break;
default:
fatal("unsupported relocation type 0x" + Twine::utohexstr(Type));
}
@ -224,8 +251,19 @@ void SectionChunk::writeTo(uint8_t *Buf) const {
// Get the output section of the symbol for this relocation. The output
// section is needed to compute SECREL and SECTION relocations used in debug
// info.
SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex);
Defined *Sym = cast<Defined>(Body);
auto *Sym =
dyn_cast_or_null<Defined>(File->getSymbol(Rel.SymbolTableIndex));
if (!Sym) {
if (isCodeView() || isDWARF())
continue;
// Symbols in early discarded sections are represented using null pointers,
// so we need to retrieve the name from the object file.
COFFSymbolRef Sym =
check(File->getCOFFObj()->getSymbol(Rel.SymbolTableIndex));
StringRef Name;
File->getCOFFObj()->getSymbolName(Sym, Name);
fatal("relocation against symbol in discarded section: " + Name);
}
Chunk *C = Sym->getChunk();
OutputSection *OS = C ? C->getOutputSection() : nullptr;
@ -301,8 +339,8 @@ void SectionChunk::getBaserels(std::vector<Baserel> *Res) {
uint8_t Ty = getBaserelType(Rel);
if (Ty == IMAGE_REL_BASED_ABSOLUTE)
continue;
SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex);
if (isa<DefinedAbsolute>(Body))
Symbol *Target = File->getSymbol(Rel.SymbolTableIndex);
if (!Target || isa<DefinedAbsolute>(Target))
continue;
Res->emplace_back(RVA + Rel.VirtualAddress, Ty);
}
@ -323,12 +361,8 @@ bool SectionChunk::isCOMDAT() const {
void SectionChunk::printDiscardedMessage() const {
// Removed by dead-stripping. If it's removed by ICF, ICF already
// printed out the name, so don't repeat that here.
if (Sym && this == Repl) {
if (Discarded)
message("Discarded comdat symbol " + Sym->getName());
else if (!Live)
message("Discarded " + Sym->getName());
}
if (Sym && this == Repl)
message("Discarded " + Sym->getName());
}
StringRef SectionChunk::getDebugName() {
@ -351,7 +385,7 @@ void SectionChunk::replace(SectionChunk *Other) {
CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) {
// Common symbols are aligned on natural boundaries up to 32 bytes.
// This is what MSVC link.exe does.
Align = std::min(uint64_t(32), PowerOf2Ceil(Sym.getValue()));
Alignment = std::min(uint64_t(32), PowerOf2Ceil(Sym.getValue()));
}
uint32_t CommonChunk::getPermissions() const {
@ -366,7 +400,7 @@ void StringChunk::writeTo(uint8_t *Buf) const {
ImportThunkChunkX64::ImportThunkChunkX64(Defined *S) : ImpSymbol(S) {
// Intel Optimization Manual says that all branch targets
// should be 16-byte aligned. MSVC linker does this too.
Align = 16;
Alignment = 16;
}
void ImportThunkChunkX64::writeTo(uint8_t *Buf) const {
@ -397,10 +431,9 @@ void ImportThunkChunkARM::writeTo(uint8_t *Buf) const {
}
void ImportThunkChunkARM64::writeTo(uint8_t *Buf) const {
int64_t PageOff = (ImpSymbol->getRVA() >> 12) - (RVA >> 12);
int64_t Off = ImpSymbol->getRVA() & 0xfff;
memcpy(Buf + OutputSectionOff, ImportThunkARM64, sizeof(ImportThunkARM64));
applyArm64Addr(Buf + OutputSectionOff, PageOff);
applyArm64Addr(Buf + OutputSectionOff, ImpSymbol->getRVA(), RVA);
applyArm64Ldr(Buf + OutputSectionOff + 4, Off);
}
@ -488,8 +521,10 @@ void BaserelChunk::writeTo(uint8_t *Buf) const {
uint8_t Baserel::getDefaultType() {
switch (Config->Machine) {
case AMD64:
case ARM64:
return IMAGE_REL_BASED_DIR64;
case I386:
case ARMNT:
return IMAGE_REL_BASED_HIGHLOW;
default:
llvm_unreachable("unknown machine type");

View File

@ -12,7 +12,7 @@
#include "Config.h"
#include "InputFiles.h"
#include "lld/Core/LLVM.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
@ -33,9 +33,9 @@ class Baserel;
class Defined;
class DefinedImportData;
class DefinedRegular;
class ObjectFile;
class ObjFile;
class OutputSection;
class SymbolBody;
class Symbol;
// Mask for section types (code, data, bss, discardable, etc.)
// and permissions (writable, readable or executable).
@ -62,7 +62,6 @@ public:
// The writer sets and uses the addresses.
uint64_t getRVA() const { return RVA; }
uint32_t getAlign() const { return Align; }
void setRVA(uint64_t V) { RVA = V; }
// Returns true if this has non-zero data. BSS chunks return
@ -82,7 +81,7 @@ public:
// An output section has pointers to chunks in the section, and each
// chunk has a back pointer to an output section.
void setOutputSection(OutputSection *O) { Out = O; }
OutputSection *getOutputSection() { return Out; }
OutputSection *getOutputSection() const { return Out; }
// Windows-specific.
// Collect all locations that contain absolute addresses for base relocations.
@ -92,23 +91,22 @@ public:
// bytes, so this is used only for logging or debugging.
virtual StringRef getDebugName() { return ""; }
// The alignment of this chunk. The writer uses the value.
uint32_t Alignment = 1;
protected:
Chunk(Kind K = OtherKind) : ChunkKind(K) {}
const Kind ChunkKind;
// The alignment of this chunk. The writer uses the value.
uint32_t Align = 1;
// The RVA of this chunk in the output. The writer sets a value.
uint64_t RVA = 0;
// The output section for this chunk.
OutputSection *Out = nullptr;
public:
// The offset from beginning of the output section. The writer sets a value.
uint64_t OutputSectionOff = 0;
protected:
// The output section for this chunk.
OutputSection *Out = nullptr;
};
// A chunk corresponding to a section of an input file.
@ -119,23 +117,21 @@ class SectionChunk final : public Chunk {
public:
class symbol_iterator : public llvm::iterator_adaptor_base<
symbol_iterator, const coff_relocation *,
std::random_access_iterator_tag, SymbolBody *> {
std::random_access_iterator_tag, Symbol *> {
friend SectionChunk;
ObjectFile *File;
ObjFile *File;
symbol_iterator(ObjectFile *File, const coff_relocation *I)
symbol_iterator(ObjFile *File, const coff_relocation *I)
: symbol_iterator::iterator_adaptor_base(I), File(File) {}
public:
symbol_iterator() = default;
SymbolBody *operator*() const {
return File->getSymbolBody(I->SymbolTableIndex);
}
Symbol *operator*() const { return File->getSymbol(I->SymbolTableIndex); }
};
SectionChunk(ObjectFile *File, const coff_section *Header);
SectionChunk(ObjFile *File, const coff_section *Header);
static bool classof(const Chunk *C) { return C->kind() == SectionKind; }
size_t getSize() const override { return Header->SizeOfRawData; }
ArrayRef<uint8_t> getContents() const;
@ -163,10 +159,9 @@ public:
void addAssociative(SectionChunk *Child);
StringRef getDebugName() override;
void setSymbol(DefinedRegular *S) { if (!Sym) Sym = S; }
// Returns true if the chunk was not dropped by GC or COMDAT deduplication.
bool isLive() { return Live && !Discarded; }
// Returns true if the chunk was not dropped by GC.
bool isLive() { return Live; }
// Used by the garbage collector.
void markLive() {
@ -175,21 +170,16 @@ public:
Live = true;
}
// Returns true if this chunk was dropped by COMDAT deduplication.
bool isDiscarded() const { return Discarded; }
// Used by the SymbolTable when discarding unused comdat sections. This is
// redundant when GC is enabled, as all comdat sections will start out dead.
void markDiscarded() { Discarded = true; }
// True if this is a codeview debug info chunk. These will not be laid out in
// the image. Instead they will end up in the PDB, if one is requested.
bool isCodeView() const {
return SectionName == ".debug" || SectionName.startswith(".debug$");
}
// True if this is a DWARF debug info chunk.
bool isDWARF() const { return SectionName.startswith(".debug_"); }
// True if this is a DWARF debug info or exception handling chunk.
bool isDWARF() const {
return SectionName.startswith(".debug_") || SectionName == ".eh_frame";
}
// Allow iteration over the bodies of this chunk's relocated symbols.
llvm::iterator_range<symbol_iterator> symbols() const {
@ -213,7 +203,10 @@ public:
const coff_section *Header;
// The file that this chunk was created from.
ObjectFile *File;
ObjFile *File;
// The COMDAT leader symbol if this is a COMDAT chunk.
DefinedRegular *Sym = nullptr;
private:
StringRef SectionName;
@ -221,18 +214,12 @@ private:
llvm::iterator_range<const coff_relocation *> Relocs;
size_t NumRelocs;
// True if this chunk was discarded because it was a duplicate comdat section.
bool Discarded;
// Used by the garbage collector.
bool Live;
// Used for ICF (Identical COMDAT Folding)
void replace(SectionChunk *Other);
uint32_t Class[2] = {0, 0};
// Sym points to a section symbol if this is a COMDAT chunk.
DefinedRegular *Sym = nullptr;
};
// A chunk for common symbols. Common chunks don't have actual data.
@ -369,6 +356,9 @@ public:
uint8_t Type;
};
void applyMOV32T(uint8_t *Off, uint32_t V);
void applyBranch24T(uint8_t *Off, int32_t V);
} // namespace coff
} // namespace lld

View File

@ -12,6 +12,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/CachePruning.h"
#include <cstdint>
#include <map>
#include <set>
@ -26,8 +27,7 @@ using llvm::StringRef;
class DefinedAbsolute;
class DefinedRelative;
class StringChunk;
struct Symbol;
class SymbolBody;
class Symbol;
// Short aliases.
static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64;
@ -39,7 +39,7 @@ static const auto I386 = llvm::COFF::IMAGE_FILE_MACHINE_I386;
struct Export {
StringRef Name; // N in /export:N or /export:E=N
StringRef ExtName; // E in /export:E=N
SymbolBody *Sym = nullptr;
Symbol *Sym = nullptr;
uint16_t Ordinal = 0;
bool Noname = false;
bool Data = false;
@ -79,24 +79,23 @@ struct Configuration {
llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN;
bool Verbose = false;
WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN;
SymbolBody *Entry = nullptr;
Symbol *Entry = nullptr;
bool NoEntry = false;
std::string OutputFile;
std::string ImportName;
bool ColorDiagnostics;
bool DoGC = true;
bool DoICF = true;
uint64_t ErrorLimit = 20;
bool Relocatable = true;
bool Force = false;
bool Debug = false;
bool WriteSymtab = true;
bool DebugDwarf = false;
bool DebugGHashes = false;
unsigned DebugTypes = static_cast<unsigned>(DebugType::None);
llvm::SmallString<128> PDBPath;
std::vector<llvm::StringRef> Argv;
// Symbols in this set are considered as live by the garbage collector.
std::set<SymbolBody *> GCRoot;
std::vector<Symbol *> GCRoot;
std::set<StringRef> NoDefaultLibs;
bool NoDefaultLibAll = false;
@ -107,7 +106,7 @@ struct Configuration {
std::vector<Export> Exports;
std::set<std::string> DelayLoads;
std::map<std::string, int> DLLOrder;
SymbolBody *DelayLoadHelper = nullptr;
Symbol *DelayLoadHelper = nullptr;
bool SaveTemps = false;
@ -123,6 +122,11 @@ struct Configuration {
// Used for /opt:lldltopartitions=N
unsigned LTOPartitions = 1;
// Used for /opt:lldltocache=path
StringRef LTOCache;
// Used for /opt:lldltocachepolicy=policy
llvm::CachePruningPolicy LTOCachePolicy;
// Used for /merge:from=to (e.g. /merge:.rdata=.text)
std::map<StringRef, StringRef> Merge;
@ -139,6 +143,9 @@ struct Configuration {
StringRef ManifestUIAccess = "'false'";
StringRef ManifestFile;
// Used for /aligncomm.
std::map<std::string, int> AlignComm;
// Used for /failifmismatch.
std::map<StringRef, StringRef> MustMatch;
@ -159,12 +166,15 @@ struct Configuration {
uint32_t MinorOSVersion = 0;
bool CanExitEarly = false;
bool DynamicBase = true;
bool AllowBind = true;
bool NxCompat = true;
bool AllowIsolation = true;
bool TerminalServerAware = true;
bool LargeAddressAware = false;
bool HighEntropyVA = false;
bool AppContainer = false;
bool MinGW = false;
bool WarnLocallyDefinedImported = true;
};
extern Configuration *Config;

67
deps/lld/COFF/DLL.cpp vendored
View File

@ -61,7 +61,7 @@ private:
// A chunk for the import descriptor table.
class LookupChunk : public Chunk {
public:
explicit LookupChunk(Chunk *C) : HintName(C) {}
explicit LookupChunk(Chunk *C) : HintName(C) { Alignment = ptrSize(); }
size_t getSize() const override { return ptrSize(); }
void writeTo(uint8_t *Buf) const override {
@ -76,7 +76,7 @@ public:
// See Microsoft PE/COFF spec 7.1. Import Header for details.
class OrdinalOnlyChunk : public Chunk {
public:
explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) {}
explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) { Alignment = ptrSize(); }
size_t getSize() const override { return ptrSize(); }
void writeTo(uint8_t *Buf) const override {
@ -117,7 +117,6 @@ public:
explicit NullChunk(size_t N) : Size(N) {}
bool hasData() const override { return false; }
size_t getSize() const override { return Size; }
void setAlign(size_t N) { Align = N; }
private:
size_t Size;
@ -215,6 +214,22 @@ static const uint8_t ThunkX86[] = {
0xFF, 0xE0, // jmp eax
};
// Machine-code template for the ARMNT delay-load import thunk.
// ThunkChunkARM copies these bytes verbatim and then patches three
// instruction slots, addressed by byte offset from the start of the array:
//   offset  0: mov.w/mov.t pair loading ip (address of __imp_<FUNCNAME>)
//   offset 22: mov.w/mov.t pair loading r0 (address of the delay import
//              descriptor)
//   offset 30: bl to the delay-load helper
// All immediates and the branch target below are zero placeholders.
// Inserting, removing or resizing an instruction here changes those offsets,
// so ThunkChunkARM::writeTo and ThunkChunkARM::getBaserels must be updated
// together with this table.
static const uint8_t ThunkARM[] = {
0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0 __imp_<FUNCNAME>
0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0 __imp_<FUNCNAME>
0x2d, 0xe9, 0x0f, 0x48, // push.w {r0, r1, r2, r3, r11, lr}
0x0d, 0xf2, 0x10, 0x0b, // addw r11, sp, #16
0x2d, 0xed, 0x10, 0x0b, // vpush {d0, d1, d2, d3, d4, d5, d6, d7}
0x61, 0x46, // mov r1, ip
0x40, 0xf2, 0x00, 0x00, // mov.w r0, #0 DELAY_IMPORT_DESCRIPTOR
0xc0, 0xf2, 0x00, 0x00, // mov.t r0, #0 DELAY_IMPORT_DESCRIPTOR
0x00, 0xf0, 0x00, 0xd0, // bl #0 __delayLoadHelper2
0x84, 0x46, // mov ip, r0
0xbd, 0xec, 0x10, 0x0b, // vpop {d0, d1, d2, d3, d4, d5, d6, d7}
0xbd, 0xe8, 0x0f, 0x48, // pop.w {r0, r1, r2, r3, r11, lr}
0x60, 0x47, // bx ip
};
// A chunk for the delay import thunk.
class ThunkChunkX64 : public Chunk {
public:
@ -259,17 +274,45 @@ public:
Defined *Helper = nullptr;
};
// A chunk for the delay import thunk on ARMNT. The chunk's contents are a
// copy of the ThunkARM template with its placeholder immediates patched for
// one imported symbol.
class ThunkChunkARM : public Chunk {
  // Byte offsets into ThunkARM of the instruction slots that get patched.
  enum : size_t {
    ImpMovOffset = 0,      // mov.w/mov.t pair that loads ip
    DescMovOffset = 22,    // mov.w/mov.t pair that loads r0
    HelperCallOffset = 30, // bl to the helper
    HelperCallEnd = 34,    // first byte after the 4-byte bl
  };

public:
  ThunkChunkARM(Defined *I, Chunk *D, Defined *H)
      : Imp(I), Desc(D), Helper(H) {}

  // The thunk is exactly as large as its template.
  size_t getSize() const override { return sizeof(ThunkARM); }

  // Emits the template at this chunk's position in the output section and
  // fills in the two absolute addresses and the branch displacement.
  void writeTo(uint8_t *Buf) const override {
    uint8_t *Start = Buf + OutputSectionOff;
    memcpy(Start, ThunkARM, sizeof(ThunkARM));

    // Absolute (image-base relative) addresses of the import slot and of
    // the descriptor chunk.
    applyMOV32T(Start + ImpMovOffset, Imp->getRVA() + Config->ImageBase);
    applyMOV32T(Start + DescMovOffset, Desc->getRVA() + Config->ImageBase);

    // Displacement to the helper, measured from the end of the bl.
    applyBranch24T(Start + HelperCallOffset,
                   Helper->getRVA() - RVA - HelperCallEnd);
  }

  // Both mov.w/mov.t pairs embed absolute addresses, so each one needs a
  // base relocation entry.
  void getBaserels(std::vector<Baserel> *Res) override {
    Res->emplace_back(RVA + ImpMovOffset, IMAGE_REL_BASED_ARM_MOV32T);
    Res->emplace_back(RVA + DescMovOffset, IMAGE_REL_BASED_ARM_MOV32T);
  }

  Defined *Imp = nullptr;
  Chunk *Desc = nullptr;
  Defined *Helper = nullptr;
};
// A chunk for the import descriptor table.
class DelayAddressChunk : public Chunk {
public:
explicit DelayAddressChunk(Chunk *C) : Thunk(C) {}
explicit DelayAddressChunk(Chunk *C) : Thunk(C) { Alignment = ptrSize(); }
size_t getSize() const override { return ptrSize(); }
void writeTo(uint8_t *Buf) const override {
if (Config->is64()) {
write64le(Buf + OutputSectionOff, Thunk->getRVA() + Config->ImageBase);
} else {
write32le(Buf + OutputSectionOff, Thunk->getRVA() + Config->ImageBase);
uint32_t Bit = 0;
// Pointer to thumb code must have the LSB set, so adjust it.
if (Config->Machine == ARMNT)
Bit = 1;
write32le(Buf + OutputSectionOff, (Thunk->getRVA() + Config->ImageBase) | Bit);
}
}
@ -319,12 +362,16 @@ public:
size_t getSize() const override { return Size * 4; }
void writeTo(uint8_t *Buf) const override {
for (Export &E : Config->Exports) {
for (const Export &E : Config->Exports) {
uint8_t *P = Buf + OutputSectionOff + E.Ordinal * 4;
uint32_t Bit = 0;
// Pointer to thumb code must have the LSB set, so adjust it.
if (Config->Machine == ARMNT && !E.Data)
Bit = 1;
if (E.ForwardChunk) {
write32le(P, E.ForwardChunk->getRVA());
write32le(P, E.ForwardChunk->getRVA() | Bit);
} else {
write32le(P, cast<Defined>(E.Sym)->getRVA());
write32le(P, cast<Defined>(E.Sym)->getRVA() | Bit);
}
}
}
@ -487,7 +534,7 @@ void DelayLoadContents::create(Defined *H) {
for (int I = 0, E = Syms.size(); I < E; ++I)
Syms[I]->setLocation(Addresses[Base + I]);
auto *MH = make<NullChunk>(8);
MH->setAlign(8);
MH->Alignment = 8;
ModuleHandles.push_back(MH);
// Fill the delay import table header fields.
@ -506,6 +553,8 @@ Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *S, Chunk *Dir) {
return make<ThunkChunkX64>(S, Dir, Helper);
case I386:
return make<ThunkChunkX86>(S, Dir, Helper);
case ARMNT:
return make<ThunkChunkARM>(S, Dir, Helper);
default:
llvm_unreachable("unsupported machine type");
}

View File

@ -9,13 +9,15 @@
#include "Driver.h"
#include "Config.h"
#include "Error.h"
#include "InputFiles.h"
#include "Memory.h"
#include "MinGW.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
#include "lld/Driver/Driver.h"
#include "lld/Common/Driver.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Version.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Magic.h"
@ -48,27 +50,29 @@ namespace coff {
Configuration *Config;
LinkerDriver *Driver;
BumpPtrAllocator BAlloc;
StringSaver Saver{BAlloc};
std::vector<SpecificAllocBase *> SpecificAllocBase::Instances;
bool link(ArrayRef<const char *> Args, bool CanExitEarly, raw_ostream &Diag) {
ErrorCount = 0;
ErrorOS = &Diag;
errorHandler().LogName = Args[0];
errorHandler().ErrorOS = &Diag;
errorHandler().ColorDiagnostics = Diag.has_colors();
errorHandler().ErrorLimitExceededMsg =
"too many errors emitted, stopping now"
" (use /ERRORLIMIT:0 to see all errors)";
errorHandler().ExitEarly = CanExitEarly;
Config = make<Configuration>();
Config->Argv = {Args.begin(), Args.end()};
Config->ColorDiagnostics =
(ErrorOS == &llvm::errs() && Process::StandardErrHasColors());
Config->CanExitEarly = CanExitEarly;
Symtab = make<SymbolTable>();
Driver = make<LinkerDriver>();
Driver->link(Args);
// Call exit() if we can to avoid calling destructors.
if (CanExitEarly)
exitLld(ErrorCount ? 1 : 0);
exitLld(errorCount() ? 1 : 0);
freeArena();
return !ErrorCount;
return !errorCount();
}
// Drop directory components and replace extension with ".exe" or ".dll".
@ -114,30 +118,46 @@ MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr<MemoryBuffer> MB) {
return MBRef;
}
void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> MB) {
void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> MB,
bool WholeArchive) {
MemoryBufferRef MBRef = takeBuffer(std::move(MB));
FilePaths.push_back(MBRef.getBufferIdentifier());
// File type is detected by contents, not by file extension.
file_magic Magic = identify_magic(MBRef.getBuffer());
if (Magic == file_magic::windows_resource) {
switch (identify_magic(MBRef.getBuffer())) {
case file_magic::windows_resource:
Resources.push_back(MBRef);
return;
}
break;
FilePaths.push_back(MBRef.getBufferIdentifier());
if (Magic == file_magic::archive)
return Symtab.addFile(make<ArchiveFile>(MBRef));
if (Magic == file_magic::bitcode)
return Symtab.addFile(make<BitcodeFile>(MBRef));
case file_magic::archive:
if (WholeArchive) {
std::unique_ptr<Archive> File =
CHECK(Archive::create(MBRef),
MBRef.getBufferIdentifier() + ": failed to parse archive");
if (Magic == file_magic::coff_cl_gl_object)
for (MemoryBufferRef M : getArchiveMembers(File.get()))
addArchiveBuffer(M, "<whole-archive>", MBRef.getBufferIdentifier());
return;
}
Symtab->addFile(make<ArchiveFile>(MBRef));
break;
case file_magic::bitcode:
Symtab->addFile(make<BitcodeFile>(MBRef));
break;
case file_magic::coff_cl_gl_object:
error(MBRef.getBufferIdentifier() + ": is not a native COFF file. "
"Recompile without /GL");
else
Symtab.addFile(make<ObjectFile>(MBRef));
break;
default:
Symtab->addFile(make<ObjFile>(MBRef));
break;
}
}
void LinkerDriver::enqueuePath(StringRef Path) {
void LinkerDriver::enqueuePath(StringRef Path, bool WholeArchive) {
auto Future =
std::make_shared<std::future<MBErrPair>>(createFutureForFile(Path));
std::string PathStr = Path;
@ -146,7 +166,7 @@ void LinkerDriver::enqueuePath(StringRef Path) {
if (MBOrErr.second)
error("could not open " + PathStr + ": " + MBOrErr.second.message());
else
Driver->addBuffer(std::move(MBOrErr.first));
Driver->addBuffer(std::move(MBOrErr.first), WholeArchive);
});
}
@ -154,13 +174,13 @@ void LinkerDriver::addArchiveBuffer(MemoryBufferRef MB, StringRef SymName,
StringRef ParentName) {
file_magic Magic = identify_magic(MB.getBuffer());
if (Magic == file_magic::coff_import_library) {
Symtab.addFile(make<ImportFile>(MB));
Symtab->addFile(make<ImportFile>(MB));
return;
}
InputFile *Obj;
if (Magic == file_magic::coff_object) {
Obj = make<ObjectFile>(MB);
Obj = make<ObjFile>(MB);
} else if (Magic == file_magic::bitcode) {
Obj = make<BitcodeFile>(MB);
} else {
@ -169,7 +189,7 @@ void LinkerDriver::addArchiveBuffer(MemoryBufferRef MB, StringRef SymName,
}
Obj->ParentName = ParentName;
Symtab.addFile(Obj);
Symtab->addFile(Obj);
log("Loaded " + toString(Obj) + " for " + SymName);
}
@ -177,7 +197,7 @@ void LinkerDriver::enqueueArchiveMember(const Archive::Child &C,
StringRef SymName,
StringRef ParentName) {
if (!C.getParent()->isThin()) {
MemoryBufferRef MB = check(
MemoryBufferRef MB = CHECK(
C.getMemoryBufferRef(),
"could not get the buffer for the member defining symbol " + SymName);
enqueueTask([=]() { Driver->addArchiveBuffer(MB, SymName, ParentName); });
@ -185,39 +205,61 @@ void LinkerDriver::enqueueArchiveMember(const Archive::Child &C,
}
auto Future = std::make_shared<std::future<MBErrPair>>(createFutureForFile(
check(C.getFullName(),
CHECK(C.getFullName(),
"could not get the filename for the member defining symbol " +
SymName)));
enqueueTask([=]() {
auto MBOrErr = Future->get();
if (MBOrErr.second)
fatal(MBOrErr.second,
"could not get the buffer for the member defining " + SymName);
fatal("could not get the buffer for the member defining " + SymName +
": " + MBOrErr.second.message());
Driver->addArchiveBuffer(takeBuffer(std::move(MBOrErr.first)), SymName,
ParentName);
});
}
static bool isDecorated(StringRef Sym) {
return Sym.startswith("_") || Sym.startswith("@") || Sym.startswith("?");
return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
(!Config->MinGW && Sym.contains('@'));
}
// Parses .drectve section contents and applies the linker options found
// there, e.g. enqueues the library files specified by /defaultlib.
void LinkerDriver::parseDirectives(StringRef S) {
opt::InputArgList Args = Parser.parse(S);
ArgParser Parser;
// .drectve is always tokenized using Windows shell rules.
opt::InputArgList Args = Parser.parseDirectives(S);
for (auto *Arg : Args) {
switch (Arg->getOption().getID()) {
switch (Arg->getOption().getUnaliasedOption().getID()) {
case OPT_aligncomm:
parseAligncomm(Arg->getValue());
break;
case OPT_alternatename:
parseAlternateName(Arg->getValue());
break;
case OPT_defaultlib:
if (Optional<StringRef> Path = findLib(Arg->getValue()))
enqueuePath(*Path);
enqueuePath(*Path, false);
break;
case OPT_entry:
Config->Entry = addUndefined(mangle(Arg->getValue()));
break;
case OPT_export: {
// If a common header file contains dllexported function
// declarations, many object files may end up with having the
// same /EXPORT options. In order to save cost of parsing them,
// we dedup them first.
if (!DirectivesExports.insert(Arg->getValue()).second)
break;
Export E = parseExport(Arg->getValue());
if (Config->Machine == I386 && Config->MinGW) {
if (!isDecorated(E.Name))
E.Name = Saver.save("_" + E.Name);
if (!E.ExtName.empty() && !isDecorated(E.ExtName))
E.ExtName = Saver.save("_" + E.ExtName);
}
E.Directives = true;
Config->Exports.push_back(E);
break;
@ -237,9 +279,14 @@ void LinkerDriver::parseDirectives(StringRef S) {
case OPT_section:
parseSection(Arg->getValue());
break;
case OPT_subsystem:
parseSubsystem(Arg->getValue(), &Config->Subsystem,
&Config->MajorOSVersion, &Config->MinorOSVersion);
break;
case OPT_editandcontinue:
case OPT_fastfail:
case OPT_guardsym:
case OPT_natvis:
case OPT_throwingnew:
break;
default:
@ -254,7 +301,7 @@ StringRef LinkerDriver::doFindFile(StringRef Filename) {
bool HasPathSep = (Filename.find_first_of("/\\") != StringRef::npos);
if (HasPathSep)
return Filename;
bool HasExt = (Filename.find('.') != StringRef::npos);
bool HasExt = Filename.contains('.');
for (StringRef Dir : SearchPaths) {
SmallString<128> Path = Dir;
sys::path::append(Path, Filename);
@ -276,13 +323,15 @@ Optional<StringRef> LinkerDriver::findFile(StringRef Filename) {
bool Seen = !VisitedFiles.insert(Path.lower()).second;
if (Seen)
return None;
if (Path.endswith_lower(".lib"))
VisitedLibs.insert(sys::path::filename(Path));
return Path;
}
// Find library file from search path.
StringRef LinkerDriver::doFindLib(StringRef Filename) {
// Add ".lib" to Filename if that has no file extension.
bool HasExt = (Filename.find('.') != StringRef::npos);
bool HasExt = Filename.contains('.');
if (!HasExt)
Filename = Saver.save(Filename + ".lib");
return doFindFile(Filename);
@ -317,9 +366,12 @@ void LinkerDriver::addLibSearchPaths() {
}
}
SymbolBody *LinkerDriver::addUndefined(StringRef Name) {
SymbolBody *B = Symtab.addUndefined(Name);
Config->GCRoot.insert(B);
Symbol *LinkerDriver::addUndefined(StringRef Name) {
Symbol *B = Symtab->addUndefined(Name);
if (!B->IsGCRoot) {
B->IsGCRoot = true;
Config->GCRoot.push_back(B);
}
return B;
}
@ -341,8 +393,8 @@ StringRef LinkerDriver::findDefaultEntry() {
{"wWinMain", "wWinMainCRTStartup"},
};
for (auto E : Entries) {
StringRef Entry = Symtab.findMangle(mangle(E[0]));
if (!Entry.empty() && !isa<Undefined>(Symtab.find(Entry)->body()))
StringRef Entry = Symtab->findMangle(mangle(E[0]));
if (!Entry.empty() && !isa<Undefined>(Symtab->find(Entry)))
return mangle(E[1]);
}
return "";
@ -351,9 +403,9 @@ StringRef LinkerDriver::findDefaultEntry() {
WindowsSubsystem LinkerDriver::inferSubsystem() {
if (Config->DLL)
return IMAGE_SUBSYSTEM_WINDOWS_GUI;
if (Symtab.findUnderscore("main") || Symtab.findUnderscore("wmain"))
if (Symtab->findUnderscore("main") || Symtab->findUnderscore("wmain"))
return IMAGE_SUBSYSTEM_WINDOWS_CUI;
if (Symtab.findUnderscore("WinMain") || Symtab.findUnderscore("wWinMain"))
if (Symtab->findUnderscore("WinMain") || Symtab->findUnderscore("wWinMain"))
return IMAGE_SUBSYSTEM_WINDOWS_GUI;
return IMAGE_SUBSYSTEM_UNKNOWN;
}
@ -376,9 +428,15 @@ static std::string createResponseFile(const opt::InputArgList &Args,
case OPT_INPUT:
case OPT_defaultlib:
case OPT_libpath:
case OPT_manifest:
case OPT_manifest_colon:
case OPT_manifestdependency:
case OPT_manifestfile:
case OPT_manifestinput:
case OPT_manifestuac:
break;
default:
OS << toString(Arg) << "\n";
OS << toString(*Arg) << "\n";
}
}
@ -476,15 +534,17 @@ static void createImportLibrary(bool AsLib) {
Exports.push_back(E2);
}
writeImportLibrary(getImportName(AsLib), getImplibPath(), Exports,
Config->Machine, false);
auto E = writeImportLibrary(getImportName(AsLib), getImplibPath(), Exports,
Config->Machine, false);
handleAllErrors(std::move(E),
[&](ErrorInfoBase &EIB) { error(EIB.message()); });
}
static void parseModuleDefs(StringRef Path) {
std::unique_ptr<MemoryBuffer> MB = check(
MemoryBuffer::getFile(Path, -1, false, true), "could not open " + Path);
COFFModuleDefinition M =
check(parseCOFFModuleDefinition(MB->getMemBufferRef(), Config->Machine));
std::unique_ptr<MemoryBuffer> MB = CHECK(
MemoryBuffer::getFile(Path, -1, false, true), "could not open " + Path);
COFFModuleDefinition M = check(parseCOFFModuleDefinition(
MB->getMemBufferRef(), Config->Machine, Config->MinGW));
if (Config->OutputFile.empty())
Config->OutputFile = Saver.save(M.OutputFile);
@ -522,31 +582,12 @@ static void parseModuleDefs(StringRef Path) {
}
}
std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) {
std::vector<MemoryBufferRef> V;
Error Err = Error::success();
for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
Archive::Child C =
check(COrErr,
File->getFileName() + ": could not get the child of the archive");
MemoryBufferRef MBRef =
check(C.getMemoryBufferRef(),
File->getFileName() +
": could not get the buffer for a child of the archive");
V.push_back(MBRef);
}
if (Err)
fatal(File->getFileName() +
": Archive::children failed: " + toString(std::move(Err)));
return V;
}
// A helper function for filterBitcodeFiles.
static bool needsRebuilding(MemoryBufferRef MB) {
// The MSVC linker doesn't support thin archives, so if it's a thin
// archive, we always need to rebuild it.
std::unique_ptr<Archive> File =
check(Archive::create(MB), "Failed to read " + MB.getBufferIdentifier());
CHECK(Archive::create(MB), "Failed to read " + MB.getBufferIdentifier());
if (File->isThin())
return true;
@ -567,7 +608,7 @@ static bool needsRebuilding(MemoryBufferRef MB) {
// its path is returned.
static Optional<std::string>
filterBitcodeFiles(StringRef Path, std::vector<std::string> &TemporaryFiles) {
std::unique_ptr<MemoryBuffer> MB = check(
std::unique_ptr<MemoryBuffer> MB = CHECK(
MemoryBuffer::getFile(Path, -1, false, true), "could not open " + Path);
MemoryBufferRef MBRef = MB->getMemBufferRef();
file_magic Magic = identify_magic(MBRef.getBuffer());
@ -580,7 +621,7 @@ filterBitcodeFiles(StringRef Path, std::vector<std::string> &TemporaryFiles) {
return Path.str();
std::unique_ptr<Archive> File =
check(Archive::create(MBRef),
CHECK(Archive::create(MBRef),
MBRef.getBufferIdentifier() + ": failed to parse archive");
std::vector<NewArchiveMember> New;
@ -596,16 +637,17 @@ filterBitcodeFiles(StringRef Path, std::vector<std::string> &TemporaryFiles) {
SmallString<128> S;
if (auto EC = sys::fs::createTemporaryFile("lld-" + sys::path::stem(Path),
".lib", S))
fatal(EC, "cannot create a temporary file");
fatal("cannot create a temporary file: " + EC.message());
std::string Temp = S.str();
TemporaryFiles.push_back(Temp);
std::pair<StringRef, std::error_code> Ret =
Error E =
llvm::writeArchive(Temp, New, /*WriteSymtab=*/true, Archive::Kind::K_GNU,
/*Deterministics=*/true,
/*Thin=*/false);
if (Ret.second)
error("failed to create a new archive " + S.str() + ": " + Ret.first);
handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
error("failed to create a new archive " + S.str() + ": " + EI.message());
});
return Temp;
}
@ -617,16 +659,16 @@ void LinkerDriver::invokeMSVC(opt::InputArgList &Args) {
// Write out archive members that we used in symbol resolution and pass these
// to MSVC before any archives, so that MSVC uses the same objects to satisfy
// references.
for (const auto *O : Symtab.ObjectFiles) {
if (O->ParentName.empty())
for (ObjFile *Obj : ObjFile::Instances) {
if (Obj->ParentName.empty())
continue;
SmallString<128> S;
int Fd;
if (auto EC = sys::fs::createTemporaryFile(
"lld-" + sys::path::filename(O->ParentName), ".obj", Fd, S))
fatal(EC, "cannot create a temporary file");
"lld-" + sys::path::filename(Obj->ParentName), ".obj", Fd, S))
fatal("cannot create a temporary file: " + EC.message());
raw_fd_ostream OS(Fd, /*shouldClose*/ true);
OS << O->MB.getBuffer();
OS << Obj->MB.getBuffer();
Temps.push_back(S.str());
Rsp += quote(S) + "\n";
}
@ -642,7 +684,7 @@ void LinkerDriver::invokeMSVC(opt::InputArgList &Args) {
break;
case OPT_opt:
if (!StringRef(Arg->getValue()).startswith("lld"))
Rsp += toString(Arg) + " ";
Rsp += toString(*Arg) + " ";
break;
case OPT_INPUT: {
if (Optional<StringRef> Path = doFindFile(Arg->getValue())) {
@ -654,12 +696,12 @@ void LinkerDriver::invokeMSVC(opt::InputArgList &Args) {
break;
}
default:
Rsp += toString(Arg) + "\n";
Rsp += toString(*Arg) + "\n";
}
}
std::vector<StringRef> ObjectFiles = Symtab.compileBitcodeFiles();
runMSVCLinker(Rsp, ObjectFiles);
std::vector<StringRef> ObjFiles = Symtab->compileBitcodeFiles();
runMSVCLinker(Rsp, ObjFiles);
for (StringRef Path : Temps)
sys::fs::remove(Path);
@ -696,6 +738,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
InitializeAllDisassemblers();
// Parse command line options.
ArgParser Parser;
opt::InputArgList Args = Parser.parseLINK(ArgsArr.slice(1));
// Parse and evaluate -mllvm options.
@ -711,7 +754,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
StringRef S = Arg->getValue();
if (S.getAsInteger(10, N))
error(Arg->getSpelling() + " number expected, but got " + S);
Config->ErrorLimit = N;
errorHandler().ErrorLimit = N;
}
// Handle /help
@ -720,6 +763,18 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
return;
}
// Handle --version, which is an lld extension. This option is a bit odd
// because it doesn't start with "/", but we deliberately chose "--" to
// avoid conflict with /version and for compatibility with clang-cl.
if (Args.hasArg(OPT_dash_dash_version)) {
outs() << getLLDVersion() << "\n";
return;
}
// Handle /lldmingw early, since it can potentially affect how other
// options are handled.
Config->MinGW = Args.hasArg(OPT_lldmingw);
if (auto *Arg = Args.getLastArg(OPT_linkrepro)) {
SmallString<64> Path = StringRef(Arg->getValue());
sys::path::append(Path, "repro.tar");
@ -735,8 +790,8 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
}
}
if (!Args.hasArgNoClaim(OPT_INPUT)) {
if (Args.hasArgNoClaim(OPT_deffile))
if (!Args.hasArg(OPT_INPUT)) {
if (Args.hasArg(OPT_deffile))
Config->NoEntry = true;
else
fatal("no input files");
@ -748,6 +803,13 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
SearchPaths.push_back(Arg->getValue());
addLibSearchPaths();
// Handle /ignore
for (auto *Arg : Args.filtered(OPT_ignore)) {
if (StringRef(Arg->getValue()) == "4217")
Config->WarnLocallyDefinedImported = false;
// Other warning numbers are ignored.
}
// Handle /out
if (auto *Arg = Args.getLastArg(OPT_out))
Config->OutputFile = Arg->getValue();
@ -755,23 +817,26 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// Handle /verbose
if (Args.hasArg(OPT_verbose))
Config->Verbose = true;
errorHandler().Verbose = Config->Verbose;
// Handle /force or /force:unresolved
if (Args.hasArg(OPT_force) || Args.hasArg(OPT_force_unresolved))
if (Args.hasArg(OPT_force, OPT_force_unresolved))
Config->Force = true;
// Handle /debug
if (Args.hasArg(OPT_debug)) {
if (Args.hasArg(OPT_debug, OPT_debug_dwarf, OPT_debug_ghash)) {
Config->Debug = true;
Config->DebugTypes =
Args.hasArg(OPT_debugtype)
? parseDebugType(Args.getLastArg(OPT_debugtype)->getValue())
: getDefaultDebugType(Args);
if (auto *Arg = Args.getLastArg(OPT_debugtype))
Config->DebugTypes = parseDebugType(Arg->getValue());
else
Config->DebugTypes = getDefaultDebugType(Args);
}
// Create a dummy PDB file to satisfy build sytem rules.
if (auto *Arg = Args.getLastArg(OPT_pdb))
Config->PDBPath = Arg->getValue();
// Handle /pdb
bool ShouldCreatePDB = Args.hasArg(OPT_debug, OPT_debug_ghash);
if (ShouldCreatePDB)
if (auto *Arg = Args.getLastArg(OPT_pdb))
Config->PDBPath = Arg->getValue();
// Handle /noentry
if (Args.hasArg(OPT_noentry)) {
@ -787,9 +852,18 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
Config->ManifestID = 2;
}
// Handle /fixed
if (Args.hasArg(OPT_fixed)) {
if (Args.hasArg(OPT_dynamicbase)) {
// Handle /dynamicbase and /fixed. We can't use hasFlag for /dynamicbase
// because we need to explicitly check whether that option or its inverse was
// present in the argument list in order to handle /fixed.
auto *DynamicBaseArg = Args.getLastArg(OPT_dynamicbase, OPT_dynamicbase_no);
if (DynamicBaseArg &&
DynamicBaseArg->getOption().getID() == OPT_dynamicbase_no)
Config->DynamicBase = false;
bool Fixed = Args.hasFlag(OPT_fixed, OPT_fixed_no, false);
if (Fixed) {
if (DynamicBaseArg &&
DynamicBaseArg->getOption().getID() == OPT_dynamicbase) {
error("/fixed must not be specified with /dynamicbase");
} else {
Config->Relocatable = false;
@ -797,8 +871,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
}
}
if (Args.hasArg(OPT_appcontainer))
Config->AppContainer = true;
// Handle /appcontainer
Config->AppContainer =
Args.hasFlag(OPT_appcontainer, OPT_appcontainer_no, false);
// Handle /machine
if (auto *Arg = Args.getLastArg(OPT_machine))
@ -846,54 +921,65 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
if (auto *Arg = Args.getLastArg(OPT_implib))
Config->Implib = Arg->getValue();
// Handle /opt
// Handle /opt.
bool DoGC = !Args.hasArg(OPT_debug);
unsigned ICFLevel = 1; // 0: off, 1: limited, 2: on
for (auto *Arg : Args.filtered(OPT_opt)) {
std::string Str = StringRef(Arg->getValue()).lower();
SmallVector<StringRef, 1> Vec;
StringRef(Str).split(Vec, ',');
for (StringRef S : Vec) {
if (S == "noref") {
Config->DoGC = false;
Config->DoICF = false;
continue;
}
if (S == "icf" || StringRef(S).startswith("icf=")) {
Config->DoICF = true;
continue;
}
if (S == "noicf") {
Config->DoICF = false;
continue;
}
if (StringRef(S).startswith("lldlto=")) {
StringRef OptLevel = StringRef(S).substr(7);
if (S == "ref") {
DoGC = true;
} else if (S == "noref") {
DoGC = false;
} else if (S == "icf" || S.startswith("icf=")) {
ICFLevel = 2;
} else if (S == "noicf") {
ICFLevel = 0;
} else if (S.startswith("lldlto=")) {
StringRef OptLevel = S.substr(7);
if (OptLevel.getAsInteger(10, Config->LTOOptLevel) ||
Config->LTOOptLevel > 3)
error("/opt:lldlto: invalid optimization level: " + OptLevel);
continue;
}
if (StringRef(S).startswith("lldltojobs=")) {
StringRef Jobs = StringRef(S).substr(11);
} else if (S.startswith("lldltojobs=")) {
StringRef Jobs = S.substr(11);
if (Jobs.getAsInteger(10, Config->LTOJobs) || Config->LTOJobs == 0)
error("/opt:lldltojobs: invalid job count: " + Jobs);
continue;
}
if (StringRef(S).startswith("lldltopartitions=")) {
StringRef N = StringRef(S).substr(17);
} else if (S.startswith("lldltopartitions=")) {
StringRef N = S.substr(17);
if (N.getAsInteger(10, Config->LTOPartitions) ||
Config->LTOPartitions == 0)
error("/opt:lldltopartitions: invalid partition count: " + N);
continue;
}
if (S != "ref" && S != "lbr" && S != "nolbr")
} else if (S != "lbr" && S != "nolbr")
error("/opt: unknown option: " + S);
}
}
// Limited ICF is enabled if GC is enabled and ICF was never mentioned
// explicitly.
// FIXME: LLD only implements "limited" ICF, i.e. it only merges identical
// code. If the user passes /OPT:ICF explicitly, LLD should merge identical
// comdat readonly data.
if (ICFLevel == 1 && !DoGC)
ICFLevel = 0;
Config->DoGC = DoGC;
Config->DoICF = ICFLevel > 0;
// Handle /lldsavetemps
if (Args.hasArg(OPT_lldsavetemps))
Config->SaveTemps = true;
// Handle /lldltocache
if (auto *Arg = Args.getLastArg(OPT_lldltocache))
Config->LTOCache = Arg->getValue();
// Handle /lldltocachepolicy
if (auto *Arg = Args.getLastArg(OPT_lldltocachepolicy))
Config->LTOCachePolicy = CHECK(
parseCachePruningPolicy(Arg->getValue()),
Twine("/lldltocachepolicy: invalid cache policy: ") + Arg->getValue());
// Handle /failifmismatch
for (auto *Arg : Args.filtered(OPT_failifmismatch))
checkFailIfMismatch(Arg->getValue());
@ -906,6 +992,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
for (auto *Arg : Args.filtered(OPT_section))
parseSection(Arg->getValue());
// Handle /aligncomm
for (auto *Arg : Args.filtered(OPT_aligncomm))
parseAligncomm(Arg->getValue());
// Handle /manifestdependency. This enables /manifest unless /manifest:no is
// also passed.
if (auto *Arg = Args.getLastArg(OPT_manifestdependency)) {
@ -939,35 +1029,42 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
}
// Handle miscellaneous boolean flags.
if (Args.hasArg(OPT_allowisolation_no))
Config->AllowIsolation = false;
if (Args.hasArg(OPT_dynamicbase_no))
Config->DynamicBase = false;
if (Args.hasArg(OPT_nxcompat_no))
Config->NxCompat = false;
if (Args.hasArg(OPT_tsaware_no))
Config->TerminalServerAware = false;
if (Args.hasArg(OPT_nosymtab))
Config->WriteSymtab = false;
Config->AllowBind = Args.hasFlag(OPT_allowbind, OPT_allowbind_no, true);
Config->AllowIsolation =
Args.hasFlag(OPT_allowisolation, OPT_allowisolation_no, true);
Config->NxCompat = Args.hasFlag(OPT_nxcompat, OPT_nxcompat_no, true);
Config->TerminalServerAware = Args.hasFlag(OPT_tsaware, OPT_tsaware_no, true);
Config->DebugDwarf = Args.hasArg(OPT_debug_dwarf);
Config->DebugGHashes = Args.hasArg(OPT_debug_ghash);
Config->MapFile = getMapFile(Args);
if (ErrorCount)
if (errorCount())
return;
bool WholeArchiveFlag = Args.hasArg(OPT_wholearchive_flag);
// Create a list of input files. Files can be given as arguments
// for /defaultlib option.
std::vector<MemoryBufferRef> MBs;
for (auto *Arg : Args.filtered(OPT_INPUT))
if (Optional<StringRef> Path = findFile(Arg->getValue()))
enqueuePath(*Path);
for (auto *Arg : Args.filtered(OPT_INPUT, OPT_wholearchive_file)) {
switch (Arg->getOption().getID()) {
case OPT_INPUT:
if (Optional<StringRef> Path = findFile(Arg->getValue()))
enqueuePath(*Path, WholeArchiveFlag);
break;
case OPT_wholearchive_file:
if (Optional<StringRef> Path = findFile(Arg->getValue()))
enqueuePath(*Path, true);
break;
}
}
for (auto *Arg : Args.filtered(OPT_defaultlib))
if (Optional<StringRef> Path = findLib(Arg->getValue()))
enqueuePath(*Path);
enqueuePath(*Path, false);
// Windows specific -- Create a resource file containing a manifest file.
if (Config->Manifest == Configuration::Embed)
addBuffer(createManifestRes());
addBuffer(createManifestRes(), false);
// Read all input files given via the command line.
run();
@ -983,7 +1080,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// WindowsResource to convert resource files to a regular COFF file,
// then link the resulting file normally.
if (!Resources.empty())
addBuffer(convertResToCOFF(Resources));
Symtab->addFile(make<ObjFile>(convertResToCOFF(Resources)));
if (Tar)
Tar->append("response.txt",
@ -991,28 +1088,36 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
ArrayRef<StringRef>(SearchPaths).slice(1)));
// Handle /largeaddressaware
if (Config->is64() || Args.hasArg(OPT_largeaddressaware))
Config->LargeAddressAware = true;
Config->LargeAddressAware = Args.hasFlag(
OPT_largeaddressaware, OPT_largeaddressaware_no, Config->is64());
// Handle /highentropyva
if (Config->is64() && !Args.hasArg(OPT_highentropyva_no))
Config->HighEntropyVA = true;
Config->HighEntropyVA =
Config->is64() &&
Args.hasFlag(OPT_highentropyva, OPT_highentropyva_no, true);
if (!Config->DynamicBase &&
(Config->Machine == ARMNT || Config->Machine == ARM64))
error("/dynamicbase:no is not compatible with " +
machineToStr(Config->Machine));
// Handle /entry and /dll
if (auto *Arg = Args.getLastArg(OPT_entry)) {
Config->Entry = addUndefined(mangle(Arg->getValue()));
} else if (Args.hasArg(OPT_dll) && !Config->NoEntry) {
StringRef S = (Config->Machine == I386) ? "__DllMainCRTStartup@12"
: "_DllMainCRTStartup";
Config->Entry = addUndefined(S);
} else if (!Config->NoEntry) {
// Windows specific -- If entry point name is not given, we need to
// infer that from user-defined entry name.
StringRef S = findDefaultEntry();
if (S.empty())
fatal("entry point must be defined");
Config->Entry = addUndefined(S);
log("Entry name inferred: " + S);
} else if (!Config->Entry && !Config->NoEntry) {
if (Args.hasArg(OPT_dll)) {
StringRef S = (Config->Machine == I386) ? "__DllMainCRTStartup@12"
: "_DllMainCRTStartup";
Config->Entry = addUndefined(S);
} else {
// Windows specific -- If entry point name is not given, we need to
// infer that from user-defined entry name.
StringRef S = findDefaultEntry();
if (S.empty())
fatal("entry point must be defined");
Config->Entry = addUndefined(S);
log("Entry name inferred: " + S);
}
}
// Handle /export
@ -1034,7 +1139,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
}
// Handle generation of import library from a def file.
if (!Args.hasArgNoClaim(OPT_INPUT)) {
if (!Args.hasArg(OPT_INPUT)) {
fixupExports();
createImportLibrary(/*AsLib=*/true);
return;
@ -1057,34 +1162,32 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
}
// Put the PDB next to the image if no /pdb flag was passed.
if (Config->Debug && Config->PDBPath.empty()) {
if (ShouldCreatePDB && Config->PDBPath.empty()) {
Config->PDBPath = Config->OutputFile;
sys::path::replace_extension(Config->PDBPath, ".pdb");
}
// Disable PDB generation if the user requested it.
if (Args.hasArg(OPT_nopdb))
Config->PDBPath = "";
// Set default image base if /base is not given.
if (Config->ImageBase == uint64_t(-1))
Config->ImageBase = getDefaultImageBase();
Symtab.addSynthetic(mangle("__ImageBase"), nullptr);
Symtab->addSynthetic(mangle("__ImageBase"), nullptr);
if (Config->Machine == I386) {
Symtab.addAbsolute("___safe_se_handler_table", 0);
Symtab.addAbsolute("___safe_se_handler_count", 0);
Symtab->addAbsolute("___safe_se_handler_table", 0);
Symtab->addAbsolute("___safe_se_handler_count", 0);
}
// We do not support /guard:cf (control flow protection) yet.
// Define CFG symbols anyway so that we can link MSVC 2015 CRT.
Symtab.addAbsolute(mangle("__guard_fids_count"), 0);
Symtab.addAbsolute(mangle("__guard_fids_table"), 0);
Symtab.addAbsolute(mangle("__guard_flags"), 0x100);
Symtab.addAbsolute(mangle("__guard_iat_count"), 0);
Symtab.addAbsolute(mangle("__guard_iat_table"), 0);
Symtab.addAbsolute(mangle("__guard_longjmp_count"), 0);
Symtab.addAbsolute(mangle("__guard_longjmp_table"), 0);
Symtab->addAbsolute(mangle("__guard_fids_count"), 0);
Symtab->addAbsolute(mangle("__guard_fids_table"), 0);
Symtab->addAbsolute(mangle("__guard_flags"), 0x100);
Symtab->addAbsolute(mangle("__guard_iat_count"), 0);
Symtab->addAbsolute(mangle("__guard_iat_table"), 0);
Symtab->addAbsolute(mangle("__guard_longjmp_count"), 0);
Symtab->addAbsolute(mangle("__guard_longjmp_table"), 0);
// Needed for MSVC 2017 15.5 CRT.
Symtab->addAbsolute(mangle("__enclave_config"), 0);
// This code may add new undefined symbols to the link, which may enqueue more
// symbol resolution tasks, so we need to continue executing tasks until we
@ -1093,7 +1196,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// Windows specific -- if entry point is not found,
// search for its mangled names.
if (Config->Entry)
Symtab.mangleMaybe(Config->Entry);
Symtab->mangleMaybe(Config->Entry);
// Windows specific -- Make sure we resolve all dllexported symbols.
for (Export &E : Config->Exports) {
@ -1101,7 +1204,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
continue;
E.Sym = addUndefined(E.Name);
if (!E.Directives)
Symtab.mangleMaybe(E.Sym);
Symtab->mangleMaybe(E.Sym);
}
// Add weak aliases. Weak aliases is a mechanism to give remaining
@ -1109,20 +1212,20 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
for (auto Pair : Config->AlternateNames) {
StringRef From = Pair.first;
StringRef To = Pair.second;
Symbol *Sym = Symtab.find(From);
Symbol *Sym = Symtab->find(From);
if (!Sym)
continue;
if (auto *U = dyn_cast<Undefined>(Sym->body()))
if (auto *U = dyn_cast<Undefined>(Sym))
if (!U->WeakAlias)
U->WeakAlias = Symtab.addUndefined(To);
U->WeakAlias = Symtab->addUndefined(To);
}
// Windows specific -- if __load_config_used can be resolved, resolve it.
if (Symtab.findUnderscore("_load_config_used"))
if (Symtab->findUnderscore("_load_config_used"))
addUndefined(mangle("_load_config_used"));
} while (run());
if (ErrorCount)
if (errorCount())
return;
// If /msvclto is given, we use the MSVC linker to link LTO output files.
@ -1134,11 +1237,13 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// Do LTO by compiling bitcode input files to a set of native COFF files then
// link those files.
Symtab.addCombinedLTOObjects();
Symtab->addCombinedLTOObjects();
run();
// Make sure we have resolved all symbols.
Symtab.reportRemainingUndefines();
Symtab->reportRemainingUndefines();
if (errorCount())
return;
// Windows specific -- if no /subsystem is given, we need to infer
// that from entry point name.
@ -1149,14 +1254,34 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
}
// Handle /safeseh.
if (Args.hasArg(OPT_safeseh)) {
for (ObjectFile *File : Symtab.ObjectFiles)
if (Args.hasFlag(OPT_safeseh, OPT_safeseh_no, false)) {
for (ObjFile *File : ObjFile::Instances)
if (!File->SEHCompat)
error("/safeseh: " + File->getName() + " is not compatible with SEH");
if (ErrorCount)
if (errorCount())
return;
}
// In MinGW, all symbols are automatically exported if no symbols
// are chosen to be exported.
if (Config->DLL && ((Config->MinGW && Config->Exports.empty()) ||
Args.hasArg(OPT_export_all_symbols))) {
AutoExporter Exporter;
Symtab->forEachSymbol([=](Symbol *S) {
auto *Def = dyn_cast<Defined>(S);
if (!Exporter.shouldExport(Def))
return;
Export E;
E.Name = Def->getName();
E.Sym = Def;
if (Def->getChunk() &&
!(Def->getChunk()->getPermissions() & IMAGE_SCN_MEM_EXECUTE))
E.Data = true;
Config->Exports.push_back(E);
});
}
// Windows specific -- when we are creating a .dll file, we also
// need to create a .lib file.
if (!Config->Exports.empty() || Config->DLL) {
@ -1165,20 +1290,45 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
assignExportOrdinals();
}
// Handle /output-def (MinGW specific).
if (auto *Arg = Args.getLastArg(OPT_output_def))
writeDefFile(Arg->getValue());
// Set extra alignment for .comm symbols
for (auto Pair : Config->AlignComm) {
StringRef Name = Pair.first;
uint32_t Alignment = Pair.second;
Symbol *Sym = Symtab->find(Name);
if (!Sym) {
warn("/aligncomm symbol " + Name + " not found");
continue;
}
auto *DC = dyn_cast<DefinedCommon>(Sym);
if (!DC) {
warn("/aligncomm symbol " + Name + " of wrong kind");
continue;
}
CommonChunk *C = DC->getChunk();
C->Alignment = std::max(C->Alignment, Alignment);
}
// Windows specific -- Create a side-by-side manifest file.
if (Config->Manifest == Configuration::SideBySide)
createSideBySideManifest();
// Identify unreferenced COMDAT sections.
if (Config->DoGC)
markLive(Symtab.getChunks());
markLive(Symtab->getChunks());
// Identify identical COMDAT sections to merge them.
if (Config->DoICF)
doICF(Symtab.getChunks());
doICF(Symtab->getChunks());
// Write the result.
writeResult(&Symtab);
writeResult();
}
} // namespace coff

View File

@ -12,10 +12,11 @@
#include "Config.h"
#include "SymbolTable.h"
#include "lld/Core/LLVM.h"
#include "lld/Core/Reproduce.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Reproduce.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Option/Arg.h"
@ -36,31 +37,39 @@ using llvm::COFF::WindowsSubsystem;
using llvm::Optional;
// Implemented in MarkLive.cpp.
void markLive(const std::vector<Chunk *> &Chunks);
void markLive(ArrayRef<Chunk *> Chunks);
// Implemented in ICF.cpp.
void doICF(const std::vector<Chunk *> &Chunks);
void doICF(ArrayRef<Chunk *> Chunks);
// Option table type for this driver, derived from llvm::opt::OptTable.
// Only the default constructor is declared here; its definition is out of
// line.
class COFFOptTable : public llvm::opt::OptTable {
public:
COFFOptTable();
};
class ArgParser {
public:
// Parses command line options.
llvm::opt::InputArgList parse(llvm::ArrayRef<const char *> Args);
// Concatenate LINK environment varirable and given arguments and parse them.
// Concatenate LINK environment variable and given arguments and parse them.
llvm::opt::InputArgList parseLINK(std::vector<const char *> Args);
// Tokenizes a given string and then parses as command line options.
llvm::opt::InputArgList parse(StringRef S) { return parse(tokenize(S)); }
// Tokenizes a given string and then parses as command line options in
// .drectve section.
llvm::opt::InputArgList parseDirectives(StringRef S);
private:
// Parses command line options.
llvm::opt::InputArgList parse(llvm::ArrayRef<const char *> Args);
std::vector<const char *> tokenize(StringRef S);
std::vector<const char *> replaceResponseFiles(std::vector<const char *>);
COFFOptTable Table;
};
class LinkerDriver {
public:
LinkerDriver() { coff::Symtab = &Symtab; }
void link(llvm::ArrayRef<const char *> Args);
// Used by the resolver to parse .drectve section contents.
@ -70,10 +79,9 @@ public:
void enqueueArchiveMember(const Archive::Child &C, StringRef SymName,
StringRef ParentName);
private:
ArgParser Parser;
SymbolTable Symtab;
MemoryBufferRef takeBuffer(std::unique_ptr<MemoryBuffer> MB);
private:
std::unique_ptr<llvm::TarWriter> Tar; // for /linkrepro
// Opens a file. Path has to be resolved already.
@ -93,7 +101,7 @@ private:
std::set<std::string> VisitedFiles;
std::set<std::string> VisitedLibs;
SymbolBody *addUndefined(StringRef Sym);
Symbol *addUndefined(StringRef Sym);
StringRef mangle(StringRef Sym);
// Windows specific -- "main" is not the only main function in Windows.
@ -108,12 +116,11 @@ private:
void invokeMSVC(llvm::opt::InputArgList &Args);
MemoryBufferRef takeBuffer(std::unique_ptr<MemoryBuffer> MB);
void addBuffer(std::unique_ptr<MemoryBuffer> MB);
void addBuffer(std::unique_ptr<MemoryBuffer> MB, bool WholeArchive);
void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName,
StringRef ParentName);
void enqueuePath(StringRef Path);
void enqueuePath(StringRef Path, bool WholeArchive);
void enqueueTask(std::function<void()> Task);
bool run();
@ -121,6 +128,8 @@ private:
std::list<std::function<void()>> TaskQueue;
std::vector<StringRef> FilePaths;
std::vector<MemoryBufferRef> Resources;
llvm::StringSet<> DirectivesExports;
};
// Functions below this line are defined in DriverUtils.cpp.
@ -145,6 +154,7 @@ void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major,
void parseAlternateName(StringRef);
void parseMerge(StringRef);
void parseSection(StringRef);
void parseAligncomm(StringRef);
// Parses a string in the form of "EMBED[,=<integer>]|NO".
void parseManifest(StringRef Arg);
@ -167,10 +177,8 @@ void assignExportOrdinals();
// incompatible objects.
void checkFailIfMismatch(StringRef Arg);
// Convert Windows resource files (.res files) to a .obj file
// using cvtres.exe.
std::unique_ptr<MemoryBuffer>
convertResToCOFF(const std::vector<MemoryBufferRef> &MBs);
// Convert Windows resource files (.res files) to a .obj file.
MemoryBufferRef convertResToCOFF(ArrayRef<MemoryBufferRef> MBs);
void runMSVCLinker(std::string Rsp, ArrayRef<StringRef> Objects);

View File

@ -15,9 +15,9 @@
#include "Config.h"
#include "Driver.h"
#include "Error.h"
#include "Memory.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/COFF.h"
@ -32,12 +32,11 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/WindowsManifest/WindowsManifestMerger.h"
#include <memory>
using namespace llvm::COFF;
using namespace llvm;
using llvm::cl::ExpandResponseFiles;
using llvm::cl::TokenizeWindowsCommandLine;
using llvm::sys::Process;
namespace lld {
@ -58,7 +57,7 @@ public:
void run() {
ErrorOr<std::string> ExeOrErr = sys::findProgramByName(Prog);
if (auto EC = ExeOrErr.getError())
fatal(EC, "unable to find " + Prog + " in PATH: ");
fatal("unable to find " + Prog + " in PATH: " + EC.message());
StringRef Exe = Saver.save(*ExeOrErr);
Args.insert(Args.begin(), Exe);
@ -221,6 +220,22 @@ void parseSection(StringRef S) {
Config->Section[Name] = parseSectionAttributes(Attrs);
}
// Parses /aligncomm option argument, which has the form "<symbol>,<n>".
// On success, records an alignment of (1 << n) for <symbol> in
// Config->AlignComm, keeping the larger value if an entry for the symbol
// already exists. On a malformed argument, reports an error and records
// nothing.
void parseAligncomm(StringRef S) {
  StringRef Name, Align;
  std::tie(Name, Align) = S.split(',');
  if (Name.empty() || Align.empty()) {
    error("/aligncomm: invalid argument: " + S);
    return;
  }
  int V;
  // getAsInteger returns true on a parse failure. The exponent is also
  // range-checked because it is used as the shift count of the int expression
  // `1 << V` below: a negative count, or one that reaches the sign bit or
  // beyond, would be undefined behavior.
  if (Align.getAsInteger(0, V) || V < 0 ||
      V >= static_cast<int>(sizeof(int) * 8 - 1)) {
    error("/aligncomm: invalid argument: " + S);
    return;
  }
  Config->AlignComm[Name] = std::max(Config->AlignComm[Name], 1 << V);
}
// Parses a string in the form of "EMBED[,=<integer>]|NO".
// Results are directly written to Config.
void parseManifest(StringRef Arg) {
@ -273,14 +288,14 @@ public:
TemporaryFile(StringRef Prefix, StringRef Extn, StringRef Contents = "") {
SmallString<128> S;
if (auto EC = sys::fs::createTemporaryFile("lld-" + Prefix, Extn, S))
fatal(EC, "cannot create a temporary file");
fatal("cannot create a temporary file: " + EC.message());
Path = S.str();
if (!Contents.empty()) {
std::error_code EC;
raw_fd_ostream OS(Path, EC, sys::fs::F_None);
if (EC)
fatal(EC, "failed to open " + Path);
fatal("failed to open " + Path + ": " + EC.message());
OS << Contents;
}
}
@ -302,7 +317,7 @@ public:
// is called (you cannot remove an opened file on Windows.)
std::unique_ptr<MemoryBuffer> getMemoryBuffer() {
// IsVolatileSize=true forces MemoryBuffer to not use mmap().
return check(MemoryBuffer::getFile(Path, /*FileSize=*/-1,
return CHECK(MemoryBuffer::getFile(Path, /*FileSize=*/-1,
/*RequiresNullTerminator=*/false,
/*IsVolatileSize=*/true),
"could not open " + Path);
@ -312,16 +327,9 @@ public:
};
}
// Create the default manifest file as a temporary file.
TemporaryFile createDefaultXml() {
// Create a temporary file.
TemporaryFile File("defaultxml", "manifest");
// Open the temporary file for writing.
std::error_code EC;
raw_fd_ostream OS(File.Path, EC, sys::fs::F_Text);
if (EC)
fatal(EC, "failed to open " + File.Path);
static std::string createDefaultXml() {
std::string Ret;
raw_string_ostream OS(Ret);
// Emit the XML. Note that we do *not* verify that the XML attributes are
// syntactically correct. This is intentional for link.exe compatibility.
@ -337,46 +345,77 @@ TemporaryFile createDefaultXml() {
<< " </requestedPrivileges>\n"
<< " </security>\n"
<< " </trustInfo>\n";
if (!Config->ManifestDependency.empty()) {
OS << " <dependency>\n"
<< " <dependentAssembly>\n"
<< " <assemblyIdentity " << Config->ManifestDependency << " />\n"
<< " </dependentAssembly>\n"
<< " </dependency>\n";
}
}
if (!Config->ManifestDependency.empty()) {
OS << " <dependency>\n"
<< " <dependentAssembly>\n"
<< " <assemblyIdentity " << Config->ManifestDependency << " />\n"
<< " </dependentAssembly>\n"
<< " </dependency>\n";
}
OS << "</assembly>\n";
return OS.str();
}
// Merges DefaultXml with every file listed in Config->ManifestInput using
// windows_manifest::WindowsManifestMerger and returns the merged manifest
// text. Any merge failure is reported through fatal().
static std::string createManifestXmlWithInternalMt(StringRef DefaultXml) {
  // The merger is fed MemoryBuffers, so copy the default XML into one first.
  std::unique_ptr<MemoryBuffer> DefaultBuf =
      MemoryBuffer::getMemBufferCopy(DefaultXml);

  windows_manifest::WindowsManifestMerger Merger;
  if (auto Err = Merger.merge(*DefaultBuf))
    fatal("internal manifest tool failed on default xml: " +
          toString(std::move(Err)));

  // Fold in each user-supplied manifest, in the order given.
  for (StringRef Filename : Config->ManifestInput) {
    // NOTE(review): check() is called without a message here, so a failure to
    // read the file presumably does not name it -- confirm, and consider
    // passing a "could not open <file>" message as done elsewhere.
    std::unique_ptr<MemoryBuffer> UserBuf =
        check(MemoryBuffer::getFile(Filename));
    if (auto Err = Merger.merge(*UserBuf))
      fatal("internal manifest tool failed on file " + Filename + ": " +
            toString(std::move(Err)));
  }

  auto Merged = Merger.getMergedManifest();
  return Merged.get()->getBuffer();
}
static std::string createManifestXmlWithExternalMt(StringRef DefaultXml) {
// Create the default manifest file as a temporary file.
TemporaryFile Default("defaultxml", "manifest");
std::error_code EC;
raw_fd_ostream OS(Default.Path, EC, sys::fs::F_Text);
if (EC)
fatal("failed to open " + Default.Path + ": " + EC.message());
OS << DefaultXml;
OS.close();
return File;
}
static std::string readFile(StringRef Path) {
std::unique_ptr<MemoryBuffer> MB =
check(MemoryBuffer::getFile(Path), "could not open " + Path);
return MB->getBuffer();
}
static std::string createManifestXml() {
// Create the default manifest file.
TemporaryFile File1 = createDefaultXml();
if (Config->ManifestInput.empty())
return readFile(File1.Path);
// If manifest files are supplied by the user using /MANIFESTINPUT
// option, we need to merge them with the default manifest.
TemporaryFile File2("user", "manifest");
// Merge user-supplied manifests if they are given. Since libxml2 is not
// enabled, we must shell out to Microsoft's mt.exe tool.
TemporaryFile User("user", "manifest");
Executor E("mt.exe");
E.add("/manifest");
E.add(File1.Path);
E.add(Default.Path);
for (StringRef Filename : Config->ManifestInput) {
E.add("/manifest");
E.add(Filename);
}
E.add("/nologo");
E.add("/out:" + StringRef(File2.Path));
E.add("/out:" + StringRef(User.Path));
E.run();
return readFile(File2.Path);
return CHECK(MemoryBuffer::getFile(User.Path), "could not open " + User.Path)
.get()
->getBuffer();
}
static std::string createManifestXml() {
std::string DefaultXml = createDefaultXml();
if (Config->ManifestInput.empty())
return DefaultXml;
if (windows_manifest::isAvailable())
return createManifestXmlWithInternalMt(DefaultXml);
return createManifestXmlWithExternalMt(DefaultXml);
}
static std::unique_ptr<MemoryBuffer>
@ -386,7 +425,8 @@ createMemoryBufferForManifestRes(size_t ManifestSize) {
sizeof(object::WinResHeaderPrefix) + sizeof(object::WinResIDs) +
sizeof(object::WinResHeaderSuffix) + ManifestSize,
object::WIN_RES_DATA_ALIGNMENT);
return MemoryBuffer::getNewMemBuffer(ResSize);
return MemoryBuffer::getNewMemBuffer(ResSize,
Config->OutputFile + ".manifest.res");
}
static void writeResFileHeader(char *&Buf) {
@ -444,7 +484,7 @@ void createSideBySideManifest() {
std::error_code EC;
raw_fd_ostream Out(Path, EC, sys::fs::F_Text);
if (EC)
fatal(EC, "failed to create manifest");
fatal("failed to create manifest: " + EC.message());
Out << createManifestXml();
}
@ -459,12 +499,12 @@ Export parseExport(StringRef Arg) {
if (E.Name.empty())
goto err;
if (E.Name.find('=') != StringRef::npos) {
if (E.Name.contains('=')) {
StringRef X, Y;
std::tie(X, Y) = E.Name.split("=");
// If "<name>=<dllname>.<name>".
if (Y.find(".") != StringRef::npos) {
if (Y.contains(".")) {
E.Name = X;
E.ForwardTo = Y;
return E;
@ -534,7 +574,7 @@ void fixupExports() {
}
for (Export &E : Config->Exports) {
SymbolBody *Sym = E.Sym;
Symbol *Sym = E.Sym;
if (!E.ForwardTo.empty() || !Sym) {
E.SymbolName = E.Name;
} else {
@ -554,7 +594,7 @@ void fixupExports() {
}
// Uniquefy by name.
std::map<StringRef, Export *> Map;
DenseMap<StringRef, Export *> Map(Config->Exports.size());
std::vector<Export> V;
for (Export &E : Config->Exports) {
auto Pair = Map.insert(std::make_pair(E.ExportName, &E));
@ -601,10 +641,8 @@ void checkFailIfMismatch(StringRef Arg) {
Config->MustMatch[K] = V;
}
// Convert Windows resource files (.res files) to a .obj file
// using cvtres.exe.
std::unique_ptr<MemoryBuffer>
convertResToCOFF(const std::vector<MemoryBufferRef> &MBs) {
// Convert Windows resource files (.res files) to a .obj file.
MemoryBufferRef convertResToCOFF(ArrayRef<MemoryBufferRef> MBs) {
object::WindowsResourceParser Parser;
for (MemoryBufferRef MB : MBs) {
@ -613,14 +651,17 @@ convertResToCOFF(const std::vector<MemoryBufferRef> &MBs) {
if (!RF)
fatal("cannot compile non-resource file as resource");
if (auto EC = Parser.parse(RF))
fatal(EC, "failed to parse .res file");
fatal("failed to parse .res file: " + toString(std::move(EC)));
}
Expected<std::unique_ptr<MemoryBuffer>> E =
llvm::object::writeWindowsResourceCOFF(Config->Machine, Parser);
if (!E)
fatal(errorToErrorCode(E.takeError()), "failed to write .res to COFF");
return std::move(E.get());
fatal("failed to write .res to COFF: " + toString(E.takeError()));
MemoryBufferRef MBRef = **E;
make<std::unique_ptr<MemoryBuffer>>(std::move(*E)); // take ownership
return MBRef;
}
// Run MSVC link.exe for given in-memory object files.
@ -651,7 +692,7 @@ void runMSVCLinker(std::string Rsp, ArrayRef<StringRef> Objects) {
#undef PREFIX
// Create table mapping all options defined in Options.td
static const llvm::opt::OptTable::Info infoTable[] = {
static const llvm::opt::OptTable::Info InfoTable[] = {
#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \
{X1, X2, X10, X11, OPT_##ID, llvm::opt::Option::KIND##Class, \
X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12},
@ -659,30 +700,65 @@ static const llvm::opt::OptTable::Info infoTable[] = {
#undef OPTION
};
class COFFOptTable : public llvm::opt::OptTable {
public:
COFFOptTable() : OptTable(infoTable, true) {}
};
COFFOptTable::COFFOptTable() : OptTable(InfoTable, true) {}
// Selects the tokenizer callback according to the last OPT_rsp_quoting
// argument in Args: "windows" selects cl::TokenizeWindowsCommandLine and any
// other value selects cl::TokenizeGNUCommandLine, with an error reported
// first if the value is not "posix". Without the option, Windows quoting is
// used.
static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &Args) {
  auto *QuotingArg = Args.getLastArg(OPT_rsp_quoting);

  // The COFF linker always defaults to Windows quoting.
  if (!QuotingArg)
    return cl::TokenizeWindowsCommandLine;

  StringRef Style = QuotingArg->getValue();
  if (Style == "windows")
    return cl::TokenizeWindowsCommandLine;

  // Anything other than "posix" is reported, after which the GNU tokenizer
  // is still returned.
  if (Style != "posix")
    error("invalid response file quoting: " + Style);
  return cl::TokenizeGNUCommandLine;
}
// Parses a given list of options.
opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) {
// First, replace respnose files (@<file>-style options).
std::vector<const char *> Argv = replaceResponseFiles(ArgsArr);
opt::InputArgList ArgParser::parse(ArrayRef<const char *> Argv) {
// Make InputArgList from string vectors.
COFFOptTable Table;
unsigned MissingIndex;
unsigned MissingCount;
opt::InputArgList Args = Table.ParseArgs(Argv, MissingIndex, MissingCount);
SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size());
// We need to get the quoting style for response files before parsing all
// options so we parse here before and ignore all the options but
// --rsp-quoting.
opt::InputArgList Args = Table.ParseArgs(Vec, MissingIndex, MissingCount);
// Expand response files (arguments in the form of @<filename>)
// and then parse the argument again.
cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), Vec);
Args = Table.ParseArgs(Vec, MissingIndex, MissingCount);
// Print the real command line if response files are expanded.
if (Args.hasArg(OPT_verbose) && ArgsArr.size() != Argv.size()) {
if (Args.hasArg(OPT_verbose) && Argv.size() != Vec.size()) {
std::string Msg = "Command line:";
for (const char *S : Argv)
for (const char *S : Vec)
Msg += " " + std::string(S);
message(Msg);
}
// Handle /WX early since it converts missing argument warnings to errors.
errorHandler().FatalWarnings = Args.hasFlag(OPT_WX, OPT_WX_no, false);
if (MissingCount)
fatal(Twine(Args.getArgString(MissingIndex)) + ": missing argument");
for (auto *Arg : Args.filtered(OPT_UNKNOWN))
warn("ignoring unknown argument: " + Arg->getSpelling());
return Args;
}
// Tokenizes and parses a given string as a command line in the .drectve section.
opt::InputArgList ArgParser::parseDirectives(StringRef S) {
// Make InputArgList from string vectors.
unsigned MissingIndex;
unsigned MissingCount;
opt::InputArgList Args =
Table.ParseArgs(tokenize(S), MissingIndex, MissingCount);
if (MissingCount)
fatal(Twine(Args.getArgString(MissingIndex)) + ": missing argument");
for (auto *Arg : Args.filtered(OPT_UNKNOWN))
@ -693,17 +769,17 @@ opt::InputArgList ArgParser::parse(ArrayRef<const char *> ArgsArr) {
// link.exe has an interesting feature. If LINK or _LINK_ environment
// variables exist, their contents are handled as command line strings.
// So you can pass extra arguments using them.
opt::InputArgList ArgParser::parseLINK(std::vector<const char *> Args) {
opt::InputArgList ArgParser::parseLINK(std::vector<const char *> Argv) {
// Concatenate LINK env and command line arguments, and then parse them.
if (Optional<std::string> S = Process::GetEnv("LINK")) {
std::vector<const char *> V = tokenize(*S);
Args.insert(Args.begin(), V.begin(), V.end());
Argv.insert(Argv.begin(), V.begin(), V.end());
}
if (Optional<std::string> S = Process::GetEnv("_LINK_")) {
std::vector<const char *> V = tokenize(*S);
Args.insert(Args.begin(), V.begin(), V.end());
Argv.insert(Argv.begin(), V.begin(), V.end());
}
return parse(Args);
return parse(Argv);
}
std::vector<const char *> ArgParser::tokenize(StringRef S) {
@ -712,18 +788,8 @@ std::vector<const char *> ArgParser::tokenize(StringRef S) {
return std::vector<const char *>(Tokens.begin(), Tokens.end());
}
// Creates a new command line by replacing options starting with '@'
// character. '@<filename>' is replaced by the file's contents.
std::vector<const char *>
ArgParser::replaceResponseFiles(std::vector<const char *> Argv) {
SmallVector<const char *, 256> Tokens(Argv.data(), Argv.data() + Argv.size());
ExpandResponseFiles(Saver, TokenizeWindowsCommandLine, Tokens);
return std::vector<const char *>(Tokens.begin(), Tokens.end());
}
void printHelp(const char *Argv0) {
COFFOptTable Table;
Table.PrintHelp(outs(), Argv0, "LLVM Linker", false);
COFFOptTable().PrintHelp(outs(), Argv0, "LLVM Linker", false);
}
} // namespace coff

View File

@ -1,115 +0,0 @@
//===- Error.cpp ----------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Error.h"
#include "Config.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
#include <mutex>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#endif
using namespace llvm;
namespace lld {
// The functions defined in this file can be called from multiple threads,
// but outs() or errs() are not thread-safe. We protect them using a mutex.
static std::mutex Mu;
namespace coff {
uint64_t ErrorCount;
raw_ostream *ErrorOS;
// Terminates the process with exit status Val. Before exiting, LLVM's
// managed statics are shut down and both outs() and errs() are flushed.
LLVM_ATTRIBUTE_NORETURN void exitLld(int Val) {
// Dealloc/destroy ManagedStatic variables before calling
// _exit(). In a non-LTO build, this is a nop. In an LTO
// build, this allows us to get the output of -time-passes.
llvm_shutdown();
// Flush both standard streams explicitly before leaving via _exit().
outs().flush();
errs().flush();
_exit(Val);
}
// Writes "<argv0>: " followed by the tag S to the diagnostic stream. The tag
// is emitted in bold color C when colored diagnostics are enabled. Callers
// are expected to hold Mu.
static void print(StringRef S, raw_ostream::Colors C) {
  raw_ostream &OS = *ErrorOS;
  OS << Config->Argv[0] << ": ";

  if (!Config->ColorDiagnostics) {
    OS << S;
    return;
  }

  OS.changeColor(C, true);
  OS << S;
  OS.resetColor();
}
void log(const Twine &Msg) {
if (Config->Verbose) {
std::lock_guard<std::mutex> Lock(Mu);
outs() << Config->Argv[0] << ": " << Msg << "\n";
outs().flush();
}
}
void message(const Twine &Msg) {
std::lock_guard<std::mutex> Lock(Mu);
outs() << Msg << "\n";
outs().flush();
}
// Reports a non-fatal error and bumps ErrorCount.
//
// While the count is below Config->ErrorLimit (or the limit is 0, meaning
// unlimited), the message itself is printed. When the count reaches the
// limit exactly, a single "too many errors" notice is printed instead and,
// if early exit is permitted, the process terminates. Past that point
// errors are only counted.
void error(const Twine &Msg) {
  std::lock_guard<std::mutex> Lock(Mu);

  const auto Limit = Config->ErrorLimit;
  const bool UnderLimit = (Limit == 0) || (ErrorCount < Limit);

  if (UnderLimit) {
    print("error: ", raw_ostream::RED);
    *ErrorOS << Msg << "\n";
  } else if (ErrorCount == Limit) {
    print("error: ", raw_ostream::RED);
    *ErrorOS << "too many errors emitted, stopping now"
             << " (use /ERRORLIMIT:0 to see all errors)\n";
    if (Config->CanExitEarly)
      exitLld(1);
  }

  ++ErrorCount;
}
void fatal(const Twine &Msg) {
if (Config->ColorDiagnostics) {
errs().changeColor(raw_ostream::RED, /*bold=*/true);
errs() << "error: ";
errs().resetColor();
} else {
errs() << "error: ";
}
errs() << Msg << "\n";
exitLld(1);
}
// Fatal error with an error code: reports "<Msg>: <EC.message()>" through
// fatal(const Twine &).
void fatal(std::error_code EC, const Twine &Msg) {
fatal(Msg + ": " + EC.message());
}
// Fatal error with an llvm::Error: Err is moved into errorToErrorCode() and
// the resulting error code is reported through the std::error_code overload.
void fatal(llvm::Error &Err, const Twine &Msg) {
fatal(errorToErrorCode(std::move(Err)), Msg);
}
// Prints "<argv0>: warning: <Msg>" to the diagnostic stream (*ErrorOS) while
// holding Mu. The "warning: " tag is colored magenta when colored
// diagnostics are enabled (see print()).
void warn(const Twine &Msg) {
std::lock_guard<std::mutex> Lock(Mu);
print("warning: ", raw_ostream::MAGENTA);
*ErrorOS << Msg << "\n";
}
} // namespace coff
} // namespace lld

64
deps/lld/COFF/Error.h vendored
View File

@ -1,64 +0,0 @@
//===- Error.h --------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_ERROR_H
#define LLD_COFF_ERROR_H
#include "lld/Core/LLVM.h"
#include "llvm/Support/Error.h"
namespace lld {
namespace coff {
extern uint64_t ErrorCount;
extern llvm::raw_ostream *ErrorOS;
void log(const Twine &Msg);
void message(const Twine &Msg);
void warn(const Twine &Msg);
void error(const Twine &Msg);
LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg);
LLVM_ATTRIBUTE_NORETURN void fatal(std::error_code EC, const Twine &Prefix);
LLVM_ATTRIBUTE_NORETURN void fatal(llvm::Error &Err, const Twine &Prefix);
LLVM_ATTRIBUTE_NORETURN void exitLld(int Val);
// Unwraps an ErrorOr<T>. If V holds an error, calls fatal() with that error
// code and Prefix; otherwise moves the contained value out and returns it.
template <class T> T check(ErrorOr<T> V, const Twine &Prefix) {
if (auto EC = V.getError())
fatal(EC, Prefix);
return std::move(*V);
}
// Unwraps an Expected<T>. If taking the error out of E yields a failure,
// calls fatal() with that error and Prefix; otherwise moves the contained
// value out and returns it.
template <class T> T check(Expected<T> E, const Twine &Prefix) {
if (llvm::Error Err = E.takeError())
fatal(Err, Prefix);
return std::move(*E);
}
// Unwraps an ErrorOr<T> without a caller-supplied prefix. On failure the
// error code's message alone is passed to fatal(); otherwise the contained
// value is moved out and returned.
template <class T> T check(ErrorOr<T> EO) {
if (!EO)
fatal(EO.getError().message());
return std::move(*EO);
}
// Unwraps an Expected<T> without a caller-supplied prefix. On failure the
// error taken from E is rendered into a string via logAllUnhandledErrors()
// and that string is passed to fatal(); otherwise the contained value is
// moved out and returned.
template <class T> T check(Expected<T> E) {
if (!E) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(E.takeError(), OS, "");
// Flush the stream so Buf holds the full text before it is reported.
OS.flush();
fatal(Buf);
}
return std::move(*E);
}
} // namespace coff
} // namespace lld
#endif

51
deps/lld/COFF/ICF.cpp vendored
View File

@ -19,8 +19,8 @@
//===----------------------------------------------------------------------===//
#include "Chunks.h"
#include "Error.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Parallel.h"
@ -36,7 +36,7 @@ namespace coff {
class ICF {
public:
void run(const std::vector<Chunk *> &V);
void run(ArrayRef<Chunk *> V);
private:
void segregate(size_t Begin, size_t End, bool Constant);
@ -61,12 +61,9 @@ private:
// Returns a hash value for S.
uint32_t ICF::getHash(SectionChunk *C) {
return hash_combine(C->getPermissions(),
hash_value(C->SectionName),
C->NumRelocs,
C->getAlign(),
uint32_t(C->Header->SizeOfRawData),
C->Checksum);
return hash_combine(C->getPermissions(), C->SectionName, C->NumRelocs,
C->Alignment, uint32_t(C->Header->SizeOfRawData),
C->Checksum, C->getContents());
}
// Returns true if section S is subject of ICF.
@ -76,12 +73,21 @@ uint32_t ICF::getHash(SectionChunk *C) {
// 2017) says that /opt:icf folds both functions and read-only data.
// Despite that, the MSVC linker folds only functions. We found
// a few instances of programs that are not safe for data merging.
// Therefore, we merge only functions just like the MSVC tool.
// Therefore, we merge only functions just like the MSVC tool. However, we merge
// identical .xdata sections, because the address of unwind information is
// insignificant to the user program and the Visual C++ linker does this.
bool ICF::isEligible(SectionChunk *C) {
bool Global = C->Sym && C->Sym->isExternal();
bool Executable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE;
// Non-comdat chunks, dead chunks, and writable chunks are not eligible.
bool Writable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
return C->isCOMDAT() && C->isLive() && Global && Executable && !Writable;
if (!C->isCOMDAT() || !C->isLive() || Writable)
return false;
// Code sections are eligible.
if (C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE)
return true;
// .xdata unwind info sections are eligible.
return C->getSectionName().split('$').first == ".xdata";
}
// Split an equivalence class into smaller classes.
@ -122,8 +128,8 @@ bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
R1.VirtualAddress != R2.VirtualAddress) {
return false;
}
SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex);
SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex);
Symbol *B1 = A->File->getSymbol(R1.SymbolTableIndex);
Symbol *B2 = B->File->getSymbol(R2.SymbolTableIndex);
if (B1 == B2)
return true;
if (auto *D1 = dyn_cast<DefinedRegular>(B1))
@ -137,19 +143,17 @@ bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
// Compare section attributes and contents.
return A->getPermissions() == B->getPermissions() &&
A->SectionName == B->SectionName &&
A->getAlign() == B->getAlign() &&
A->SectionName == B->SectionName && A->Alignment == B->Alignment &&
A->Header->SizeOfRawData == B->Header->SizeOfRawData &&
A->Checksum == B->Checksum &&
A->getContents() == B->getContents();
A->Checksum == B->Checksum && A->getContents() == B->getContents();
}
// Compare "moving" part of two sections, namely relocation targets.
bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
// Compare relocations.
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex);
SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex);
Symbol *B1 = A->File->getSymbol(R1.SymbolTableIndex);
Symbol *B2 = B->File->getSymbol(R2.SymbolTableIndex);
if (B1 == B2)
return true;
if (auto *D1 = dyn_cast<DefinedRegular>(B1))
@ -202,7 +206,7 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
// Merge identical COMDAT sections.
// Two sections are considered the same if their section headers,
// contents and relocations are all the same.
void ICF::run(const std::vector<Chunk *> &Vec) {
void ICF::run(ArrayRef<Chunk *> Vec) {
// Collect only mergeable sections and group by hash value.
uint32_t NextId = 1;
for (Chunk *C : Vec) {
@ -215,9 +219,10 @@ void ICF::run(const std::vector<Chunk *> &Vec) {
}
// Initially, we use hash values to partition sections.
for (SectionChunk *SC : Chunks)
for_each(parallel::par, Chunks.begin(), Chunks.end(), [&](SectionChunk *SC) {
// Set MSB to 1 to avoid collisions with non-hash classes.
SC->Class[0] = getHash(SC) | (1 << 31);
});
// From now on, sections in Chunks are ordered so that sections in
// the same group are consecutive in the vector.
@ -252,7 +257,7 @@ void ICF::run(const std::vector<Chunk *> &Vec) {
}
// Entry point to ICF.
void doICF(const std::vector<Chunk *> &Chunks) { ICF().run(Chunks); }
void doICF(ArrayRef<Chunk *> Chunks) { ICF().run(Chunks); }
} // namespace coff
} // namespace lld

View File

@ -11,10 +11,10 @@
#include "Chunks.h"
#include "Config.h"
#include "Driver.h"
#include "Error.h"
#include "Memory.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm-c/lto.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
@ -43,14 +43,18 @@ using llvm::support::ulittle32_t;
namespace lld {
namespace coff {
std::vector<ObjFile *> ObjFile::Instances;
std::vector<ImportFile *> ImportFile::Instances;
std::vector<BitcodeFile *> BitcodeFile::Instances;
/// Checks that Source is compatible with being a weak alias to Target.
/// If Source is Undefined and has no weak alias set, makes it a weak
/// alias to Target.
static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F,
SymbolBody *Source, SymbolBody *Target) {
Symbol *Source, Symbol *Target) {
if (auto *U = dyn_cast<Undefined>(Source)) {
if (U->WeakAlias && U->WeakAlias != Target)
Symtab->reportDuplicate(Source->symbol(), F);
Symtab->reportDuplicate(Source, F);
U->WeakAlias = Target;
}
}
@ -59,7 +63,7 @@ ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
void ArchiveFile::parse() {
// Parse a MemoryBufferRef as an archive file.
File = check(Archive::create(MB), toString(this));
File = CHECK(Archive::create(MB), this);
// Read the symbol table to construct Lazy objects.
for (const Archive::Symbol &Sym : File->symbols())
@ -69,7 +73,7 @@ void ArchiveFile::parse() {
// Returns a buffer pointing to a member file containing a given symbol.
void ArchiveFile::addMember(const Archive::Symbol *Sym) {
const Archive::Child &C =
check(Sym->getMember(),
CHECK(Sym->getMember(),
"could not get the member for symbol " + Sym->getName());
// Return an empty buffer if we have already returned the same buffer.
@ -79,9 +83,28 @@ void ArchiveFile::addMember(const Archive::Symbol *Sym) {
Driver->enqueueArchiveMember(C, Sym->getName(), getName());
}
void ObjectFile::parse() {
// Returns the memory buffer of every child of the given archive, in
// iteration order. Failure to read a child, to get its buffer, or to
// iterate the archive is reported as a fatal error.
std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) {
  std::vector<MemoryBufferRef> Buffers;

  // children() records iteration failures in IterErr; it is inspected once
  // the loop has finished.
  Error IterErr = Error::success();
  for (const ErrorOr<Archive::Child> &ChildOrErr : File->children(IterErr)) {
    Archive::Child Child =
        CHECK(ChildOrErr,
              File->getFileName() + ": could not get the child of the archive");
    MemoryBufferRef ChildBuffer =
        CHECK(Child.getMemoryBufferRef(),
              File->getFileName() +
                  ": could not get the buffer for a child of the archive");
    Buffers.push_back(ChildBuffer);
  }

  if (IterErr)
    fatal(File->getFileName() +
          ": Archive::children failed: " + toString(std::move(IterErr)));
  return Buffers;
}
void ObjFile::parse() {
// Parse a memory buffer as a COFF file.
std::unique_ptr<Binary> Bin = check(createBinary(MB), toString(this));
std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), this);
if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) {
Bin.release();
@ -93,114 +116,184 @@ void ObjectFile::parse() {
// Read section and symbol tables.
initializeChunks();
initializeSymbols();
initializeSEH();
}
void ObjectFile::initializeChunks() {
// We set SectionChunk pointers in the SparseChunks vector to this value
// temporarily to mark comdat sections as having an unknown resolution. As we
// walk the object file's symbol table, once we visit either a leader symbol or
// an associative section definition together with the parent comdat's leader,
// we set the pointer to either nullptr (to mark the section as discarded) or a
// valid SectionChunk for that section.
static SectionChunk *const PendingComdat = reinterpret_cast<SectionChunk *>(1);
void ObjFile::initializeChunks() {
uint32_t NumSections = COFFObj->getNumberOfSections();
Chunks.reserve(NumSections);
SparseChunks.resize(NumSections + 1);
for (uint32_t I = 1; I < NumSections + 1; ++I) {
const coff_section *Sec;
StringRef Name;
if (auto EC = COFFObj->getSection(I, Sec))
fatal(EC, "getSection failed: #" + Twine(I));
if (auto EC = COFFObj->getSectionName(Sec, Name))
fatal(EC, "getSectionName failed: #" + Twine(I));
if (Name == ".sxdata") {
SXData = Sec;
continue;
}
if (Name == ".drectve") {
ArrayRef<uint8_t> Data;
COFFObj->getSectionContents(Sec, Data);
Directives = std::string((const char *)Data.data(), Data.size());
continue;
}
fatal("getSection failed: #" + Twine(I) + ": " + EC.message());
// Object files may have DWARF debug info or MS CodeView debug info
// (or both).
//
// DWARF sections don't need any special handling from the perspective
// of the linker; they are just a data section containing relocations.
// We can just link them to complete debug info.
//
// CodeView needs a linker support. We need to interpret and debug
// info, and then write it to a separate .pdb file.
// Ignore debug info unless /debug is given.
if (!Config->Debug && Name.startswith(".debug"))
continue;
if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
continue;
auto *C = make<SectionChunk>(this, Sec);
// CodeView sections are stored to a different vector because they are not
// linked in the regular manner.
if (C->isCodeView())
DebugChunks.push_back(C);
if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
SparseChunks[I] = PendingComdat;
else
Chunks.push_back(C);
SparseChunks[I] = C;
SparseChunks[I] = readSection(I, nullptr);
}
}
void ObjectFile::initializeSymbols() {
uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
SymbolBodies.reserve(NumSymbols);
SparseSymbolBodies.resize(NumSymbols);
SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
const coff_aux_section_definition *Def) {
const coff_section *Sec;
StringRef Name;
if (auto EC = COFFObj->getSection(SectionNumber, Sec))
fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message());
if (auto EC = COFFObj->getSectionName(Sec, Name))
fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " +
EC.message());
if (Name == ".sxdata") {
ArrayRef<uint8_t> Data;
COFFObj->getSectionContents(Sec, Data);
if (Data.size() % 4 != 0)
fatal(".sxdata must be an array of symbol table indices");
SXData = {reinterpret_cast<const ulittle32_t *>(Data.data()),
Data.size() / 4};
return nullptr;
}
if (Name == ".drectve") {
ArrayRef<uint8_t> Data;
COFFObj->getSectionContents(Sec, Data);
Directives = std::string((const char *)Data.data(), Data.size());
return nullptr;
}
SmallVector<std::pair<SymbolBody *, uint32_t>, 8> WeakAliases;
int32_t LastSectionNumber = 0;
// Object files may have DWARF debug info or MS CodeView debug info
// (or both).
//
// DWARF sections don't need any special handling from the perspective
// of the linker; they are just a data section containing relocations.
// We can just link them to complete debug info.
//
// CodeView needs a linker support. We need to interpret and debug
// info, and then write it to a separate .pdb file.
// Ignore debug info unless /debug is given.
if (!Config->Debug && Name.startswith(".debug"))
return nullptr;
if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
return nullptr;
auto *C = make<SectionChunk>(this, Sec);
if (Def)
C->Checksum = Def->CheckSum;
// CodeView sections are stored to a different vector because they are not
// linked in the regular manner.
if (C->isCodeView())
DebugChunks.push_back(C);
else
Chunks.push_back(C);
return C;
}
// Resolves an associative comdat section: the section named by Sym is kept
// only if the parent section it is associated with (named by Def) was kept.
//
// Sym is the section definition symbol of the associative section and Def is
// its auxiliary section definition record, which carries the parent's
// section number.
void ObjFile::readAssociativeDefinition(
    COFFSymbolRef Sym, const coff_aux_section_definition *Def) {
  // The parent section number is read straight from the object file, so make
  // sure it is a valid index before using it; a corrupt file must not make
  // us read outside SparseChunks.
  int32_t ParentNumber = Def->getNumber(Sym.isBigObj());
  if (ParentNumber < 0 || (uint32_t)ParentNumber >= SparseChunks.size())
    fatal("broken object file: " + toString(this));
  SectionChunk *Parent = SparseChunks[ParentNumber];

  // If the parent is pending, it probably means that its section definition
  // appears after us in the symbol table. Leave the associated section as
  // pending; we will handle it during the second pass in initializeSymbols().
  if (Parent == PendingComdat)
    return;

  // Check whether the parent is prevailing. If it is, so are we, and we read
  // the section; otherwise mark it as discarded.
  int32_t SectionNumber = Sym.getSectionNumber();
  if (Parent) {
    SparseChunks[SectionNumber] = readSection(SectionNumber, Def);
    // readSection() may return null; only a real chunk is attached to the
    // parent.
    if (SparseChunks[SectionNumber])
      Parent->addAssociative(SparseChunks[SectionNumber]);
  } else {
    SparseChunks[SectionNumber] = nullptr;
  }
}
// Creates the symbol for a COFF symbol that lives in a section, based on the
// entry for that section in SparseChunks.
//
// External symbols go through the symbol table: as a regular definition when
// the section has a chunk, or as an undefined symbol when it does not.
// Non-external symbols get a nameless DefinedRegular when the section has a
// chunk, and no symbol at all (nullptr) otherwise.
Symbol *ObjFile::createRegular(COFFSymbolRef Sym) {
  SectionChunk *Chunk = SparseChunks[Sym.getSectionNumber()];

  if (!Sym.isExternal()) {
    if (!Chunk)
      return nullptr;
    return make<DefinedRegular>(this, /*Name*/ "", false,
                                /*IsExternal*/ false, Sym.getGeneric(), Chunk);
  }

  StringRef Name;
  COFFObj->getSymbolName(Sym, Name);
  if (!Chunk)
    return Symtab->addUndefined(Name, this, false);
  return Symtab->addRegular(this, Name, Sym.getGeneric(), Chunk);
}
void ObjFile::initializeSymbols() {
uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
Symbols.resize(NumSymbols);
SmallVector<std::pair<Symbol *, uint32_t>, 8> WeakAliases;
std::vector<uint32_t> PendingIndexes;
PendingIndexes.reserve(NumSymbols);
std::vector<const coff_aux_section_definition *> ComdatDefs(
COFFObj->getNumberOfSections() + 1);
for (uint32_t I = 0; I < NumSymbols; ++I) {
// Get a COFFSymbolRef object.
ErrorOr<COFFSymbolRef> SymOrErr = COFFObj->getSymbol(I);
if (!SymOrErr)
fatal(SymOrErr.getError(), "broken object file: " + toString(this));
COFFSymbolRef Sym = *SymOrErr;
const void *AuxP = nullptr;
if (Sym.getNumberOfAuxSymbols())
AuxP = COFFObj->getSymbol(I + 1)->getRawPtr();
bool IsFirst = (LastSectionNumber != Sym.getSectionNumber());
SymbolBody *Body = nullptr;
if (Sym.isUndefined()) {
Body = createUndefined(Sym);
} else if (Sym.isWeakExternal()) {
Body = createUndefined(Sym);
uint32_t TagIndex =
static_cast<const coff_aux_weak_external *>(AuxP)->TagIndex;
WeakAliases.emplace_back(Body, TagIndex);
COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I));
if (COFFSym.isUndefined()) {
Symbols[I] = createUndefined(COFFSym);
} else if (COFFSym.isWeakExternal()) {
Symbols[I] = createUndefined(COFFSym);
uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex;
WeakAliases.emplace_back(Symbols[I], TagIndex);
} else if (Optional<Symbol *> OptSym = createDefined(COFFSym, ComdatDefs)) {
Symbols[I] = *OptSym;
} else {
Body = createDefined(Sym, AuxP, IsFirst);
// createDefined() returns None if a symbol belongs to a section that
// was pending at the point when the symbol was read. This can happen in
// two cases:
// 1) section definition symbol for a comdat leader;
// 2) symbol belongs to a comdat section associated with a section whose
// section definition symbol appears later in the symbol table.
// In both of these cases, we can expect the section to be resolved by
// the time we finish visiting the remaining symbols in the symbol
// table. So we postpone the handling of this symbol until that time.
PendingIndexes.push_back(I);
}
if (Body) {
SymbolBodies.push_back(Body);
SparseSymbolBodies[I] = Body;
}
I += Sym.getNumberOfAuxSymbols();
LastSectionNumber = Sym.getSectionNumber();
I += COFFSym.getNumberOfAuxSymbols();
}
for (uint32_t I : PendingIndexes) {
COFFSymbolRef Sym = check(COFFObj->getSymbol(I));
if (auto *Def = Sym.getSectionDefinition())
if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
readAssociativeDefinition(Sym, Def);
Symbols[I] = createRegular(Sym);
}
for (auto &KV : WeakAliases) {
SymbolBody *Sym = KV.first;
Symbol *Sym = KV.first;
uint32_t Idx = KV.second;
checkAndSetWeakAlias(Symtab, this, Sym, SparseSymbolBodies[Idx]);
checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]);
}
}
SymbolBody *ObjectFile::createUndefined(COFFSymbolRef Sym) {
Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) {
StringRef Name;
COFFObj->getSymbolName(Sym, Name);
return Symtab->addUndefined(Name, this, Sym.isWeakExternal())->body();
return Symtab->addUndefined(Name, this, Sym.isWeakExternal());
}
SymbolBody *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP,
bool IsFirst) {
Optional<Symbol *> ObjFile::createDefined(
COFFSymbolRef Sym,
std::vector<const coff_aux_section_definition *> &ComdatDefs) {
StringRef Name;
if (Sym.isCommon()) {
auto *C = make<CommonChunk>(Sym);
@ -208,7 +301,7 @@ SymbolBody *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP,
COFFObj->getSymbolName(Sym, Name);
Symbol *S =
Symtab->addCommon(this, Name, Sym.getValue(), Sym.getGeneric(), C);
return S->body();
return S;
}
if (Sym.isAbsolute()) {
COFFObj->getSymbolName(Sym, Name);
@ -222,7 +315,7 @@ SymbolBody *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP,
return nullptr;
}
if (Sym.isExternal())
return Symtab->addAbsolute(Name, Sym)->body();
return Symtab->addAbsolute(Name, Sym);
else
return make<DefinedAbsolute>(Name, Sym);
}
@ -239,54 +332,49 @@ SymbolBody *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP,
if ((uint32_t)SectionNumber >= SparseChunks.size())
fatal("broken object file: " + toString(this));
// Nothing else to do without a section chunk.
auto *SC = cast_or_null<SectionChunk>(SparseChunks[SectionNumber]);
if (!SC)
return nullptr;
// Handle section definitions
if (IsFirst && AuxP) {
auto *Aux = reinterpret_cast<const coff_aux_section_definition *>(AuxP);
if (Aux->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
if (auto *ParentSC = cast_or_null<SectionChunk>(
SparseChunks[Aux->getNumber(Sym.isBigObj())])) {
ParentSC->addAssociative(SC);
// If we already discarded the parent, discard the child.
if (ParentSC->isDiscarded())
SC->markDiscarded();
}
SC->Checksum = Aux->CheckSum;
// Handle comdat leader symbols.
if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) {
ComdatDefs[SectionNumber] = nullptr;
Symbol *Leader;
bool Prevailing;
if (Sym.isExternal()) {
COFFObj->getSymbolName(Sym, Name);
std::tie(Leader, Prevailing) =
Symtab->addComdat(this, Name, Sym.getGeneric());
} else {
Leader = make<DefinedRegular>(this, /*Name*/ "", false,
/*IsExternal*/ false, Sym.getGeneric());
Prevailing = true;
}
if (Prevailing) {
SectionChunk *C = readSection(SectionNumber, Def);
SparseChunks[SectionNumber] = C;
C->Sym = cast<DefinedRegular>(Leader);
cast<DefinedRegular>(Leader)->Data = &C->Repl;
} else {
SparseChunks[SectionNumber] = nullptr;
}
return Leader;
}
DefinedRegular *B;
if (Sym.isExternal()) {
COFFObj->getSymbolName(Sym, Name);
Symbol *S =
Symtab->addRegular(this, Name, SC->isCOMDAT(), Sym.getGeneric(), SC);
B = cast<DefinedRegular>(S->body());
} else
B = make<DefinedRegular>(this, /*Name*/ "", SC->isCOMDAT(),
/*IsExternal*/ false, Sym.getGeneric(), SC);
if (SC->isCOMDAT() && Sym.getValue() == 0 && !AuxP)
SC->setSymbol(B);
// Read associative section definitions and prepare to handle the comdat
// leader symbol by setting the section's ComdatDefs pointer if we encounter a
// non-associative comdat.
if (SparseChunks[SectionNumber] == PendingComdat) {
if (auto *Def = Sym.getSectionDefinition()) {
if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
readAssociativeDefinition(Sym, Def);
else
ComdatDefs[SectionNumber] = Def;
}
}
return B;
if (SparseChunks[SectionNumber] == PendingComdat)
return None;
return createRegular(Sym);
}
void ObjectFile::initializeSEH() {
if (!SEHCompat || !SXData)
return;
ArrayRef<uint8_t> A;
COFFObj->getSectionContents(SXData, A);
if (A.size() % 4 != 0)
fatal(".sxdata must be an array of symbol table indices");
auto *I = reinterpret_cast<const ulittle32_t *>(A.data());
auto *E = reinterpret_cast<const ulittle32_t *>(A.data() + A.size());
for (; I != E; ++I)
SEHandlers.insert(SparseSymbolBodies[*I]);
}
MachineTypes ObjectFile::getMachineType() {
MachineTypes ObjFile::getMachineType() {
if (COFFObj)
return static_cast<MachineTypes>(COFFObj->getMachine());
return IMAGE_FILE_MACHINE_UNKNOWN;
@ -332,26 +420,27 @@ void ImportFile::parse() {
this->Hdr = Hdr;
ExternalName = ExtName;
ImpSym = cast<DefinedImportData>(
Symtab->addImportData(ImpName, this)->body());
ImpSym = Symtab->addImportData(ImpName, this);
if (Hdr->getType() == llvm::COFF::IMPORT_CONST)
ConstSym =
cast<DefinedImportData>(Symtab->addImportData(Name, this)->body());
static_cast<void>(Symtab->addImportData(Name, this));
// If type is function, we need to create a thunk which jump to an
// address pointed by the __imp_ symbol. (This allows you to call
// DLL functions just like regular non-DLL functions.)
if (Hdr->getType() != llvm::COFF::IMPORT_CODE)
return;
ThunkSym = cast<DefinedImportThunk>(
Symtab->addImportThunk(Name, ImpSym, Hdr->Machine)->body());
if (Hdr->getType() == llvm::COFF::IMPORT_CODE)
ThunkSym = Symtab->addImportThunk(Name, ImpSym, Hdr->Machine);
}
void BitcodeFile::parse() {
Obj = check(lto::InputFile::create(MemoryBufferRef(
MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier()))));
std::vector<std::pair<Symbol *, bool>> Comdat(Obj->getComdatTable().size());
for (size_t I = 0; I != Obj->getComdatTable().size(); ++I)
Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I]));
for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) {
StringRef SymName = Saver.save(ObjSym.getName());
int ComdatIndex = ObjSym.getComdatIndex();
Symbol *Sym;
if (ObjSym.isUndefined()) {
Sym = Symtab->addUndefined(SymName, this, false);
@ -361,13 +450,19 @@ void BitcodeFile::parse() {
// Weak external.
Sym = Symtab->addUndefined(SymName, this, true);
std::string Fallback = ObjSym.getCOFFWeakExternalFallback();
SymbolBody *Alias = Symtab->addUndefined(Saver.save(Fallback));
checkAndSetWeakAlias(Symtab, this, Sym->body(), Alias);
Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback));
checkAndSetWeakAlias(Symtab, this, Sym, Alias);
} else if (ComdatIndex != -1) {
if (SymName == Obj->getComdatTable()[ComdatIndex])
Sym = Comdat[ComdatIndex].first;
else if (Comdat[ComdatIndex].second)
Sym = Symtab->addRegular(this, SymName);
else
Sym = Symtab->addUndefined(SymName, this, false);
} else {
bool IsCOMDAT = ObjSym.getComdatIndex() != -1;
Sym = Symtab->addRegular(this, SymName, IsCOMDAT);
Sym = Symtab->addRegular(this, SymName);
}
SymbolBodies.push_back(Sym->body());
SymbolBodies.push_back(Sym);
}
Directives = Obj->getCOFFLinkerOpts();
}
@ -398,14 +493,13 @@ static StringRef getBasename(StringRef Path) {
}
// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
std::string lld::toString(coff::InputFile *File) {
std::string lld::toString(const coff::InputFile *File) {
if (!File)
return "(internal)";
return "<internal>";
if (File->ParentName.empty())
return File->getName().lower();
return File->getName();
std::string Res =
(getBasename(File->ParentName) + "(" + getBasename(File->getName()) + ")")
.str();
return StringRef(Res).lower();
return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) +
")")
.str();
}

View File

@ -11,7 +11,7 @@
#define LLD_COFF_INPUT_FILES_H
#include "Config.h"
#include "lld/Core/LLVM.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/LTO/LTO.h"
@ -31,6 +31,8 @@ class DbiModuleDescriptorBuilder;
namespace lld {
namespace coff {
std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *File);
using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
using llvm::COFF::MachineTypes;
using llvm::object::Archive;
@ -45,8 +47,7 @@ class DefinedImportData;
class DefinedImportThunk;
class Lazy;
class SectionChunk;
struct Symbol;
class SymbolBody;
class Symbol;
class Undefined;
// The root class of input files.
@ -57,7 +58,7 @@ public:
virtual ~InputFile() {}
// Returns the filename.
StringRef getName() { return MB.getBufferIdentifier(); }
StringRef getName() const { return MB.getBufferIdentifier(); }
// Reads a file (the constructor doesn't do that).
virtual void parse() = 0;
@ -101,32 +102,34 @@ private:
};
// .obj or .o file. This may be a member of an archive file.
class ObjectFile : public InputFile {
class ObjFile : public InputFile {
public:
explicit ObjectFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {}
explicit ObjFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == ObjectKind; }
void parse() override;
MachineTypes getMachineType() override;
std::vector<Chunk *> &getChunks() { return Chunks; }
std::vector<SectionChunk *> &getDebugChunks() { return DebugChunks; }
std::vector<SymbolBody *> &getSymbols() { return SymbolBodies; }
ArrayRef<Chunk *> getChunks() { return Chunks; }
ArrayRef<SectionChunk *> getDebugChunks() { return DebugChunks; }
ArrayRef<Symbol *> getSymbols() { return Symbols; }
// Returns a SymbolBody object for the SymbolIndex'th symbol in the
// Returns a Symbol object for the SymbolIndex'th symbol in the
// underlying object file.
SymbolBody *getSymbolBody(uint32_t SymbolIndex) {
return SparseSymbolBodies[SymbolIndex];
Symbol *getSymbol(uint32_t SymbolIndex) {
return Symbols[SymbolIndex];
}
// Returns the underlying COFF file.
COFFObjectFile *getCOFFObj() { return COFFObj.get(); }
static std::vector<ObjFile *> Instances;
// True if this object file is compatible with SEH.
// COFF-specific and x86-only.
bool SEHCompat = false;
// The list of safe exception handlers listed in .sxdata section.
// The symbol table indexes of the safe exception handlers.
// COFF-specific and x86-only.
std::set<SymbolBody *> SEHandlers;
ArrayRef<llvm::support::ulittle32_t> SXData;
// Pointer to the PDB module descriptor builder. Various debug info records
// will reference object files by "module index", which is here. Things like
@ -137,13 +140,23 @@ public:
private:
void initializeChunks();
void initializeSymbols();
void initializeSEH();
SymbolBody *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst);
SymbolBody *createUndefined(COFFSymbolRef Sym);
SectionChunk *
readSection(uint32_t SectionNumber,
const llvm::object::coff_aux_section_definition *Def);
void readAssociativeDefinition(
COFFSymbolRef COFFSym,
const llvm::object::coff_aux_section_definition *Def);
llvm::Optional<Symbol *>
createDefined(COFFSymbolRef Sym,
std::vector<const llvm::object::coff_aux_section_definition *>
&ComdatDefs);
Symbol *createRegular(COFFSymbolRef Sym);
Symbol *createUndefined(COFFSymbolRef Sym);
std::unique_ptr<COFFObjectFile> COFFObj;
const coff_section *SXData = nullptr;
// List of all chunks defined by this file. This includes both section
// chunks and non-section chunks for common symbols.
@ -157,16 +170,13 @@ private:
// Nonexistent section indices are filled with null pointers.
// (Because section number is 1-based, the first slot is always a
// null pointer.)
std::vector<Chunk *> SparseChunks;
std::vector<SectionChunk *> SparseChunks;
// List of all symbols referenced or defined by this file.
std::vector<SymbolBody *> SymbolBodies;
// This vector contains the same symbols as SymbolBodies, but they
// are indexed such that you can get a SymbolBody by symbol
// This vector contains a list of all symbols defined or referenced by this
// file. They are indexed such that you can get a Symbol by symbol
// index. Nonexistent indices (which are occupied by auxiliary
// symbols in the real symbol table) are filled with null pointers.
std::vector<SymbolBody *> SparseSymbolBodies;
std::vector<Symbol *> Symbols;
};
// This type represents import library members that contain DLL names
@ -179,8 +189,9 @@ public:
static bool classof(const InputFile *F) { return F->kind() == ImportKind; }
static std::vector<ImportFile *> Instances;
DefinedImportData *ImpSym = nullptr;
DefinedImportData *ConstSym = nullptr;
DefinedImportThunk *ThunkSym = nullptr;
std::string DLLName;
@ -206,18 +217,19 @@ class BitcodeFile : public InputFile {
public:
explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
std::vector<SymbolBody *> &getSymbols() { return SymbolBodies; }
ArrayRef<Symbol *> getSymbols() { return SymbolBodies; }
MachineTypes getMachineType() override;
static std::vector<BitcodeFile *> Instances;
std::unique_ptr<llvm::lto::InputFile> Obj;
private:
void parse() override;
std::vector<SymbolBody *> SymbolBodies;
std::vector<Symbol *> SymbolBodies;
};
} // namespace coff
std::string toString(coff::InputFile *File);
std::string toString(const coff::InputFile *File);
} // namespace lld
#endif

59
deps/lld/COFF/LTO.cpp vendored
View File

@ -9,15 +9,16 @@
#include "LTO.h"
#include "Config.h"
#include "Error.h"
#include "InputFiles.h"
#include "Symbols.h"
#include "lld/Core/TargetOptionsCommandFlags.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/TargetOptionsCommandFlags.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/LTO/Caching.h"
#include "llvm/LTO/Config.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/SymbolicFile.h"
@ -48,10 +49,8 @@ static void diagnosticHandler(const DiagnosticInfo &DI) {
}
static void checkError(Error E) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) -> Error {
error(EIB.message());
return Error::success();
});
handleAllErrors(std::move(E),
[&](ErrorInfoBase &EIB) { error(EIB.message()); });
}
static void saveBuffer(StringRef Buffer, const Twine &Path) {
@ -65,7 +64,13 @@ static void saveBuffer(StringRef Buffer, const Twine &Path) {
static std::unique_ptr<lto::LTO> createLTO() {
lto::Config Conf;
Conf.Options = InitTargetOptionsFromCodeGenFlags();
Conf.RelocModel = Reloc::PIC_;
// Use static reloc model on 32-bit x86 because it usually results in more
// compact code, and because there are also known code generation bugs when
// using the PIC model (see PR34306).
if (Config->Machine == COFF::IMAGE_FILE_MACHINE_I386)
Conf.RelocModel = Reloc::Static;
else
Conf.RelocModel = Reloc::PIC_;
Conf.DisableVerify = true;
Conf.DiagHandler = diagnosticHandler;
Conf.OptLevel = Config->LTOOptLevel;
@ -83,20 +88,17 @@ BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {}
BitcodeCompiler::~BitcodeCompiler() = default;
static void undefine(Symbol *S) {
replaceBody<Undefined>(S, S->body()->getName());
}
static void undefine(Symbol *S) { replaceSymbol<Undefined>(S, S->getName()); }
void BitcodeCompiler::add(BitcodeFile &F) {
lto::InputFile &Obj = *F.Obj;
unsigned SymNum = 0;
std::vector<SymbolBody *> SymBodies = F.getSymbols();
std::vector<Symbol *> SymBodies = F.getSymbols();
std::vector<lto::SymbolResolution> Resols(SymBodies.size());
// Provide a resolution to the LTO API for each symbol.
for (const lto::InputFile::Symbol &ObjSym : Obj.symbols()) {
SymbolBody *B = SymBodies[SymNum];
Symbol *Sym = B->symbol();
Symbol *Sym = SymBodies[SymNum];
lto::SymbolResolution &R = Resols[SymNum];
++SymNum;
@ -105,7 +107,7 @@ void BitcodeCompiler::add(BitcodeFile &F) {
// flags an undefined in IR with a definition in ASM as prevailing.
// Once IRObjectFile is fixed to report only one symbol this hack can
// be removed.
R.Prevailing = !ObjSym.isUndefined() && B->getFile() == &F;
R.Prevailing = !ObjSym.isUndefined() && Sym->getFile() == &F;
R.VisibleToRegularObj = Sym->IsUsedInRegularObj;
if (R.Prevailing)
undefine(Sym);
@ -118,11 +120,27 @@ void BitcodeCompiler::add(BitcodeFile &F) {
std::vector<StringRef> BitcodeCompiler::compile() {
unsigned MaxTasks = LTOObj->getMaxTasks();
Buff.resize(MaxTasks);
Files.resize(MaxTasks);
checkError(LTOObj->run([&](size_t Task) {
return llvm::make_unique<lto::NativeObjectStream>(
llvm::make_unique<raw_svector_ostream>(Buff[Task]));
}));
// The /lldltocache option specifies the path to a directory in which to cache
// native object files for ThinLTO incremental builds. If a path was
// specified, configure LTO to use it as the cache directory.
lto::NativeObjectCache Cache;
if (!Config->LTOCache.empty())
Cache = check(
lto::localCache(Config->LTOCache,
[&](size_t Task, std::unique_ptr<MemoryBuffer> MB,
StringRef Path) { Files[Task] = std::move(MB); }));
checkError(LTOObj->run(
[&](size_t Task) {
return llvm::make_unique<lto::NativeObjectStream>(
llvm::make_unique<raw_svector_ostream>(Buff[Task]));
},
Cache));
if (!Config->LTOCache.empty())
pruneCache(Config->LTOCache, Config->LTOCachePolicy);
std::vector<StringRef> Ret;
for (unsigned I = 0; I != MaxTasks; ++I) {
@ -136,5 +154,10 @@ std::vector<StringRef> BitcodeCompiler::compile() {
}
Ret.emplace_back(Buff[I].data(), Buff[I].size());
}
for (std::unique_ptr<MemoryBuffer> &File : Files)
if (File)
Ret.push_back(File->getBuffer());
return Ret;
}

3
deps/lld/COFF/LTO.h vendored
View File

@ -21,7 +21,7 @@
#ifndef LLD_COFF_LTO_H
#define LLD_COFF_LTO_H
#include "lld/Core/LLVM.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/SmallString.h"
#include <memory>
#include <vector>
@ -49,6 +49,7 @@ public:
private:
std::unique_ptr<llvm::lto::LTO> LTOObj;
std::vector<SmallString<0>> Buff;
std::vector<std::unique_ptr<MemoryBuffer>> Files;
};
}
}

View File

@ -20,11 +20,11 @@
//===----------------------------------------------------------------------===//
#include "MapFile.h"
#include "Error.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/raw_ostream.h"
@ -48,9 +48,9 @@ static std::string indent(int Depth) { return std::string(Depth * 8, ' '); }
// Returns a list of all symbols that we want to print out.
static std::vector<DefinedRegular *> getSymbols() {
std::vector<DefinedRegular *> V;
for (coff::ObjectFile *File : Symtab->ObjectFiles)
for (SymbolBody *B : File->getSymbols())
if (auto *Sym = dyn_cast<DefinedRegular>(B))
for (ObjFile *File : ObjFile::Instances)
for (Symbol *B : File->getSymbols())
if (auto *Sym = dyn_cast_or_null<DefinedRegular>(B))
if (Sym && !Sym->getCOFFSymbol().isSectionDefinition())
V.push_back(Sym);
return V;
@ -115,7 +115,7 @@ void coff::writeMapFile(ArrayRef<OutputSection *> OutputSections) {
if (!SC)
continue;
writeHeader(OS, SC->getRVA(), SC->getSize(), SC->getAlign());
writeHeader(OS, SC->getRVA(), SC->getSize(), SC->Alignment);
OS << indent(1) << SC->File->getName() << ":(" << SC->getSectionName()
<< ")\n";
for (DefinedRegular *Sym : SectionSyms[SC])

View File

@ -18,7 +18,7 @@ namespace coff {
// Set live bit on for each reachable chunk. Unmarked (unreachable)
// COMDAT chunks will be ignored by Writer, so they will be excluded
// from the final output.
void markLive(const std::vector<Chunk *> &Chunks) {
void markLive(ArrayRef<Chunk *> Chunks) {
// We build up a worklist of sections which have been marked as live. We only
// push into the worklist when we discover an unmarked section, and we mark
// as we push, so sections never appear twice in the list.
@ -37,7 +37,7 @@ void markLive(const std::vector<Chunk *> &Chunks) {
Worklist.push_back(C);
};
auto AddSym = [&](SymbolBody *B) {
auto AddSym = [&](Symbol *B) {
if (auto *Sym = dyn_cast<DefinedRegular>(B))
Enqueue(Sym->getChunk());
else if (auto *Sym = dyn_cast<DefinedImportData>(B))
@ -47,23 +47,17 @@ void markLive(const std::vector<Chunk *> &Chunks) {
};
// Add GC root chunks.
for (SymbolBody *B : Config->GCRoot)
for (Symbol *B : Config->GCRoot)
AddSym(B);
while (!Worklist.empty()) {
SectionChunk *SC = Worklist.pop_back_val();
// If this section was discarded, there are relocations referring to
// discarded sections. Ignore these sections to avoid crashing. They will be
// diagnosed during relocation processing.
if (SC->isDiscarded())
continue;
assert(SC->isLive() && "We mark as live when pushing onto the worklist!");
// Mark all symbols listed in the relocation table for this section.
for (SymbolBody *B : SC->symbols())
AddSym(B);
for (Symbol *B : SC->symbols())
if (B)
AddSym(B);
// Mark associative sections if any.
for (SectionChunk *C : SC->children())

View File

@ -1,52 +0,0 @@
//===- Memory.h -------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// See ELF/Memory.h
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_MEMORY_H
#define LLD_COFF_MEMORY_H
#include "llvm/Support/Allocator.h"
#include "llvm/Support/StringSaver.h"
#include <vector>
namespace lld {
namespace coff {
extern llvm::BumpPtrAllocator BAlloc;
extern llvm::StringSaver Saver;
// Common base of every per-type allocator. Each object adds itself to
// Instances when constructed so that all of them can be reset in one pass.
struct SpecificAllocBase {
  // All allocator objects constructed so far.
  static std::vector<SpecificAllocBase *> Instances;

  SpecificAllocBase() { Instances.push_back(this); }
  virtual ~SpecificAllocBase() = default;

  // Implemented by the typed subclass.
  virtual void reset() = 0;
};
// Typed allocator wrapper: owns an llvm::SpecificBumpPtrAllocator<T> and
// implements reset() by calling DestroyAll() on it.
template <class T> struct SpecificAlloc : public SpecificAllocBase {
  llvm::SpecificBumpPtrAllocator<T> Alloc;

  void reset() override { Alloc.DestroyAll(); }
};
template <typename T, typename... U> T *make(U &&... Args) {
static SpecificAlloc<T> Alloc;
return new (Alloc.Alloc.Allocate()) T(std::forward<U>(Args)...);
}
inline void freeArena() {
for (SpecificAllocBase *Alloc : SpecificAllocBase::Instances)
Alloc->reset();
BAlloc.Reset();
}
}
}
#endif

146
deps/lld/COFF/MinGW.cpp vendored Normal file
View File

@ -0,0 +1,146 @@
//===- MinGW.cpp ----------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "MinGW.h"
#include "SymbolTable.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
using namespace lld;
using namespace lld::coff;
using namespace llvm;
using namespace llvm::COFF;
// Populates the three exclusion sets consulted by shouldExport(): library
// names, object file names, and symbol names. Only the symbol list depends
// on the target machine.
AutoExporter::AutoExporter() {
  // Archive names (path and extension removed) whose members are never
  // auto-exported.
  ExcludeLibs = {
      "libgcc",
      "libgcc_s",
      "libstdc++",
      "libmingw32",
      "libmingwex",
      "libg2c",
      "libsupc++",
      "libobjc",
      "libgcj",
      "libclang_rt.builtins-aarch64",
      "libclang_rt.builtins-arm",
      "libclang_rt.builtins-i386",
      "libclang_rt.builtins-x86_64",
      "libc++",
      "libc++abi",
      "libunwind",
      "libmsvcrt",
      "libucrtbase",
  };

  // Object file names whose symbols are never auto-exported.
  ExcludeObjects = {
      "crt0.o",
      "crt1.o",
      "crt1u.o",
      "crt2.o",
      "crt2u.o",
      "dllcrt1.o",
      "dllcrt2.o",
      "gcrt0.o",
      "gcrt1.o",
      "gcrt2.o",
      "crtbegin.o",
      "crtend.o",
  };

  // Symbol names that are never auto-exported. The i386 spellings carry one
  // more leading underscore than the spellings used for every other machine,
  // and the i386 DLL entry points carry an "@12" suffix.
  if (Config->Machine != I386) {
    ExcludeSymbols = {
        "_NULL_IMPORT_DESCRIPTOR",
        "_pei386_runtime_relocator",
        "do_pseudo_reloc",
        "impure_ptr",
        "_impure_ptr",
        "_fmode",
        "environ",
        "__dso_handle",
        // These are the MinGW names that differ from the standard
        // ones (lacking an extra underscore).
        "DllMain",
        "DllEntryPoint",
        "DllMainCRTStartup",
    };
  } else {
    ExcludeSymbols = {
        "__NULL_IMPORT_DESCRIPTOR",
        "__pei386_runtime_relocator",
        "_do_pseudo_reloc",
        "_impure_ptr",
        "__impure_ptr",
        "__fmode",
        "_environ",
        "___dso_handle",
        // These are the MinGW names that differ from the standard
        // ones (lacking an extra underscore).
        "_DllMain@12",
        "_DllEntryPoint@12",
        "_DllMainCRTStartup@12",
    };
  }
}
// Decides whether Sym should be exported automatically.
//
// Returns false for a null symbol, a symbol that is not live or has no chunk,
// a symbol that is neither DefinedRegular nor DefinedCommon, a symbol whose
// name is excluded or looks like an import, and a symbol with no file.
// Otherwise the answer depends on the exclusion lists: the library list when
// the symbol's file has a parent (archive) name, the object list when not.
bool AutoExporter::shouldExport(Defined *Sym) const {
  if (!Sym)
    return false;
  if (!Sym->isLive() || !Sym->getChunk())
    return false;

  // Only regular and common definitions make sense to export; in particular
  // this rejects import symbols.
  if (!(isa<DefinedRegular>(Sym) || isa<DefinedCommon>(Sym)))
    return false;

  StringRef SymName = Sym->getName();
  if (ExcludeSymbols.count(SymName) != 0)
    return false;

  // Anything named like an import symbol is skipped, which also covers a
  // manually defined data symbol that happens to use such a name.
  if (SymName.startswith("__imp_"))
    return false;

  // Skip the symbol when the symbol table has an entry for the matching
  // __imp_ name.
  if (Symtab->find(("__imp_" + SymName).str()))
    return false;

  // A symbol without a file did not originate in a regular object file and
  // probably should not be exported; this also guards the dereferences below.
  auto *Origin = Sym->getFile();
  if (!Origin)
    return false;

  // Archive name with the directory and the file extension removed. When
  // there is no '.', rfind() yields npos and substr() keeps the whole name.
  StringRef Lib = sys::path::filename(Origin->ParentName);
  Lib = Lib.substr(0, Lib.rfind('.'));
  if (!Lib.empty())
    return ExcludeLibs.count(Lib) == 0;

  // No archive name: decide on the object file's own name.
  StringRef Obj = sys::path::filename(Origin->getName());
  return ExcludeObjects.count(Obj) == 0;
}
// Writes a module-definition file to the path Name. The file contains an
// "EXPORTS" line followed by one line per entry of Config->Exports.
// Reports a fatal error when the file cannot be opened.
void coff::writeDefFile(StringRef Name) {
  std::error_code EC;
  raw_fd_ostream OS(Name, EC, sys::fs::F_None);
  if (EC)
    fatal("cannot open " + Name + ": " + EC.message());

  OS << "EXPORTS\n";
  for (Export &E : Config->Exports) {
    OS << " " << E.ExportName << " @" << E.Ordinal;

    // An export is tagged DATA when it resolves to a defined symbol whose
    // chunk lacks the IMAGE_SCN_MEM_EXECUTE permission bit.
    auto *Def = dyn_cast_or_null<Defined>(E.Sym);
    auto *C = Def ? Def->getChunk() : nullptr;
    if (C && !(C->getPermissions() & IMAGE_SCN_MEM_EXECUTE))
      OS << " DATA";

    OS << "\n";
  }
}

38
deps/lld/COFF/MinGW.h vendored Normal file
View File

@ -0,0 +1,38 @@
//===- MinGW.h --------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_COFF_MINGW_H
#define LLD_COFF_MINGW_H
#include "Config.h"
#include "Symbols.h"
#include "lld/Common/LLVM.h"
namespace lld {
namespace coff {
// Logic for deciding what symbols to export, when exporting all
// symbols for MinGW.
// Holds the exclusion lists used when exporting all symbols for MinGW, and
// the predicate that applies them.
class AutoExporter {
public:
  // Fills the three sets below.
  AutoExporter();

  // Names of symbols that are never exported.
  llvm::StringSet<> ExcludeSymbols;
  // Names of libraries whose symbols are never exported.
  llvm::StringSet<> ExcludeLibs;
  // Names of object files whose symbols are never exported.
  llvm::StringSet<> ExcludeObjects;

  // Returns true if Sym should be exported.
  bool shouldExport(Defined *Sym) const;
};
// Writes the configured exports to the file at path Name.
void writeDefFile(StringRef Name);
} // namespace coff
} // namespace lld
#endif

View File

@ -9,13 +9,15 @@ class F<string name> : Flag<["/", "-", "-?"], name>;
class P<string name, string help> :
Joined<["/", "-", "-?"], name#":">, HelpText<help>;
// Boolean flag suffixed by ":no".
multiclass B<string name, string help> {
def "" : F<name>;
def _no : F<name#":no">, HelpText<help>;
// Boolean flag which can be suffixed by ":no". Using it unsuffixed turns the
// flag on and using it suffixed by ":no" turns it off.
multiclass B<string name, string help_on, string help_off> {
def "" : F<name>, HelpText<help_on>;
def _no : F<name#":no">, HelpText<help_off>;
}
def align : P<"align", "Section alignment">;
def aligncomm : P<"aligncomm", "Set common symbol alignment">;
def alternatename : P<"alternatename", "Define weak alias">;
def base : P<"base", "Base address of the program">;
def defaultlib : P<"defaultlib", "Add the library to the list of input files">;
@ -27,9 +29,12 @@ def export : P<"export", "Export a function">;
// No help text because /failifmismatch is not intended to be used by the user.
def failifmismatch : P<"failifmismatch", "">;
def heap : P<"heap", "Size of the heap">;
def ignore : P<"ignore", "Specify warning codes to ignore">;
def implib : P<"implib", "Import library name">;
def libpath : P<"libpath", "Additional library search path">;
def linkrepro : P<"linkrepro", "Dump linker invocation and input files for debugging">;
def lldltocache : P<"lldltocache", "Path to ThinLTO cached object file directory">;
def lldltocachepolicy : P<"lldltocachepolicy", "Pruning policy for the ThinLTO cache">;
def lldsavetemps : F<"lldsavetemps">,
HelpText<"Save temporary files instead of deleting them">;
def machine : P<"machine", "Specify target platform">;
@ -44,6 +49,7 @@ def stack : P<"stack", "Size of the stack">;
def stub : P<"stub", "Specify DOS stub file">;
def subsystem : P<"subsystem", "Specify subsystem">;
def version : P<"version", "Specify a version number in the PE header">;
def wholearchive_file : P<"wholearchive", "Include all object files from this archive">;
def disallowlib : Joined<["/", "-", "-?"], "disallowlib:">, Alias<nodefaultlib>;
@ -75,31 +81,53 @@ def profile : F<"profile">;
def swaprun_cd : F<"swaprun:cd">;
def swaprun_net : F<"swaprun:net">;
def verbose : F<"verbose">;
def wholearchive_flag : F<"wholearchive">;
def force : F<"force">,
HelpText<"Allow undefined symbols when creating executables">;
def force_unresolved : F<"force:unresolved">;
defm WX : B<"WX", "Treat warnings as errors", "Don't treat warnings as errors">;
defm allowbind: B<"allowbind", "Disable DLL binding">;
defm allowisolation : B<"allowisolation", "Set NO_ISOLATION bit">;
defm allowbind : B<"allowbind", "Enable DLL binding (default)",
"Disable DLL binding">;
defm allowisolation : B<"allowisolation", "Enable DLL isolation (default)",
"Disable DLL isolation">;
defm appcontainer : B<"appcontainer",
"Image can only be run in an app container">;
defm dynamicbase : B<"dynamicbase",
"Disable address space layout randomization">;
defm fixed : B<"fixed", "Enable base relocations">;
defm highentropyva : B<"highentropyva", "Set HIGH_ENTROPY_VA bit">;
defm largeaddressaware : B<"largeaddressaware", "Disable large addresses">;
defm nxcompat : B<"nxcompat", "Disable data execution provention">;
defm safeseh : B<"safeseh", "Produce an image with Safe Exception Handler">;
defm tsaware : B<"tsaware", "Create non-Terminal Server aware executable">;
"Image can only be run in an app container",
"Image can run outside an app container (default)">;
defm dynamicbase : B<"dynamicbase", "Enable ASLR (default unless /fixed)",
"Disable ASLR (default when /fixed)">;
defm fixed : B<"fixed", "Disable base relocations",
"Enable base relocations (default)">;
defm highentropyva : B<"highentropyva",
"Enable 64-bit ASLR (default on 64-bit)",
"Disable 64-bit ASLR">;
defm largeaddressaware : B<"largeaddressaware",
"Enable large addresses (default on 64-bit)",
"Disable large addresses (default on 32-bit)">;
// /nxcompat and /nxcompat:no. The second help string previously misspelled
// "prevention".
defm nxcompat : B<"nxcompat", "Enable data execution prevention (default)",
                  "Disable data execution prevention">;
defm safeseh : B<"safeseh",
"Produce an image with Safe Exception Handler (only for x86)",
"Don't produce an image with Safe Exception Handler">;
defm tsaware : B<"tsaware",
"Create Terminal Server aware executable (default)",
"Create non-Terminal Server aware executable">;
def help : F<"help">;
def help_q : Flag<["/?", "-?"], "">, Alias<help>;
// LLD extensions
def nopdb : F<"nopdb">, HelpText<"Disable PDB generation for DWARF users">;
def nosymtab : F<"nosymtab">;
def debug_ghash : F<"debug:ghash">;
def debug_dwarf : F<"debug:dwarf">;
def export_all_symbols : F<"export-all-symbols">;
def lldmingw : F<"lldmingw">;
def msvclto : F<"msvclto">;
def output_def : Joined<["/", "-"], "output-def:">;
def rsp_quoting : Joined<["--"], "rsp-quoting=">,
HelpText<"Quoting style for response files, 'windows' (default) or 'posix'">;
def dash_dash_version : Flag<["--"], "version">,
HelpText<"Print version information">;
// Flags for debugging
def lldmap : F<"lldmap">;
@ -128,12 +156,10 @@ def fastfail : F<"fastfail">;
def delay : QF<"delay">;
def errorreport : QF<"errorreport">;
def idlout : QF<"idlout">;
def ignore : QF<"ignore">;
def maxilksize : QF<"maxilksize">;
def natvis : QF<"natvis">;
def pdbaltpath : QF<"pdbaltpath">;
def tlbid : QF<"tlbid">;
def tlbout : QF<"tlbout">;
def verbose_all : QF<"verbose">;
def guardsym : QF<"guardsym">;
defm wx : QB<"wx">;

598
deps/lld/COFF/PDB.cpp vendored
View File

@ -10,37 +10,44 @@
#include "PDB.h"
#include "Chunks.h"
#include "Config.h"
#include "Error.h"
#include "Driver.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/DebugInfo/CodeView/CVDebugRecord.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/RecordName.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/JamCRC.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ScopedPrinter.h"
#include <memory>
@ -67,16 +74,16 @@ class PDBLinker {
public:
PDBLinker(SymbolTable *Symtab)
: Alloc(), Symtab(Symtab), Builder(Alloc), TypeTable(Alloc),
IDTable(Alloc) {}
IDTable(Alloc), GlobalTypeTable(Alloc), GlobalIDTable(Alloc) {}
/// Emit the basic PDB structure: initial streams, headers, etc.
void initialize(const llvm::codeview::DebugInfo *DI);
void initialize(const llvm::codeview::DebugInfo &BuildId);
/// Link CodeView from each object file in the symbol table into the PDB.
void addObjectsToPDB();
/// Link CodeView from a single object file into the PDB.
void addObjectFile(ObjectFile *File);
void addObjFile(ObjFile *File);
/// Produce a mapping from the type and item indices used in the object
/// file to those in the destination PDB.
@ -89,13 +96,18 @@ public:
/// If the object does not use a type server PDB (compiled with /Z7), we merge
/// all the type and item records from the .debug$S stream and fill in the
/// caller-provided ObjectIndexMap.
const CVIndexMap &mergeDebugT(ObjectFile *File, CVIndexMap &ObjectIndexMap);
Expected<const CVIndexMap&> mergeDebugT(ObjFile *File,
CVIndexMap &ObjectIndexMap);
const CVIndexMap &maybeMergeTypeServerPDB(ObjectFile *File,
TypeServer2Record &TS);
Expected<const CVIndexMap&> maybeMergeTypeServerPDB(ObjFile *File,
TypeServer2Record &TS);
/// Add the section map and section contributions to the PDB.
void addSections(ArrayRef<uint8_t> SectionTable);
void addSections(ArrayRef<OutputSection *> OutputSections,
ArrayRef<uint8_t> SectionTable);
void addSectionContrib(pdb::DbiModuleDescriptorBuilder &LinkerModule,
OutputSection *OS, Chunk *C);
/// Write the PDB to disk.
void commit();
@ -108,10 +120,16 @@ private:
pdb::PDBFileBuilder Builder;
/// Type records that will go into the PDB TPI stream.
TypeTableBuilder TypeTable;
MergingTypeTableBuilder TypeTable;
/// Item records that will go into the PDB IPI stream.
TypeTableBuilder IDTable;
MergingTypeTableBuilder IDTable;
/// Type records that will go into the PDB TPI stream (for /DEBUG:GHASH)
GlobalTypeTableBuilder GlobalTypeTable;
/// Item records that will go into the PDB IPI stream (for /DEBUG:GHASH)
GlobalTypeTableBuilder GlobalIDTable;
/// PDBs use a single global string table for filenames in the file checksum
/// table.
@ -123,18 +141,14 @@ private:
/// Type index mappings of type server PDBs that we've loaded so far.
std::map<GUID, CVIndexMap> TypeServerIndexMappings;
/// List of TypeServer PDBs which cannot be loaded.
/// Cached to prevent repeated load attempts.
std::set<GUID> MissingTypeServerPDBs;
};
}
// Returns a list of all SectionChunks.
static void addSectionContribs(SymbolTable *Symtab,
pdb::DbiStreamBuilder &DbiBuilder) {
for (Chunk *C : Symtab->getChunks())
if (auto *SC = dyn_cast<SectionChunk>(C))
DbiBuilder.addSectionContrib(SC->File->ModuleDBI, SC->Header);
}
static SectionChunk *findByName(std::vector<SectionChunk *> &Sections,
static SectionChunk *findByName(ArrayRef<SectionChunk *> Sections,
StringRef Name) {
for (SectionChunk *C : Sections)
if (C->getSectionName() == Name)
@ -152,21 +166,58 @@ static ArrayRef<uint8_t> consumeDebugMagic(ArrayRef<uint8_t> Data,
return Data.slice(4);
}
static ArrayRef<uint8_t> getDebugSection(ObjectFile *File, StringRef SecName) {
static ArrayRef<uint8_t> getDebugSection(ObjFile *File, StringRef SecName) {
if (SectionChunk *Sec = findByName(File->getDebugChunks(), SecName))
return consumeDebugMagic(Sec->getContents(), SecName);
return {};
}
// A COFF .debug$H section is currently a clang extension. This function checks
// if a .debug$H section is in a format that we expect / understand, so that we
// can ignore any sections which are coincidentally also named .debug$H but do
// not contain a format we recognize.
// Returns true if DebugH has the .debug$H layout this linker understands:
// a debug_h_header with the expected magic, version 0 and the SHA1 hash
// algorithm, followed by a payload whose size is a multiple of 20 bytes
// (presumably one SHA-1 digest per record).
static bool canUseDebugH(ArrayRef<uint8_t> DebugH) {
  const size_t HeaderSize = sizeof(object::debug_h_header);
  // Too short to contain a header at all.
  if (DebugH.size() < HeaderSize)
    return false;

  const auto *Header =
      reinterpret_cast<const object::debug_h_header *>(DebugH.data());
  ArrayRef<uint8_t> Payload = DebugH.drop_front(HeaderSize);

  if (Header->Magic != COFF::DEBUG_HASHES_SECTION_MAGIC)
    return false;
  if (Header->Version != 0)
    return false;
  if (Header->HashAlgorithm != uint16_t(GlobalTypeHashAlg::SHA1))
    return false;
  return Payload.size() % 20 == 0;
}
// Returns the contents of File's .debug$H section, or None when the file has
// no such debug chunk or when canUseDebugH() rejects its contents.
static Optional<ArrayRef<uint8_t>> getDebugH(ObjFile *File) {
  if (SectionChunk *Sec = findByName(File->getDebugChunks(), ".debug$H")) {
    ArrayRef<uint8_t> Contents = Sec->getContents();
    if (canUseDebugH(Contents))
      return Contents;
  }
  return None;
}
// Reinterprets the bytes that follow the debug_h_header as an array of
// GloballyHashedType records. DebugH must already satisfy canUseDebugH();
// this is asserted, not handled.
static ArrayRef<GloballyHashedType>
getHashesFromDebugH(ArrayRef<uint8_t> DebugH) {
  assert(canUseDebugH(DebugH));
  ArrayRef<uint8_t> Payload =
      DebugH.drop_front(sizeof(object::debug_h_header));
  const auto *First =
      reinterpret_cast<const GloballyHashedType *>(Payload.data());
  uint32_t Count = Payload.size() / sizeof(GloballyHashedType);
  return ArrayRef<GloballyHashedType>(First, Count);
}
static void addTypeInfo(pdb::TpiStreamBuilder &TpiBuilder,
TypeTableBuilder &TypeTable) {
TypeCollection &TypeTable) {
// Start the TPI or IPI stream header.
TpiBuilder.setVersionHeader(pdb::PdbTpiV80);
// Flatten the in memory type table.
TypeTable.ForEachRecord([&](TypeIndex TI, ArrayRef<uint8_t> Rec) {
// FIXME: Hash types.
TpiBuilder.addTypeRecord(Rec, None);
// Flatten the in memory type table and hash each type.
TypeTable.ForEachRecord([&](TypeIndex TI, const CVType &Type) {
auto Hash = pdb::hashTypeRecord(Type);
if (auto E = Hash.takeError())
fatal("type hashing error");
TpiBuilder.addTypeRecord(Type.RecordData, *Hash);
});
}
@ -180,12 +231,12 @@ maybeReadTypeServerRecord(CVTypeArray &Types) {
return None;
TypeServer2Record TS;
if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type), TS))
fatal(EC, "error reading type server record");
fatal("error reading type server record: " + toString(std::move(EC)));
return std::move(TS);
}
const CVIndexMap &PDBLinker::mergeDebugT(ObjectFile *File,
CVIndexMap &ObjectIndexMap) {
Expected<const CVIndexMap&> PDBLinker::mergeDebugT(ObjFile *File,
CVIndexMap &ObjectIndexMap) {
ArrayRef<uint8_t> Data = getDebugSection(File, ".debug$T");
if (Data.empty())
return ObjectIndexMap;
@ -194,7 +245,7 @@ const CVIndexMap &PDBLinker::mergeDebugT(ObjectFile *File,
CVTypeArray Types;
BinaryStreamReader Reader(Stream);
if (auto EC = Reader.readArray(Types, Reader.getLength()))
fatal(EC, "Reader::readArray failed");
fatal("Reader::readArray failed: " + toString(std::move(EC)));
// Look through type servers. If we've already seen this type server, don't
// merge any type information.
@ -203,17 +254,41 @@ const CVIndexMap &PDBLinker::mergeDebugT(ObjectFile *File,
// This is a /Z7 object. Fill in the temporary, caller-provided
// ObjectIndexMap.
if (auto Err = mergeTypeAndIdRecords(IDTable, TypeTable,
ObjectIndexMap.TPIMap, Types))
fatal(Err, "codeview::mergeTypeAndIdRecords failed");
if (Config->DebugGHashes) {
ArrayRef<GloballyHashedType> Hashes;
std::vector<GloballyHashedType> OwnedHashes;
if (Optional<ArrayRef<uint8_t>> DebugH = getDebugH(File))
Hashes = getHashesFromDebugH(*DebugH);
else {
OwnedHashes = GloballyHashedType::hashTypes(Types);
Hashes = OwnedHashes;
}
if (auto Err = mergeTypeAndIdRecords(GlobalIDTable, GlobalTypeTable,
ObjectIndexMap.TPIMap, Types, Hashes))
fatal("codeview::mergeTypeAndIdRecords failed: " +
toString(std::move(Err)));
} else {
if (auto Err = mergeTypeAndIdRecords(IDTable, TypeTable,
ObjectIndexMap.TPIMap, Types))
fatal("codeview::mergeTypeAndIdRecords failed: " +
toString(std::move(Err)));
}
return ObjectIndexMap;
}
static Expected<std::unique_ptr<pdb::NativeSession>>
tryToLoadPDB(const GUID &GuidFromObj, StringRef TSPath) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFile(
TSPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
if (!MBOrErr)
return errorCodeToError(MBOrErr.getError());
std::unique_ptr<pdb::IPDBSession> ThisSession;
if (auto EC =
pdb::loadDataForPDB(pdb::PDB_ReaderType::Native, TSPath, ThisSession))
if (auto EC = pdb::NativeSession::createFromPdb(
MemoryBuffer::getMemBuffer(Driver->takeBuffer(std::move(*MBOrErr)),
/*RequiresNullTerminator=*/false),
ThisSession))
return std::move(EC);
std::unique_ptr<pdb::NativeSession> NS(
@ -234,11 +309,19 @@ tryToLoadPDB(const GUID &GuidFromObj, StringRef TSPath) {
return std::move(NS);
}
const CVIndexMap &PDBLinker::maybeMergeTypeServerPDB(ObjectFile *File,
TypeServer2Record &TS) {
// First, check if we already loaded a PDB with this GUID. Return the type
Expected<const CVIndexMap&> PDBLinker::maybeMergeTypeServerPDB(ObjFile *File,
TypeServer2Record &TS) {
const GUID& TSId = TS.getGuid();
StringRef TSPath = TS.getName();
// First, check if the PDB has previously failed to load.
if (MissingTypeServerPDBs.count(TSId))
return make_error<pdb::GenericError>(
pdb::generic_error_code::type_server_not_found, TSPath);
// Second, check if we already loaded a PDB with this GUID. Return the type
// index mapping if we have it.
auto Insertion = TypeServerIndexMappings.insert({TS.getGuid(), CVIndexMap()});
auto Insertion = TypeServerIndexMappings.insert({TSId, CVIndexMap()});
CVIndexMap &IndexMap = Insertion.first->second;
if (!Insertion.second)
return IndexMap;
@ -249,34 +332,60 @@ const CVIndexMap &PDBLinker::maybeMergeTypeServerPDB(ObjectFile *File,
// Check for a PDB at:
// 1. The given file path
// 2. Next to the object file or archive file
auto ExpectedSession = tryToLoadPDB(TS.getGuid(), TS.getName());
auto ExpectedSession = tryToLoadPDB(TSId, TSPath);
if (!ExpectedSession) {
consumeError(ExpectedSession.takeError());
StringRef LocalPath =
!File->ParentName.empty() ? File->ParentName : File->getName();
SmallString<128> Path = sys::path::parent_path(LocalPath);
sys::path::append(
Path, sys::path::filename(TS.getName(), sys::path::Style::windows));
ExpectedSession = tryToLoadPDB(TS.getGuid(), Path);
Path, sys::path::filename(TSPath, sys::path::Style::windows));
ExpectedSession = tryToLoadPDB(TSId, Path);
}
if (auto E = ExpectedSession.takeError()) {
TypeServerIndexMappings.erase(TSId);
MissingTypeServerPDBs.emplace(TSId);
return std::move(E);
}
if (auto E = ExpectedSession.takeError())
fatal(E, "Type server PDB was not found");
// Merge TPI first, because the IPI stream will reference type indices.
auto ExpectedTpi = (*ExpectedSession)->getPDBFile().getPDBTpiStream();
if (auto E = ExpectedTpi.takeError())
fatal(E, "Type server does not have TPI stream");
if (auto Err = mergeTypeRecords(TypeTable, IndexMap.TPIMap,
ExpectedTpi->typeArray()))
fatal(Err, "codeview::mergeTypeRecords failed");
// Merge IPI.
fatal("Type server does not have TPI stream: " + toString(std::move(E)));
auto ExpectedIpi = (*ExpectedSession)->getPDBFile().getPDBIpiStream();
if (auto E = ExpectedIpi.takeError())
fatal(E, "Type server does not have TPI stream");
if (auto Err = mergeIdRecords(IDTable, IndexMap.TPIMap, IndexMap.IPIMap,
ExpectedIpi->typeArray()))
fatal(Err, "codeview::mergeIdRecords failed");
fatal("Type server does not have TPI stream: " + toString(std::move(E)));
if (Config->DebugGHashes) {
// PDBs do not actually store global hashes, so when merging a type server
// PDB we have to synthesize global hashes. To do this, we first synthesize
// global hashes for the TPI stream, since it is independent, then we
// synthesize hashes for the IPI stream, using the hashes for the TPI stream
// as inputs.
auto TpiHashes = GloballyHashedType::hashTypes(ExpectedTpi->typeArray());
auto IpiHashes =
GloballyHashedType::hashIds(ExpectedIpi->typeArray(), TpiHashes);
// Merge TPI first, because the IPI stream will reference type indices.
if (auto Err = mergeTypeRecords(GlobalTypeTable, IndexMap.TPIMap,
ExpectedTpi->typeArray(), TpiHashes))
fatal("codeview::mergeTypeRecords failed: " + toString(std::move(Err)));
// Merge IPI.
if (auto Err =
mergeIdRecords(GlobalIDTable, IndexMap.TPIMap, IndexMap.IPIMap,
ExpectedIpi->typeArray(), IpiHashes))
fatal("codeview::mergeIdRecords failed: " + toString(std::move(Err)));
} else {
// Merge TPI first, because the IPI stream will reference type indices.
if (auto Err = mergeTypeRecords(TypeTable, IndexMap.TPIMap,
ExpectedTpi->typeArray()))
fatal("codeview::mergeTypeRecords failed: " + toString(std::move(Err)));
// Merge IPI.
if (auto Err = mergeIdRecords(IDTable, IndexMap.TPIMap, IndexMap.IPIMap,
ExpectedIpi->typeArray()))
fatal("codeview::mergeIdRecords failed: " + toString(std::move(Err)));
}
return IndexMap;
}
@ -290,7 +399,7 @@ static bool remapTypeIndex(TypeIndex &TI, ArrayRef<TypeIndex> TypeIndexMap) {
return true;
}
static void remapTypesInSymbolRecord(ObjectFile *File,
static void remapTypesInSymbolRecord(ObjFile *File, SymbolKind SymKind,
MutableArrayRef<uint8_t> Contents,
const CVIndexMap &IndexMap,
ArrayRef<TiReference> TypeRefs) {
@ -301,27 +410,73 @@ static void remapTypesInSymbolRecord(ObjectFile *File,
// This can be an item index or a type index. Choose the appropriate map.
ArrayRef<TypeIndex> TypeOrItemMap = IndexMap.TPIMap;
if (Ref.Kind == TiRefKind::IndexRef && IndexMap.IsTypeServerMap)
bool IsItemIndex = Ref.Kind == TiRefKind::IndexRef;
if (IsItemIndex && IndexMap.IsTypeServerMap)
TypeOrItemMap = IndexMap.IPIMap;
MutableArrayRef<TypeIndex> TIs(
reinterpret_cast<TypeIndex *>(Contents.data() + Ref.Offset), Ref.Count);
for (TypeIndex &TI : TIs) {
if (!remapTypeIndex(TI, TypeOrItemMap)) {
log("ignoring symbol record of kind 0x" + utohexstr(SymKind) + " in " +
File->getName() + " with bad " + (IsItemIndex ? "item" : "type") +
" index 0x" + utohexstr(TI.getIndex()));
TI = TypeIndex(SimpleTypeKind::NotTranslated);
log("ignoring symbol record in " + File->getName() +
" with bad type index 0x" + utohexstr(TI.getIndex()));
continue;
}
}
}
}
/// MSVC translates S_PROC_ID_END to S_END.
uint16_t canonicalizeSymbolKind(SymbolKind Kind) {
if (Kind == SymbolKind::S_PROC_ID_END)
return SymbolKind::S_END;
return Kind;
/// Returns the symbol kind stored in the RecordPrefix that begins a serialized
/// symbol record.
///
/// \param RecordData Bytes of one symbol record; its first bytes are
///        reinterpreted as a RecordPrefix (assumes the buffer is at least that
///        large -- not checked here).
static SymbolKind symbolKind(ArrayRef<uint8_t> RecordData) {
  const auto *Header =
      reinterpret_cast<const RecordPrefix *>(RecordData.data());
  // Pull the kind out as a plain 16-bit integer before converting it to the
  // enum type.
  const uint16_t RawKind = static_cast<uint16_t>(Header->RecordKind);
  return static_cast<SymbolKind>(RawKind);
}
/// Rewrites, in place, the "ID" flavours of symbol records into the kinds MSVC
/// emits in a PDB: S_PROC_ID_END becomes S_END, and S_GPROC32_ID /
/// S_LPROC32_ID become S_GPROC32 / S_LPROC32. Any other record kind is left
/// untouched.
///
/// \param RecordData The full symbol record, starting at its RecordPrefix.
/// \param IDTable    Collection used to look up the function-id record that a
///                   procedure symbol refers to.
static void translateIdSymbols(MutableArrayRef<uint8_t> &RecordData,
                               TypeCollection &IDTable) {
  auto *Header = reinterpret_cast<RecordPrefix *>(RecordData.data());
  const SymbolKind OldKind = symbolKind(RecordData);

  // Scope terminator: only the kind field changes.
  if (OldKind == SymbolKind::S_PROC_ID_END) {
    Header->RecordKind = SymbolKind::S_END;
    return;
  }

  const bool IsGlobalProcId = OldKind == SymbolKind::S_GPROC32_ID;
  if (!IsGlobalProcId && OldKind != SymbolKind::S_LPROC32_ID)
    return;

  // In an object file, a [GL]PROC32_ID record embeds a reference into the
  // object's single type index namespace. By this point that reference has
  // already been translated into the PDB's ID stream index space, but the
  // non-ID symbol must refer to the type stream index space, so it is remapped
  // once more: from ID index space to type index space.
  SmallVector<TiReference, 1> Refs;
  auto Payload = RecordData.drop_front(sizeof(RecordPrefix));
  CVSymbol Sym(OldKind, RecordData);
  discoverTypeIndicesInSymbol(Sym, Refs);
  assert(Refs.size() == 1);
  assert(Refs.front().Count == 1);

  // The referenced index names a FuncIdRecord or MemberFuncIdRecord in the IPI
  // stream, whose `FunctionType` member refers to the TPI stream. LF_FUNC_ID
  // and LF_MEMFUNC_ID share a record layout; in both cases the second type
  // index is the one wanted.
  TypeIndex *FuncRef =
      reinterpret_cast<TypeIndex *>(Payload.data() + Refs[0].Offset);
  if (!FuncRef->isSimple() && !FuncRef->isNoneType()) {
    CVType FuncIdData = IDTable.getType(*FuncRef);
    SmallVector<TypeIndex, 2> Indices;
    discoverTypeIndices(FuncIdData, Indices);
    assert(Indices.size() == 2);
    *FuncRef = Indices[1];
  }

  const SymbolKind NewKind =
      IsGlobalProcId ? SymbolKind::S_GPROC32 : SymbolKind::S_LPROC32;
  Header->RecordKind = uint16_t(NewKind);
}
/// Copy the symbol record. In a PDB, symbol records must be 4 byte aligned.
@ -339,10 +494,8 @@ static MutableArrayRef<uint8_t> copySymbolForPdb(const CVSymbol &Sym,
memset(NewData.data() + Sym.length(), 0, Size - Sym.length());
// Update the record prefix length. It should point to the beginning of the
// next record. MSVC does some canonicalization of the record kind, so we do
// that as well.
// next record.
auto *Prefix = reinterpret_cast<RecordPrefix *>(Mem);
Prefix->RecordKind = canonicalizeSymbolKind(Sym.kind());
Prefix->RecordLen = Size - 2;
return NewData;
}
@ -402,7 +555,7 @@ static void scopeStackOpen(SmallVectorImpl<SymbolScope> &Stack,
}
static void scopeStackClose(SmallVectorImpl<SymbolScope> &Stack,
uint32_t CurOffset, ObjectFile *File) {
uint32_t CurOffset, ObjFile *File) {
if (Stack.empty()) {
warn("symbol scopes are not balanced in " + File->getName());
return;
@ -411,8 +564,86 @@ static void scopeStackClose(SmallVectorImpl<SymbolScope> &Stack,
S.OpeningRecord->PtrEnd = CurOffset;
}
static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjectFile *File,
/// Decides whether a symbol record is written to its module's symbol stream.
///
/// \returns false for S_GDATA32, S_CONSTANT, S_UDT, S_PROCREF and S_LPROCREF;
///          true for every other kind (S_LDATA32 included).
static bool symbolGoesInModuleStream(const CVSymbol &Sym) {
  const auto Kind = Sym.kind();

  // S_GDATA32 stays out of the module stream, whereas S_LDATA32 goes in.
  const bool IsGlobalOnlyRecord = Kind == SymbolKind::S_GDATA32 ||
                                  Kind == SymbolKind::S_CONSTANT ||
                                  Kind == SymbolKind::S_UDT;

  // S_PROCREF and S_LPROCREF should not really show up in the input, since the
  // linker synthesizes them in response to S_GPROC32 and S_LPROC32. If they do
  // appear, they are kept out of the module stream.
  const bool IsProcRef =
      Kind == SymbolKind::S_PROCREF || Kind == SymbolKind::S_LPROCREF;

  return !(IsGlobalOnlyRecord || IsProcRef);
}
/// Decides whether a symbol record is (also) added to the globals stream.
///
/// \returns true for S_CONSTANT, S_GDATA32, S_LDATA32, S_GPROC32, S_LPROC32,
///          S_PROCREF and S_LPROCREF; false for every other kind.
static bool symbolGoesInGlobalsStream(const CVSymbol &Sym) {
  bool InGlobals = false;
  switch (Sym.kind()) {
  // Data and constants. S_LDATA32 goes in both the module stream and the
  // globals stream.
  case SymbolKind::S_GDATA32:
  case SymbolKind::S_LDATA32:
  case SymbolKind::S_CONSTANT:
  // Procedures.
  case SymbolKind::S_GPROC32:
  case SymbolKind::S_LPROC32:
  // S_PROCREF and S_LPROCREF should not really show up in the input, since the
  // linker synthesizes them in response to S_GPROC32 and S_LPROC32; if they do,
  // they are copied straight through.
  case SymbolKind::S_PROCREF:
  case SymbolKind::S_LPROCREF:
    InGlobals = true;
    break;
  // FIXME: For now, all S_UDT symbols are dropped (they go in neither the
  // globals stream nor the module stream). They have special handling that
  // needs more investigation to get right; putting them all in the globals
  // stream makes WinDbg fail to display local variables of class types, saying
  // it cannot find the type Foo *. Dropping them is a stopgap to keep things
  // working. S_UDT therefore shares the default outcome.
  default:
    break;
  }
  return InGlobals;
}
/// Adds the globals-stream entry for \p Sym to \p Builder.
///
/// Data, constant, UDT and proc-ref records are added as they are. Procedure
/// records (S_GPROC32 / S_LPROC32) are not added themselves; a ProcRefSym that
/// points back into the owning module's symbol stream is added instead. Any
/// other kind hits llvm_unreachable.
///
/// \param Builder Globals stream builder that receives the record.
/// \param File    Object file the symbol came from; its ModuleDBI supplies the
///                module index and the next symbol offset.
/// \param Sym     The symbol record to publish.
static void addGlobalSymbol(pdb::GSIStreamBuilder &Builder, ObjFile &File,
                            const CVSymbol &Sym) {
  const auto Kind = Sym.kind();
  switch (Kind) {
  case SymbolKind::S_GPROC32:
  case SymbolKind::S_LPROC32: {
    const bool IsLocalProc = Kind == SymbolKind::S_LPROC32;
    ProcRefSym Ref(IsLocalProc ? SymbolRecordKind::LocalProcRef
                               : SymbolRecordKind::ProcRefSym);
    Ref.Module = static_cast<uint16_t>(File.ModuleDBI->getModuleIndex());
    // For some reason, MSVC seems to add one to this value.
    ++Ref.Module;
    Ref.Name = getSymbolName(Sym);
    Ref.SumName = 0;
    // Offset at which the module stream will place its next symbol.
    Ref.SymOffset = File.ModuleDBI->getNextSymbolOffset();
    Builder.addGlobalSymbol(Ref);
    return;
  }
  case SymbolKind::S_GDATA32:
  case SymbolKind::S_LDATA32:
  case SymbolKind::S_CONSTANT:
  case SymbolKind::S_UDT:
  case SymbolKind::S_PROCREF:
  case SymbolKind::S_LPROCREF:
    Builder.addGlobalSymbol(Sym);
    return;
  default:
    llvm_unreachable("Invalid symbol kind!");
  }
}
static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjFile *File,
pdb::GSIStreamBuilder &GsiBuilder,
const CVIndexMap &IndexMap,
TypeCollection &IDTable,
BinaryStreamRef SymData) {
// FIXME: Improve error recovery by warning and skipping records when
// possible.
@ -420,11 +651,11 @@ static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjectFile *File,
BinaryStreamReader Reader(SymData);
ExitOnErr(Reader.readArray(Syms, Reader.getLength()));
SmallVector<SymbolScope, 4> Scopes;
for (const CVSymbol &Sym : Syms) {
for (CVSymbol Sym : Syms) {
// Discover type index references in the record. Skip it if we don't know
// where they are.
SmallVector<TiReference, 32> TypeRefs;
if (!discoverTypeIndices(Sym, TypeRefs)) {
if (!discoverTypeIndicesInSymbol(Sym, TypeRefs)) {
log("ignoring unknown symbol record with kind 0x" + utohexstr(Sym.kind()));
continue;
}
@ -435,17 +666,30 @@ static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjectFile *File,
// Re-map all the type index references.
MutableArrayRef<uint8_t> Contents =
NewData.drop_front(sizeof(RecordPrefix));
remapTypesInSymbolRecord(File, Contents, IndexMap, TypeRefs);
remapTypesInSymbolRecord(File, Sym.kind(), Contents, IndexMap, TypeRefs);
// An object file may have S_xxx_ID symbols, but these get converted to
// "real" symbols in a PDB.
translateIdSymbols(NewData, IDTable);
SymbolKind NewKind = symbolKind(NewData);
// Fill in "Parent" and "End" fields by maintaining a stack of scopes.
CVSymbol NewSym(Sym.kind(), NewData);
if (symbolOpensScope(Sym.kind()))
CVSymbol NewSym(NewKind, NewData);
if (symbolOpensScope(NewKind))
scopeStackOpen(Scopes, File->ModuleDBI->getNextSymbolOffset(), NewSym);
else if (symbolEndsScope(Sym.kind()))
else if (symbolEndsScope(NewKind))
scopeStackClose(Scopes, File->ModuleDBI->getNextSymbolOffset(), File);
// Add the symbol to the globals stream if necessary. Do this before adding
// the symbol to the module since we may need to get the next symbol offset,
// and writing to the module's symbol stream will update that offset.
if (symbolGoesInGlobalsStream(NewSym))
addGlobalSymbol(GsiBuilder, *File, NewSym);
// Add the symbol to the module.
File->ModuleDBI->addSymbol(NewSym);
if (symbolGoesInModuleStream(NewSym))
File->ModuleDBI->addSymbol(NewSym);
}
}
@ -460,7 +704,7 @@ static ArrayRef<uint8_t> relocateDebugChunk(BumpPtrAllocator &Alloc,
".debug$S");
}
void PDBLinker::addObjectFile(ObjectFile *File) {
void PDBLinker::addObjFile(ObjFile *File) {
// Add a module descriptor for every object file. We need to put an absolute
// path to the object into the PDB. If this is a plain object, we make its
// path absolute. If it's an object in an archive, we make the archive path
@ -479,7 +723,16 @@ void PDBLinker::addObjectFile(ObjectFile *File) {
// the PDB first, so that we can get the map from object file type and item
// indices to PDB type and item indices.
CVIndexMap ObjectIndexMap;
const CVIndexMap &IndexMap = mergeDebugT(File, ObjectIndexMap);
auto IndexMapResult = mergeDebugT(File, ObjectIndexMap);
// If the .debug$T sections fail to merge, assume there is no debug info.
if (!IndexMapResult) {
warn("Type server PDB for " + Name + " is invalid, ignoring debug info. " +
toString(IndexMapResult.takeError()));
return;
}
const CVIndexMap &IndexMap = *IndexMapResult;
// Now do all live .debug$S sections.
for (SectionChunk *DebugChunk : File->getDebugChunks()) {
@ -511,7 +764,13 @@ void PDBLinker::addObjectFile(ObjectFile *File) {
File->ModuleDBI->addDebugSubsection(SS);
break;
case DebugSubsectionKind::Symbols:
mergeSymbolRecords(Alloc, File, IndexMap, SS.getRecordData());
if (Config->DebugGHashes) {
mergeSymbolRecords(Alloc, File, Builder.getGsiBuilder(), IndexMap,
GlobalIDTable, SS.getRecordData());
} else {
mergeSymbolRecords(Alloc, File, Builder.getGsiBuilder(), IndexMap,
IDTable, SS.getRecordData());
}
break;
default:
// FIXME: Process the rest of the subsections.
@ -539,45 +798,88 @@ void PDBLinker::addObjectFile(ObjectFile *File) {
}
}
/// Builds the public symbol record describing \p Def.
///
/// The record carries the symbol's name, the Function flag when the symbol is
/// a COFF function definition or an import thunk, and the symbol's position
/// expressed as (output section index, offset from the section's RVA).
///
/// \param Def A defined symbol whose chunk belongs to an output section (this
///            is asserted).
static PublicSym32 createPublic(Defined *Def) {
  PublicSym32 Pub(SymbolKind::S_PUB32);
  Pub.Name = Def->getName();

  // Only COFF function definitions and import thunks are flagged as functions.
  bool IsFunction;
  if (auto *Coff = dyn_cast<DefinedCOFF>(Def))
    IsFunction = Coff->getCOFFSymbol().isFunctionDefinition();
  else
    IsFunction = isa<DefinedImportThunk>(Def);
  if (IsFunction)
    Pub.Flags = PublicSymFlags::Function;

  OutputSection *Section = Def->getChunk()->getOutputSection();
  assert(Section && "all publics should be in final image");
  Pub.Segment = Section->SectionIndex;
  Pub.Offset = Def->getRVA() - Section->getRVA();
  return Pub;
}
// Add all object files to the PDB. Merge .debug$T sections into IpiData and
// TpiData.
void PDBLinker::addObjectsToPDB() {
for (ObjectFile *File : Symtab->ObjectFiles)
addObjectFile(File);
for (ObjFile *File : ObjFile::Instances)
addObjFile(File);
Builder.getStringTableBuilder().setStrings(PDBStrTab);
// Construct TPI stream contents.
addTypeInfo(Builder.getTpiBuilder(), TypeTable);
// Construct TPI and IPI stream contents.
if (Config->DebugGHashes) {
addTypeInfo(Builder.getTpiBuilder(), GlobalTypeTable);
addTypeInfo(Builder.getIpiBuilder(), GlobalIDTable);
} else {
addTypeInfo(Builder.getTpiBuilder(), TypeTable);
addTypeInfo(Builder.getIpiBuilder(), IDTable);
}
// Construct IPI stream contents.
addTypeInfo(Builder.getIpiBuilder(), IDTable);
// Compute the public and global symbols.
auto &GsiBuilder = Builder.getGsiBuilder();
std::vector<PublicSym32> Publics;
Symtab->forEachSymbol([&Publics](Symbol *S) {
// Only emit defined, live symbols that have a chunk.
auto *Def = dyn_cast<Defined>(S);
if (Def && Def->isLive() && Def->getChunk())
Publics.push_back(createPublic(Def));
});
// Add public and symbol records stream.
// For now we don't actually write any thing useful to the publics stream, but
// the act of "getting" it also creates it lazily so that we write an empty
// stream.
(void)Builder.getPublicsBuilder();
if (!Publics.empty()) {
// Sort the public symbols and add them to the stream.
std::sort(Publics.begin(), Publics.end(),
[](const PublicSym32 &L, const PublicSym32 &R) {
return L.Name < R.Name;
});
for (const PublicSym32 &Pub : Publics)
GsiBuilder.addPublicSymbol(Pub);
}
}
static void addLinkerModuleSymbols(StringRef Path,
pdb::DbiModuleDescriptorBuilder &Mod,
BumpPtrAllocator &Allocator) {
codeview::SymbolSerializer Serializer(Allocator, CodeViewContainer::Pdb);
codeview::ObjNameSym ONS(SymbolRecordKind::ObjNameSym);
codeview::Compile3Sym CS(SymbolRecordKind::Compile3Sym);
codeview::EnvBlockSym EBS(SymbolRecordKind::EnvBlockSym);
static void addCommonLinkerModuleSymbols(StringRef Path,
pdb::DbiModuleDescriptorBuilder &Mod,
BumpPtrAllocator &Allocator) {
ObjNameSym ONS(SymbolRecordKind::ObjNameSym);
Compile3Sym CS(SymbolRecordKind::Compile3Sym);
EnvBlockSym EBS(SymbolRecordKind::EnvBlockSym);
ONS.Name = "* Linker *";
ONS.Signature = 0;
CS.Machine = Config->is64() ? CPUType::X64 : CPUType::Intel80386;
// Interestingly, if we set the string to 0.0.0.0, then when trying to view
// local variables WinDbg emits an error that private symbols are not present.
// By setting this to a valid MSVC linker version string, local variables are
// displayed properly. As such, even though it is not representative of
// LLVM's version information, we need this for compatibility.
CS.Flags = CompileSym3Flags::None;
CS.VersionBackendBuild = 0;
CS.VersionBackendMajor = 0;
CS.VersionBackendMinor = 0;
CS.VersionBackendBuild = 25019;
CS.VersionBackendMajor = 14;
CS.VersionBackendMinor = 10;
CS.VersionBackendQFE = 0;
// MSVC also sets the frontend to 0.0.0.0 since this is specifically for the
// linker module (which is by definition a backend), so we don't need to do
// anything here. Also, it seems we can use "LLVM Linker" for the linker name
// without any problems. Only the backend version has to be hardcoded to a
// magic number.
CS.VersionFrontendBuild = 0;
CS.VersionFrontendMajor = 0;
CS.VersionFrontendMinor = 0;
@ -592,7 +894,9 @@ static void addLinkerModuleSymbols(StringRef Path,
sys::fs::current_path(cwd);
EBS.Fields.push_back(cwd);
EBS.Fields.push_back("exe");
EBS.Fields.push_back(Config->Argv[0]);
SmallString<64> exe = Config->Argv[0];
llvm::sys::fs::make_absolute(exe);
EBS.Fields.push_back(exe);
EBS.Fields.push_back("pdb");
EBS.Fields.push_back(Path);
EBS.Fields.push_back("cmd");
@ -605,17 +909,33 @@ static void addLinkerModuleSymbols(StringRef Path,
EBS, Allocator, CodeViewContainer::Pdb));
}
/// Serializes a SectionSym record describing output section \p OS and appends
/// it to module \p Mod.
///
/// \param Mod       Module descriptor that receives the record.
/// \param OS        Output section supplying name, RVA, size, index and
///                  characteristics.
/// \param Allocator Allocator handed to the symbol serializer.
static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &Mod,
                                         OutputSection &OS,
                                         BumpPtrAllocator &Allocator) {
  SectionSym Record(SymbolRecordKind::SectionSym);

  // Identity of the section.
  Record.Name = OS.getName();
  Record.SectionNumber = OS.SectionIndex;

  // Placement and extent.
  Record.Rva = OS.getRVA();
  Record.Length = OS.getVirtualSize();
  Record.Alignment = 12; // 2^12 = 4KB

  Record.Characteristics = OS.getCharacteristics();

  Mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
      Record, Allocator, CodeViewContainer::Pdb));
}
// Creates a PDB file.
void coff::createPDB(SymbolTable *Symtab, ArrayRef<uint8_t> SectionTable,
const llvm::codeview::DebugInfo *DI) {
void coff::createPDB(SymbolTable *Symtab,
ArrayRef<OutputSection *> OutputSections,
ArrayRef<uint8_t> SectionTable,
const llvm::codeview::DebugInfo &BuildId) {
PDBLinker PDB(Symtab);
PDB.initialize(DI);
PDB.initialize(BuildId);
PDB.addObjectsToPDB();
PDB.addSections(SectionTable);
PDB.addSections(OutputSections, SectionTable);
PDB.commit();
}
void PDBLinker::initialize(const llvm::codeview::DebugInfo *DI) {
void PDBLinker::initialize(const llvm::codeview::DebugInfo &BuildId) {
ExitOnErr(Builder.initialize(4096)); // 4096 is blocksize
// Create streams in MSF for predefined streams, namely
@ -625,25 +945,64 @@ void PDBLinker::initialize(const llvm::codeview::DebugInfo *DI) {
// Add an Info stream.
auto &InfoBuilder = Builder.getInfoBuilder();
InfoBuilder.setAge(DI ? DI->PDB70.Age : 0);
InfoBuilder.setAge(BuildId.PDB70.Age);
GUID uuid{};
if (DI)
memcpy(&uuid, &DI->PDB70.Signature, sizeof(uuid));
GUID uuid;
memcpy(&uuid, &BuildId.PDB70.Signature, sizeof(uuid));
InfoBuilder.setGuid(uuid);
InfoBuilder.setSignature(time(nullptr));
InfoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70);
// Add an empty DBI stream.
pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder();
DbiBuilder.setAge(BuildId.PDB70.Age);
DbiBuilder.setVersionHeader(pdb::PdbDbiV70);
ExitOnErr(DbiBuilder.addDbgStream(pdb::DbgHeaderType::NewFPO, {}));
}
void PDBLinker::addSections(ArrayRef<uint8_t> SectionTable) {
// Add Section Contributions.
/// Adds one section contribution to the DBI stream for chunk \p C, which lives
/// in output section \p OS.
///
/// Chunks that are SectionChunks are attributed to the module of the file they
/// came from and carry a CRC of their contents; every other chunk is
/// attributed to \p LinkerModule and uses the output section's
/// characteristics.
///
/// \param LinkerModule Module that owns chunks not backed by an input section.
/// \param OS           Output section containing the chunk.
/// \param C            The chunk being described.
void PDBLinker::addSectionContrib(pdb::DbiModuleDescriptorBuilder &LinkerModule,
                                  OutputSection *OS, Chunk *C) {
  pdb::SectionContrib Contrib;
  // Start from all-zero bytes so that fields not assigned below are zero.
  memset(&Contrib, 0, sizeof(Contrib));

  Contrib.ISect = OS->SectionIndex;
  Contrib.Off = C->getRVA() - OS->getRVA();
  Contrib.Size = C->getSize();

  auto *InputSection = dyn_cast<SectionChunk>(C);
  if (!InputSection) {
    Contrib.Characteristics = OS->getCharacteristics();
    // FIXME: When we start creating DBI for import libraries, use those here.
    Contrib.Imod = LinkerModule.getModuleIndex();
  } else {
    Contrib.Characteristics = InputSection->Header->Characteristics;
    Contrib.Imod = InputSection->File->ModuleDBI->getModuleIndex();

    // Checksum the chunk's bytes; JamCRC consumes chars, so view the same
    // bytes through a char ArrayRef.
    ArrayRef<uint8_t> Bytes = InputSection->getContents();
    ArrayRef<char> Chars = makeArrayRef(
        reinterpret_cast<const char *>(Bytes.data()), Bytes.size());
    JamCRC Checksum(0);
    Checksum.update(Chars);
    Contrib.DataCrc = Checksum.getCRC();
  }

  Contrib.RelocCrc = 0; // FIXME
  Builder.getDbiBuilder().addSectionContrib(Contrib);
}
void PDBLinker::addSections(ArrayRef<OutputSection *> OutputSections,
ArrayRef<uint8_t> SectionTable) {
// It's not entirely clear what this is, but the * Linker * module uses it.
pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder();
addSectionContribs(Symtab, DbiBuilder);
NativePath = Config->PDBPath;
sys::fs::make_absolute(NativePath);
sys::path::native(NativePath, sys::path::Style::windows);
uint32_t PdbFilePathNI = DbiBuilder.addECName(NativePath);
auto &LinkerModule = ExitOnErr(DbiBuilder.addModuleInfo("* Linker *"));
LinkerModule.setPdbFilePathNI(PdbFilePathNI);
addCommonLinkerModuleSymbols(NativePath, LinkerModule, Alloc);
// Add section contributions. They must be ordered by ascending RVA.
for (OutputSection *OS : OutputSections) {
addLinkerModuleSectionSymbol(LinkerModule, *OS, Alloc);
for (Chunk *C : OS->getChunks())
addSectionContrib(LinkerModule, OS, C);
}
// Add Section Map stream.
ArrayRef<object::coff_section> Sections = {
@ -652,15 +1011,6 @@ void PDBLinker::addSections(ArrayRef<uint8_t> SectionTable) {
SectionMap = pdb::DbiStreamBuilder::createSectionMap(Sections);
DbiBuilder.setSectionMap(SectionMap);
// It's not entirely clear what this is, but the * Linker * module uses it.
NativePath = Config->PDBPath;
sys::fs::make_absolute(NativePath);
sys::path::native(NativePath, sys::path::Style::windows);
uint32_t PdbFilePathNI = DbiBuilder.addECName(NativePath);
auto &LinkerModule = ExitOnErr(DbiBuilder.addModuleInfo("* Linker *"));
LinkerModule.setPdbFilePathNI(PdbFilePathNI);
addLinkerModuleSymbols(NativePath, LinkerModule, Alloc);
// Add COFF section header stream.
ExitOnErr(
DbiBuilder.addDbgStream(pdb::DbgHeaderType::SectionHdr, SectionTable));

7
deps/lld/COFF/PDB.h vendored
View File

@ -21,10 +21,13 @@ union DebugInfo;
namespace lld {
namespace coff {
class OutputSection;
class SymbolTable;
void createPDB(SymbolTable *Symtab, llvm::ArrayRef<uint8_t> SectionTable,
const llvm::codeview::DebugInfo *DI);
void createPDB(SymbolTable *Symtab,
llvm::ArrayRef<OutputSection *> OutputSections,
llvm::ArrayRef<uint8_t> SectionTable,
const llvm::codeview::DebugInfo &BuildId);
}
}

View File

@ -20,7 +20,7 @@ using namespace lld;
using namespace lld::coff;
using namespace llvm;
Optional<std::string> coff::demangle(StringRef S) {
Optional<std::string> coff::demangleMSVC(StringRef S) {
#if defined(_MSC_VER)
// UnDecorateSymbolName is not thread-safe, so we need a mutex.
static std::mutex Mu;

View File

@ -16,7 +16,7 @@
namespace lld {
namespace coff {
llvm::Optional<std::string> demangle(llvm::StringRef S);
llvm::Optional<std::string> demangleMSVC(llvm::StringRef S);
}
}

View File

@ -10,10 +10,10 @@
#include "SymbolTable.h"
#include "Config.h"
#include "Driver.h"
#include "Error.h"
#include "LTO.h"
#include "Memory.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@ -24,36 +24,6 @@ using namespace llvm;
namespace lld {
namespace coff {
/// Outcome of deciding between an already-known symbol and a newly seen
/// definition of the same name.
enum SymbolPreference {
// Keep the symbol that is already in the table.
SP_EXISTING = -1,
// Neither choice is valid; the two definitions conflict.
SP_CONFLICT = 0,
// Keep the newly seen symbol.
SP_NEW = 1,
};
/// Decides whether the existing symbol \p S or a newly seen definition wins.
///
/// \param S           The symbol currently in the table.
/// \param WasInserted True when \p S was only just created, i.e. there was no
///                    earlier symbol of that name.
/// \param NewIsCOMDAT True when the new definition is a COMDAT.
/// \returns SP_EXISTING to keep \p S, SP_NEW to take the new symbol, and
///          SP_CONFLICT when no valid resolution exists.
static SymbolPreference compareDefined(Symbol *S, bool WasInserted,
                                       bool NewIsCOMDAT) {
  // Only a previously known, defined symbol can compete with the new one.
  if (!WasInserted && isa<Defined>(S->body())) {
    // Against an existing DefinedRegular, both sides must be COMDATs. There is
    // then no reason to prefer either, so the existing one is kept; if either
    // side is not a COMDAT the definitions conflict.
    if (auto *Regular = dyn_cast<DefinedRegular>(S->body()))
      return (NewIsCOMDAT && Regular->isCOMDAT()) ? SP_EXISTING : SP_CONFLICT;
  }

  // Freshly inserted, not defined, or defined as something other than a
  // DefinedRegular: the new symbol wins.
  return SP_NEW;
}
SymbolTable *Symtab;
void SymbolTable::addFile(InputFile *File) {
@ -68,12 +38,12 @@ void SymbolTable::addFile(InputFile *File) {
" conflicts with " + machineToStr(Config->Machine));
}
if (auto *F = dyn_cast<ObjectFile>(File)) {
ObjectFiles.push_back(F);
if (auto *F = dyn_cast<ObjFile>(File)) {
ObjFile::Instances.push_back(F);
} else if (auto *F = dyn_cast<BitcodeFile>(File)) {
BitcodeFiles.push_back(F);
BitcodeFile::Instances.push_back(F);
} else if (auto *F = dyn_cast<ImportFile>(File)) {
ImportFiles.push_back(F);
ImportFile::Instances.push_back(F);
}
StringRef S = File->getDirectives();
@ -84,70 +54,95 @@ void SymbolTable::addFile(InputFile *File) {
Driver->parseDirectives(S);
}
/// Reports \p S through error(), or through warn() instead when Config->Force
/// is set.
static void errorOrWarn(const Twine &S) {
  if (!Config->Force) {
    error(S);
    return;
  }
  warn(S);
}
void SymbolTable::reportRemainingUndefines() {
SmallPtrSet<SymbolBody *, 8> Undefs;
for (auto &I : Symtab) {
SmallPtrSet<Symbol *, 8> Undefs;
DenseMap<Symbol *, Symbol *> LocalImports;
for (auto &I : SymMap) {
Symbol *Sym = I.second;
auto *Undef = dyn_cast<Undefined>(Sym->body());
auto *Undef = dyn_cast<Undefined>(Sym);
if (!Undef)
continue;
if (!Sym->IsUsedInRegularObj)
continue;
StringRef Name = Undef->getName();
// A weak alias may have been resolved, so check for that.
if (Defined *D = Undef->getWeakAlias()) {
// We resolve weak aliases by replacing the alias's SymbolBody with the
// target's SymbolBody. This causes all SymbolBody pointers referring to
// the old symbol to instead refer to the new symbol. However, we can't
// just blindly copy sizeof(Symbol::Body) bytes from D to Sym->Body
// because D may be an internal symbol, and internal symbols are stored as
// "unparented" SymbolBodies. For that reason we need to check which type
// of symbol we are dealing with and copy the correct number of bytes.
// We want to replace Sym with D. However, we can't just blindly
// copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
// internal symbol, and internal symbols are stored as "unparented"
// Symbols. For that reason we need to check which type of symbol we
// are dealing with and copy the correct number of bytes.
if (isa<DefinedRegular>(D))
memcpy(Sym->Body.buffer, D, sizeof(DefinedRegular));
memcpy(Sym, D, sizeof(DefinedRegular));
else if (isa<DefinedAbsolute>(D))
memcpy(Sym->Body.buffer, D, sizeof(DefinedAbsolute));
memcpy(Sym, D, sizeof(DefinedAbsolute));
else
// No other internal symbols are possible.
Sym->Body = D->symbol()->Body;
memcpy(Sym, D, sizeof(SymbolUnion));
continue;
}
// If we can resolve a symbol by removing __imp_ prefix, do that.
// This odd rule is for compatibility with MSVC linker.
if (Name.startswith("__imp_")) {
Symbol *Imp = find(Name.substr(strlen("__imp_")));
if (Imp && isa<Defined>(Imp->body())) {
auto *D = cast<Defined>(Imp->body());
replaceBody<DefinedLocalImport>(Sym, Name, D);
LocalImportChunks.push_back(
cast<DefinedLocalImport>(Sym->body())->getChunk());
if (Imp && isa<Defined>(Imp)) {
auto *D = cast<Defined>(Imp);
replaceSymbol<DefinedLocalImport>(Sym, Name, D);
LocalImportChunks.push_back(cast<DefinedLocalImport>(Sym)->getChunk());
LocalImports[Sym] = D;
continue;
}
}
// Remaining undefined symbols are not fatal if /force is specified.
// They are replaced with dummy defined symbols.
if (Config->Force)
replaceBody<DefinedAbsolute>(Sym, Name, 0);
Undefs.insert(Sym->body());
replaceSymbol<DefinedAbsolute>(Sym, Name, 0);
Undefs.insert(Sym);
}
if (Undefs.empty())
if (Undefs.empty() && LocalImports.empty())
return;
for (SymbolBody *B : Config->GCRoot)
for (Symbol *B : Config->GCRoot) {
if (Undefs.count(B))
warn("<root>: undefined symbol: " + B->getName());
for (ObjectFile *File : ObjectFiles)
for (SymbolBody *Sym : File->getSymbols())
errorOrWarn("<root>: undefined symbol: " + B->getName());
if (Config->WarnLocallyDefinedImported)
if (Symbol *Imp = LocalImports.lookup(B))
warn("<root>: locally defined symbol imported: " + Imp->getName() +
" (defined in " + toString(Imp->getFile()) + ")");
}
for (ObjFile *File : ObjFile::Instances) {
for (Symbol *Sym : File->getSymbols()) {
if (!Sym)
continue;
if (Undefs.count(Sym))
warn(toString(File) + ": undefined symbol: " + Sym->getName());
if (!Config->Force)
fatal("link failed");
errorOrWarn(toString(File) + ": undefined symbol: " + Sym->getName());
if (Config->WarnLocallyDefinedImported)
if (Symbol *Imp = LocalImports.lookup(Sym))
warn(toString(File) + ": locally defined symbol imported: " +
Imp->getName() + " (defined in " + toString(Imp->getFile()) +
")");
}
}
}
std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) {
Symbol *&Sym = Symtab[CachedHashStringRef(Name)];
Symbol *&Sym = SymMap[CachedHashStringRef(Name)];
if (Sym)
return {Sym, false};
Sym = make<Symbol>();
Sym = (Symbol *)make<SymbolUnion>();
Sym->IsUsedInRegularObj = false;
Sym->PendingArchiveLoad = false;
return {Sym, true};
@ -160,11 +155,11 @@ Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F,
std::tie(S, WasInserted) = insert(Name);
if (!F || !isa<BitcodeFile>(F))
S->IsUsedInRegularObj = true;
if (WasInserted || (isa<Lazy>(S->body()) && IsWeakAlias)) {
replaceBody<Undefined>(S, Name);
if (WasInserted || (isa<Lazy>(S) && IsWeakAlias)) {
replaceSymbol<Undefined>(S, Name);
return S;
}
if (auto *L = dyn_cast<Lazy>(S->body())) {
if (auto *L = dyn_cast<Lazy>(S)) {
if (!S->PendingArchiveLoad) {
S->PendingArchiveLoad = true;
L->File->addMember(&L->Sym);
@ -179,10 +174,10 @@ void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) {
bool WasInserted;
std::tie(S, WasInserted) = insert(Name);
if (WasInserted) {
replaceBody<Lazy>(S, F, Sym);
replaceSymbol<Lazy>(S, F, Sym);
return;
}
auto *U = dyn_cast<Undefined>(S->body());
auto *U = dyn_cast<Undefined>(S);
if (!U || U->WeakAlias || S->PendingArchiveLoad)
return;
S->PendingArchiveLoad = true;
@ -190,9 +185,8 @@ void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) {
}
// Reports a "duplicate symbol" error that names the symbol, the file holding
// the existing definition, and the file that attempted to define it again.
// NewFile may be null: other functions in this file call this with nullptr
// when the new definition does not come from an input file.
// NOTE(review): the form that calls toString(NewFile) unconditionally relies
// on toString accepting a null file — confirm.
void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) {
error("duplicate symbol: " + toString(*Existing->body()) + " in " +
toString(Existing->body()->getFile()) + " and in " +
(NewFile ? toString(NewFile) : "(internal)"));
error("duplicate symbol: " + toString(*Existing) + " in " +
toString(Existing->getFile()) + " and in " + toString(NewFile));
}
Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) {
@ -200,9 +194,9 @@ Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) {
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
S->IsUsedInRegularObj = true;
if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body()))
replaceBody<DefinedAbsolute>(S, N, Sym);
else if (!isa<DefinedCOFF>(S->body()))
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
replaceSymbol<DefinedAbsolute>(S, N, Sym);
else if (!isa<DefinedCOFF>(S))
reportDuplicate(S, nullptr);
return S;
}
@ -212,9 +206,9 @@ Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) {
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
S->IsUsedInRegularObj = true;
if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body()))
replaceBody<DefinedAbsolute>(S, N, VA);
else if (!isa<DefinedCOFF>(S->body()))
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
replaceSymbol<DefinedAbsolute>(S, N, VA);
else if (!isa<DefinedCOFF>(S))
reportDuplicate(S, nullptr);
return S;
}
@ -224,14 +218,14 @@ Symbol *SymbolTable::addSynthetic(StringRef N, Chunk *C) {
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
S->IsUsedInRegularObj = true;
if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body()))
replaceBody<DefinedSynthetic>(S, N, C);
else if (!isa<DefinedCOFF>(S->body()))
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
replaceSymbol<DefinedSynthetic>(S, N, C);
else if (!isa<DefinedCOFF>(S))
reportDuplicate(S, nullptr);
return S;
}
Symbol *SymbolTable::addRegular(InputFile *F, StringRef N, bool IsCOMDAT,
Symbol *SymbolTable::addRegular(InputFile *F, StringRef N,
const coff_symbol_generic *Sym,
SectionChunk *C) {
Symbol *S;
@ -239,21 +233,32 @@ Symbol *SymbolTable::addRegular(InputFile *F, StringRef N, bool IsCOMDAT,
std::tie(S, WasInserted) = insert(N);
if (!isa<BitcodeFile>(F))
S->IsUsedInRegularObj = true;
SymbolPreference SP = compareDefined(S, WasInserted, IsCOMDAT);
if (SP == SP_CONFLICT) {
if (WasInserted || !isa<DefinedRegular>(S))
replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ false,
/*IsExternal*/ true, Sym, C);
else
reportDuplicate(S, F);
} else if (SP == SP_NEW) {
replaceBody<DefinedRegular>(S, F, N, IsCOMDAT, /*IsExternal*/ true, Sym, C);
} else if (SP == SP_EXISTING && IsCOMDAT && C) {
C->markDiscarded();
// Discard associative chunks that we've parsed so far. No need to recurse
// because an associative section cannot have children.
for (SectionChunk *Child : C->children())
Child->markDiscarded();
}
return S;
}
// Registers a COMDAT definition of symbol N coming from file F.
//
// Returns the symbol table entry for N paired with a flag:
//   - true:  this definition was installed, because N was newly inserted or
//            the existing entry was not a DefinedRegular.
//   - false: an existing DefinedRegular was kept. If that existing symbol is
//            not itself a COMDAT, a duplicate-symbol error is reported.
std::pair<Symbol *, bool>
SymbolTable::addComdat(InputFile *F, StringRef N,
const coff_symbol_generic *Sym) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
// Definitions from bitcode files do not count as a use by a regular object.
if (!isa<BitcodeFile>(F))
S->IsUsedInRegularObj = true;
if (WasInserted || !isa<DefinedRegular>(S)) {
// The symbol is created with a null chunk.
// NOTE(review): presumably the caller attaches the section chunk once it
// knows this definition won — confirm against the call site.
replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ true,
/*IsExternal*/ true, Sym, nullptr);
return {S, true};
}
// A COMDAT may coexist with another COMDAT of the same name, but not with a
// non-COMDAT regular definition.
if (!cast<DefinedRegular>(S)->isCOMDAT())
reportDuplicate(S, F);
return {S, false};
}
Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size,
const coff_symbol_generic *Sym, CommonChunk *C) {
Symbol *S;
@ -261,51 +266,56 @@ Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size,
std::tie(S, WasInserted) = insert(N);
if (!isa<BitcodeFile>(F))
S->IsUsedInRegularObj = true;
if (WasInserted || !isa<DefinedCOFF>(S->body()))
replaceBody<DefinedCommon>(S, F, N, Size, Sym, C);
else if (auto *DC = dyn_cast<DefinedCommon>(S->body()))
if (WasInserted || !isa<DefinedCOFF>(S))
replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C);
else if (auto *DC = dyn_cast<DefinedCommon>(S))
if (Size > DC->getSize())
replaceBody<DefinedCommon>(S, F, N, Size, Sym, C);
replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C);
return S;
}
Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) {
DefinedImportData *SymbolTable::addImportData(StringRef N, ImportFile *F) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
S->IsUsedInRegularObj = true;
if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body()))
replaceBody<DefinedImportData>(S, N, F);
else if (!isa<DefinedCOFF>(S->body()))
reportDuplicate(S, nullptr);
return S;
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) {
replaceSymbol<DefinedImportData>(S, N, F);
return cast<DefinedImportData>(S);
}
reportDuplicate(S, F);
return nullptr;
}
Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID,
uint16_t Machine) {
DefinedImportThunk *SymbolTable::addImportThunk(StringRef Name,
DefinedImportData *ID,
uint16_t Machine) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(Name);
S->IsUsedInRegularObj = true;
if (WasInserted || isa<Undefined>(S->body()) || isa<Lazy>(S->body()))
replaceBody<DefinedImportThunk>(S, Name, ID, Machine);
else if (!isa<DefinedCOFF>(S->body()))
reportDuplicate(S, nullptr);
return S;
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) {
replaceSymbol<DefinedImportThunk>(S, Name, ID, Machine);
return cast<DefinedImportThunk>(S);
}
reportDuplicate(S, ID->File);
return nullptr;
}
std::vector<Chunk *> SymbolTable::getChunks() {
std::vector<Chunk *> Res;
for (ObjectFile *File : ObjectFiles) {
std::vector<Chunk *> &V = File->getChunks();
for (ObjFile *File : ObjFile::Instances) {
ArrayRef<Chunk *> V = File->getChunks();
Res.insert(Res.end(), V.begin(), V.end());
}
return Res;
}
// Looks up Name in the symbol hash map. Returns the stored Symbol pointer, or
// nullptr when no entry with that exact name exists. Never inserts.
Symbol *SymbolTable::find(StringRef Name) {
auto It = Symtab.find(CachedHashStringRef(Name));
if (It == Symtab.end())
auto It = SymMap.find(CachedHashStringRef(Name));
if (It == SymMap.end())
return nullptr;
return It->second;
}
@ -317,7 +327,7 @@ Symbol *SymbolTable::findUnderscore(StringRef Name) {
}
StringRef SymbolTable::findByPrefix(StringRef Prefix) {
for (auto Pair : Symtab) {
for (auto Pair : SymMap) {
StringRef Name = Pair.first.val();
if (Name.startswith(Prefix))
return Name;
@ -327,47 +337,57 @@ StringRef SymbolTable::findByPrefix(StringRef Prefix) {
// Tries to map Name to the name of a symbol that actually exists in the
// symbol table, accounting for calling-convention and C++ decoration.
//
// Search order:
//   1. Name itself, if it is present and not Undefined.
//   2. On non-x86 targets: any symbol starting with "?<Name>@@Y".
//   3. On x86 (I386), only if Name starts with '_', the first hit among the
//      prefixes "<Name>@", "@<Name minus '_'>@", "<Name minus '_'>@@" and
//      "?<Name minus '_'>@@Y".
//
// Returns "" on x86 when Name has no leading underscore; otherwise returns
// whatever the last findByPrefix call yields.
// NOTE(review): findByPrefix presumably returns an empty string on a miss (the
// !S.empty() checks suggest so) — its tail is not visible here.
StringRef SymbolTable::findMangle(StringRef Name) {
if (Symbol *Sym = find(Name))
if (!isa<Undefined>(Sym->body()))
if (!isa<Undefined>(Sym))
return Name;
if (Config->Machine != I386)
return findByPrefix(("?" + Name + "@@Y").str());
if (!Name.startswith("_"))
return "";
// Search for x86 C function.
// Search for x86 stdcall function.
StringRef S = findByPrefix((Name + "@").str());
if (!S.empty())
return S;
// Search for x86 fastcall function.
S = findByPrefix(("@" + Name.substr(1) + "@").str());
if (!S.empty())
return S;
// Search for x86 vectorcall function.
S = findByPrefix((Name.substr(1) + "@@").str());
if (!S.empty())
return S;
// Search for x86 C++ non-member function.
return findByPrefix(("?" + Name.substr(1) + "@@Y").str());
}
void SymbolTable::mangleMaybe(SymbolBody *B) {
void SymbolTable::mangleMaybe(Symbol *B) {
auto *U = dyn_cast<Undefined>(B);
if (!U || U->WeakAlias)
return;
StringRef Alias = findMangle(U->getName());
if (!Alias.empty())
if (!Alias.empty()) {
log(U->getName() + " aliased to " + Alias);
U->WeakAlias = addUndefined(Alias);
}
}
SymbolBody *SymbolTable::addUndefined(StringRef Name) {
return addUndefined(Name, nullptr, false)->body();
Symbol *SymbolTable::addUndefined(StringRef Name) {
return addUndefined(Name, nullptr, false);
}
// Creates a fresh BitcodeCompiler (replacing any previous one held in LTO),
// adds every known bitcode input file to it, and returns the result of
// compile(). The returned buffers are consumed by addCombinedLTOObjects().
std::vector<StringRef> SymbolTable::compileBitcodeFiles() {
LTO.reset(new BitcodeCompiler);
for (BitcodeFile *F : BitcodeFiles)
for (BitcodeFile *F : BitcodeFile::Instances)
LTO->add(*F);
return LTO->compile();
}
// Compiles all bitcode inputs and adds the results to the link as ordinary
// object files. Does nothing when there are no bitcode inputs.
void SymbolTable::addCombinedLTOObjects() {
if (BitcodeFiles.empty())
if (BitcodeFile::Instances.empty())
return;
// Each compiled buffer is wrapped in an object file labelled "lto.tmp",
// parsed, and appended to the list of object files.
for (StringRef Object : compileBitcodeFiles()) {
auto *Obj = make<ObjectFile>(MemoryBufferRef(Object, "lto.tmp"));
auto *Obj = make<ObjFile>(MemoryBufferRef(Object, "lto.tmp"));
Obj->parse();
ObjectFiles.push_back(Obj);
ObjFile::Instances.push_back(Obj);
}
}

View File

@ -31,8 +31,7 @@ class DefinedAbsolute;
class DefinedRelative;
class Lazy;
class SectionChunk;
class SymbolBody;
struct Symbol;
class Symbol;
// SymbolTable is a bucket of all known symbols, including defined,
// undefined, or lazy symbols (the last one is symbols in archive
@ -66,7 +65,7 @@ public:
// mangled symbol. This function tries to find a mangled name
// for U from the symbol table, and if found, set the symbol as
// a weak alias for U.
void mangleMaybe(SymbolBody *B);
void mangleMaybe(Symbol *B);
StringRef findMangle(StringRef Name);
// Build a set of COFF objects representing the combined contents of
@ -75,15 +74,8 @@ public:
void addCombinedLTOObjects();
std::vector<StringRef> compileBitcodeFiles();
// The writer needs to handle DLL import libraries specially in
// order to create the import descriptor table.
std::vector<ImportFile *> ImportFiles;
// The writer needs to infer the machine type from the object files.
std::vector<ObjectFile *> ObjectFiles;
// Creates an Undefined symbol for a given name.
SymbolBody *addUndefined(StringRef Name);
Symbol *addUndefined(StringRef Name);
Symbol *addSynthetic(StringRef N, Chunk *C);
Symbol *addAbsolute(StringRef N, uint64_t VA);
@ -91,28 +83,35 @@ public:
Symbol *addUndefined(StringRef Name, InputFile *F, bool IsWeakAlias);
void addLazy(ArchiveFile *F, const Archive::Symbol Sym);
Symbol *addAbsolute(StringRef N, COFFSymbolRef S);
Symbol *addRegular(InputFile *F, StringRef N, bool IsCOMDAT,
Symbol *addRegular(InputFile *F, StringRef N,
const llvm::object::coff_symbol_generic *S = nullptr,
SectionChunk *C = nullptr);
std::pair<Symbol *, bool>
addComdat(InputFile *F, StringRef N,
const llvm::object::coff_symbol_generic *S = nullptr);
Symbol *addCommon(InputFile *F, StringRef N, uint64_t Size,
const llvm::object::coff_symbol_generic *S = nullptr,
CommonChunk *C = nullptr);
Symbol *addImportData(StringRef N, ImportFile *F);
Symbol *addImportThunk(StringRef Name, DefinedImportData *S,
uint16_t Machine);
DefinedImportData *addImportData(StringRef N, ImportFile *F);
DefinedImportThunk *addImportThunk(StringRef Name, DefinedImportData *S,
uint16_t Machine);
void reportDuplicate(Symbol *Existing, InputFile *NewFile);
// A list of chunks to be added to .rdata.
std::vector<Chunk *> LocalImportChunks;
// Iterates symbols in non-deterministic hash table order.
// Callback is invoked once per entry in SymMap and receives the stored
// Symbol pointer. Callers must not depend on the visiting order.
template <typename T> void forEachSymbol(T Callback) {
for (auto &Pair : SymMap)
Callback(Pair.second);
}
private:
std::pair<Symbol *, bool> insert(StringRef Name);
StringRef findByPrefix(StringRef Prefix);
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> Symtab;
std::vector<BitcodeFile *> BitcodeFiles;
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> SymMap;
std::unique_ptr<BitcodeCompiler> LTO;
};

View File

@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "Symbols.h"
#include "Error.h"
#include "InputFiles.h"
#include "Memory.h"
#include "Strings.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@ -20,8 +20,8 @@ using namespace llvm;
using namespace llvm::object;
// Returns a symbol name for an error message.
std::string lld::toString(coff::SymbolBody &B) {
if (Optional<std::string> S = coff::demangle(B.getName()))
std::string lld::toString(coff::Symbol &B) {
if (Optional<std::string> S = coff::demangleMSVC(B.getName()))
return ("\"" + *S + "\" (" + B.getName() + ")").str();
return B.getName();
}
@ -29,7 +29,7 @@ std::string lld::toString(coff::SymbolBody &B) {
namespace lld {
namespace coff {
StringRef SymbolBody::getName() {
StringRef Symbol::getName() {
// COFF symbol names are read lazily for performance reasons.
// Non-external symbol names are never used by the linker except for logging
// or debugging. Their internal references are resolved not by name but by
@ -39,12 +39,12 @@ StringRef SymbolBody::getName() {
// is a waste of time.
if (Name.empty()) {
auto *D = cast<DefinedCOFF>(this);
cast<ObjectFile>(D->File)->getCOFFObj()->getSymbolName(D->Sym, Name);
cast<ObjFile>(D->File)->getCOFFObj()->getSymbolName(D->Sym, Name);
}
return Name;
}
InputFile *SymbolBody::getFile() {
InputFile *Symbol::getFile() {
if (auto *Sym = dyn_cast<DefinedCOFF>(this))
return Sym->File;
if (auto *Sym = dyn_cast<Lazy>(this))
@ -52,9 +52,19 @@ InputFile *SymbolBody::getFile() {
return nullptr;
}
// Reports whether this symbol is considered live, based on its kind:
//   - DefinedRegular:     liveness of the chunk it points to.
//   - DefinedImportData:  the Live flag of its import file.
//   - DefinedImportThunk: the Live flag of the wrapped import symbol's file.
//   - anything else:      always true.
bool Symbol::isLive() const {
if (auto *R = dyn_cast<DefinedRegular>(this))
return R->getChunk()->isLive();
if (auto *Imp = dyn_cast<DefinedImportData>(this))
return Imp->File->Live;
if (auto *Imp = dyn_cast<DefinedImportThunk>(this))
return Imp->WrappedSym->File->Live;
// Assume any other kind of symbol is live.
return true;
}
COFFSymbolRef DefinedCOFF::getCOFFSymbol() {
size_t SymSize =
cast<ObjectFile>(File)->getCOFFObj()->getSymbolTableEntrySize();
size_t SymSize = cast<ObjFile>(File)->getCOFFObj()->getSymbolTableEntrySize();
if (SymSize == sizeof(coff_symbol16))
return COFFSymbolRef(reinterpret_cast<const coff_symbol16 *>(Sym));
assert(SymSize == sizeof(coff_symbol32));
@ -81,7 +91,7 @@ DefinedImportThunk::DefinedImportThunk(StringRef Name, DefinedImportData *S,
Defined *Undefined::getWeakAlias() {
// A weak alias may be a weak alias to another symbol, so check recursively.
for (SymbolBody *A = WeakAlias; A; A = cast<Undefined>(A)->WeakAlias)
for (Symbol *A = WeakAlias; A; A = cast<Undefined>(A)->WeakAlias)
if (auto *D = dyn_cast<Defined>(A))
return D;
return nullptr;

View File

@ -12,8 +12,8 @@
#include "Chunks.h"
#include "Config.h"
#include "Memory.h"
#include "lld/Core/LLVM.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
@ -31,12 +31,11 @@ using llvm::object::coff_symbol_generic;
class ArchiveFile;
class InputFile;
class ObjectFile;
struct Symbol;
class ObjFile;
class SymbolTable;
// The base class for real symbol classes.
class SymbolBody {
class Symbol {
public:
enum Kind {
// The order of these is significant. We start with the regular defined
@ -70,16 +69,16 @@ public:
// Returns the file from which this symbol was created.
InputFile *getFile();
Symbol *symbol();
const Symbol *symbol() const {
return const_cast<SymbolBody *>(this)->symbol();
}
// Indicates that this symbol will be included in the final image. Only valid
// after calling markLive.
bool isLive() const;
protected:
friend SymbolTable;
explicit SymbolBody(Kind K, StringRef N = "")
explicit Symbol(Kind K, StringRef N = "")
: SymbolKind(K), IsExternal(true), IsCOMDAT(false),
WrittenToSymtab(false), Name(N) {}
WrittenToSymtab(false), PendingArchiveLoad(false), IsGCRoot(false),
Name(N) {}
const unsigned SymbolKind : 8;
unsigned IsExternal : 1;
@ -92,19 +91,28 @@ public:
// symbols from being written to the symbol table more than once.
unsigned WrittenToSymtab : 1;
// True if this symbol was referenced by a regular (non-bitcode) object.
unsigned IsUsedInRegularObj : 1;
// True if we've seen both a lazy and an undefined symbol with this symbol
// name, which means that we have enqueued an archive member load and should
// not load any more archive members to resolve the same symbol.
unsigned PendingArchiveLoad : 1;
/// True if we've already added this symbol to the list of GC roots.
unsigned IsGCRoot : 1;
protected:
StringRef Name;
};
// The base class for any defined symbols, including absolute symbols,
// etc.
class Defined : public SymbolBody {
class Defined : public Symbol {
public:
Defined(Kind K, StringRef N) : SymbolBody(K, N) {}
Defined(Kind K, StringRef N) : Symbol(K, N) {}
static bool classof(const SymbolBody *S) {
return S->kind() <= LastDefinedKind;
}
static bool classof(const Symbol *S) { return S->kind() <= LastDefinedKind; }
// Returns the RVA (relative virtual address) of this symbol. The
// writer sets and uses RVAs.
@ -120,12 +128,13 @@ public:
// loaded through that. For bitcode files, Sym is nullptr and the name is stored
// as a StringRef.
class DefinedCOFF : public Defined {
friend SymbolBody;
friend Symbol;
public:
DefinedCOFF(Kind K, InputFile *F, StringRef N, const coff_symbol_generic *S)
: Defined(K, N), File(F), Sym(S) {}
static bool classof(const SymbolBody *S) {
static bool classof(const Symbol *S) {
return S->kind() <= LastDefinedCOFFKind;
}
@ -151,16 +160,15 @@ public:
this->IsCOMDAT = IsCOMDAT;
}
static bool classof(const SymbolBody *S) {
static bool classof(const Symbol *S) {
return S->kind() == DefinedRegularKind;
}
uint64_t getRVA() { return (*Data)->getRVA() + Sym->Value; }
bool isCOMDAT() { return IsCOMDAT; }
SectionChunk *getChunk() { return *Data; }
uint32_t getValue() { return Sym->Value; }
uint64_t getRVA() const { return (*Data)->getRVA() + Sym->Value; }
bool isCOMDAT() const { return IsCOMDAT; }
SectionChunk *getChunk() const { return *Data; }
uint32_t getValue() const { return Sym->Value; }
private:
SectionChunk **Data;
};
@ -173,12 +181,12 @@ public:
this->IsExternal = true;
}
static bool classof(const SymbolBody *S) {
static bool classof(const Symbol *S) {
return S->kind() == DefinedCommonKind;
}
uint64_t getRVA() { return Data->getRVA(); }
Chunk *getChunk() { return Data; }
CommonChunk *getChunk() { return Data; }
private:
friend SymbolTable;
@ -198,7 +206,7 @@ public:
DefinedAbsolute(StringRef N, uint64_t V)
: Defined(DefinedAbsoluteKind, N), VA(V) {}
static bool classof(const SymbolBody *S) {
static bool classof(const Symbol *S) {
return S->kind() == DefinedAbsoluteKind;
}
@ -222,7 +230,7 @@ public:
explicit DefinedSynthetic(StringRef Name, Chunk *C)
: Defined(DefinedSyntheticKind, Name), C(C) {}
static bool classof(const SymbolBody *S) {
static bool classof(const Symbol *S) {
return S->kind() == DefinedSyntheticKind;
}
@ -240,12 +248,12 @@ private:
// object file from an archive to replace itself with a defined
// symbol. If the resolver finds both Undefined and Lazy for
// the same name, it will ask the Lazy to load a file.
class Lazy : public SymbolBody {
class Lazy : public Symbol {
public:
Lazy(ArchiveFile *F, const Archive::Symbol S)
: SymbolBody(LazyKind, S.getName()), File(F), Sym(S) {}
: Symbol(LazyKind, S.getName()), File(F), Sym(S) {}
static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; }
static bool classof(const Symbol *S) { return S->kind() == LazyKind; }
ArchiveFile *File;
@ -257,19 +265,17 @@ private:
};
// Undefined symbols.
class Undefined : public SymbolBody {
class Undefined : public Symbol {
public:
explicit Undefined(StringRef N) : SymbolBody(UndefinedKind, N) {}
explicit Undefined(StringRef N) : Symbol(UndefinedKind, N) {}
static bool classof(const SymbolBody *S) {
return S->kind() == UndefinedKind;
}
static bool classof(const Symbol *S) { return S->kind() == UndefinedKind; }
// An undefined symbol can have a fallback symbol which gives an
// undefined symbol a second chance if it would remain undefined.
// If it remains undefined, it'll be replaced with whatever the
// Alias pointer points to.
SymbolBody *WeakAlias = nullptr;
Symbol *WeakAlias = nullptr;
// If this symbol is external weak, try to resolve it to a defined
// symbol by searching the chain of fallback symbols. Returns the symbol if
@ -289,7 +295,7 @@ public:
: Defined(DefinedImportDataKind, N), File(F) {
}
static bool classof(const SymbolBody *S) {
static bool classof(const Symbol *S) {
return S->kind() == DefinedImportDataKind;
}
@ -313,7 +319,7 @@ class DefinedImportThunk : public Defined {
public:
DefinedImportThunk(StringRef Name, DefinedImportData *S, uint16_t Machine);
static bool classof(const SymbolBody *S) {
static bool classof(const Symbol *S) {
return S->kind() == DefinedImportThunkKind;
}
@ -336,7 +342,7 @@ public:
DefinedLocalImport(StringRef N, Defined *S)
: Defined(DefinedLocalImportKind, N), Data(make<LocalImportChunk>(S)) {}
static bool classof(const SymbolBody *S) {
static bool classof(const Symbol *S) {
return S->kind() == DefinedLocalImportKind;
}
@ -393,51 +399,33 @@ inline Chunk *Defined::getChunk() {
llvm_unreachable("unknown symbol kind");
}
// A real symbol object, SymbolBody, is usually stored within a Symbol. There's
// always one Symbol for each symbol name. The resolver updates the SymbolBody
// stored in the Body field of this object as it resolves symbols. Symbol also
// holds computed properties of symbol names.
struct Symbol {
// True if this symbol was referenced by a regular (non-bitcode) object.
unsigned IsUsedInRegularObj : 1;
// True if we've seen both a lazy and an undefined symbol with this symbol
// name, which means that we have enqueued an archive member load and should
// not load any more archive members to resolve the same symbol.
unsigned PendingArchiveLoad : 1;
// This field is used to store the Symbol's SymbolBody. This instantiation of
// AlignedCharArrayUnion gives us a struct with a char array field that is
// large and aligned enough to store any derived class of SymbolBody.
llvm::AlignedCharArrayUnion<
DefinedRegular, DefinedCommon, DefinedAbsolute, DefinedSynthetic, Lazy,
Undefined, DefinedImportData, DefinedImportThunk, DefinedLocalImport>
Body;
SymbolBody *body() {
return reinterpret_cast<SymbolBody *>(Body.buffer);
}
const SymbolBody *body() const { return const_cast<Symbol *>(this)->body(); }
// A buffer class that is large enough to hold any Symbol-derived
// object. We allocate memory using this class and instantiate a symbol
// using the placement new.
//
// Each member is a char array with the size and alignment of one concrete
// symbol class, so the union as a whole takes the largest size and strictest
// alignment among them. The members exist only to size and align the buffer.
// Any new Symbol subclass must be added here as well.
union SymbolUnion {
alignas(DefinedRegular) char A[sizeof(DefinedRegular)];
alignas(DefinedCommon) char B[sizeof(DefinedCommon)];
alignas(DefinedAbsolute) char C[sizeof(DefinedAbsolute)];
alignas(DefinedSynthetic) char D[sizeof(DefinedSynthetic)];
alignas(Lazy) char E[sizeof(Lazy)];
alignas(Undefined) char F[sizeof(Undefined)];
alignas(DefinedImportData) char G[sizeof(DefinedImportData)];
alignas(DefinedImportThunk) char H[sizeof(DefinedImportThunk)];
alignas(DefinedLocalImport) char I[sizeof(DefinedLocalImport)];
};
template <typename T, typename... ArgT>
void replaceBody(Symbol *S, ArgT &&... Arg) {
static_assert(sizeof(T) <= sizeof(S->Body), "Body too small");
static_assert(alignof(T) <= alignof(decltype(S->Body)),
"Body not aligned enough");
assert(static_cast<SymbolBody *>(static_cast<T *>(nullptr)) == nullptr &&
"Not a SymbolBody");
new (S->Body.buffer) T(std::forward<ArgT>(Arg)...);
}
inline Symbol *SymbolBody::symbol() {
assert(isExternal());
return reinterpret_cast<Symbol *>(reinterpret_cast<char *>(this) -
offsetof(Symbol, Body));
void replaceSymbol(Symbol *S, ArgT &&... Arg) {
static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small");
static_assert(alignof(T) <= alignof(SymbolUnion),
"SymbolUnion not aligned enough");
assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
"Not a Symbol");
new (S) T(std::forward<ArgT>(Arg)...);
}
} // namespace coff
std::string toString(coff::SymbolBody &B);
std::string toString(coff::Symbol &B);
} // namespace lld
#endif

View File

@ -10,22 +10,22 @@
#include "Writer.h"
#include "Config.h"
#include "DLL.h"
#include "Error.h"
#include "InputFiles.h"
#include "MapFile.h"
#include "Memory.h"
#include "PDB.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdio>
#include <map>
@ -65,8 +65,9 @@ public:
D->Type = COFF::IMAGE_DEBUG_TYPE_CODEVIEW;
D->SizeOfData = Record->getSize();
D->AddressOfRawData = Record->getRVA();
// TODO(compnerd) get the file offset
D->PointerToRawData = 0;
OutputSection *OS = Record->getOutputSection();
uint64_t Offs = OS->getFileOff() + (Record->getRVA() - OS->getRVA());
D->PointerToRawData = Offs;
++D;
}
@ -77,32 +78,37 @@ private:
};
// Chunk holding the CodeView debug record: a codeview::DebugInfo header
// immediately followed by the NUL-terminated path of the PDB file.
class CVDebugRecordChunk : public Chunk {
public:
// Captures the configured PDB path and, when it is non-empty, converts it to
// an absolute path.
CVDebugRecordChunk() {
PDBAbsPath = Config->PDBPath;
if (!PDBAbsPath.empty())
llvm::sys::fs::make_absolute(PDBAbsPath);
}
// Size of the header plus the path plus its terminating NUL byte.
size_t getSize() const override {
return sizeof(codeview::DebugInfo) + Config->PDBPath.size() + 1;
return sizeof(codeview::DebugInfo) + PDBAbsPath.size() + 1;
}
// Writes the path portion of the record into the output buffer and records
// where the header lives so it can be filled in afterwards.
void writeTo(uint8_t *B) const override {
// Save off the DebugInfo entry to backfill the file signature (build id)
// in Writer::writeBuildId
DI = reinterpret_cast<codeview::DebugInfo *>(B + OutputSectionOff);
DI->Signature.CVSignature = OMF::Signature::PDB70;
BuildId = reinterpret_cast<codeview::DebugInfo *>(B + OutputSectionOff);
// variable sized field (PDB Path)
auto *P = reinterpret_cast<char *>(B + OutputSectionOff + sizeof(*DI));
if (!Config->PDBPath.empty())
memcpy(P, Config->PDBPath.data(), Config->PDBPath.size());
P[Config->PDBPath.size()] = '\0';
char *P = reinterpret_cast<char *>(B + OutputSectionOff + sizeof(*BuildId));
if (!PDBAbsPath.empty())
memcpy(P, PDBAbsPath.data(), PDBAbsPath.size());
P[PDBAbsPath.size()] = '\0';
}
public:
// Pointer into the output buffer, set by writeTo(); null until then. It is
// mutable because writeTo() is const.
mutable codeview::DebugInfo *DI = nullptr;
SmallString<128> PDBAbsPath;
mutable codeview::DebugInfo *BuildId = nullptr;
};
// The writer writes a SymbolTable result to a file.
class Writer {
public:
Writer(SymbolTable *T) : Symtab(T) {}
Writer() : Buffer(errorHandler().OutputBuffer) {}
void run();
private:
@ -115,11 +121,11 @@ private:
void createSymbolAndStringTable();
void openFile(StringRef OutputPath);
template <typename PEHeaderTy> void writeHeader();
void fixSafeSEHSymbols();
void createSEHTable(OutputSection *RData);
void setSectionPermissions();
void writeSections();
void sortExceptionTable();
void writeBuildId();
void sortExceptionTable();
llvm::Optional<coff_symbol16> createSymbol(Defined *D);
size_t addEntryToStringTable(StringRef Str);
@ -132,8 +138,7 @@ private:
uint32_t getSizeOfInitializedData();
std::map<StringRef, std::vector<DefinedImportData *>> binImports();
SymbolTable *Symtab;
std::unique_ptr<FileOutputBuffer> Buffer;
std::unique_ptr<FileOutputBuffer> &Buffer;
std::vector<OutputSection *> OutputSections;
std::vector<char> Strtab;
std::vector<llvm::object::coff_symbol16> OutputSymtab;
@ -145,6 +150,7 @@ private:
Chunk *DebugDirectory = nullptr;
std::vector<Chunk *> DebugRecords;
CVDebugRecordChunk *BuildId = nullptr;
Optional<codeview::DebugInfo> PreviousBuildId;
ArrayRef<uint8_t> SectionTable;
uint64_t FileSize;
@ -157,7 +163,7 @@ private:
namespace lld {
namespace coff {
void writeResult(SymbolTable *T) { Writer(T).run(); }
void writeResult() { Writer().run(); }
void OutputSection::setRVA(uint64_t RVA) {
Header.VirtualAddress = RVA;
@ -178,10 +184,12 @@ void OutputSection::addChunk(Chunk *C) {
Chunks.push_back(C);
C->setOutputSection(this);
uint64_t Off = Header.VirtualSize;
Off = alignTo(Off, C->getAlign());
Off = alignTo(Off, C->Alignment);
C->setRVA(Off);
C->OutputSectionOff = Off;
Off += C->getSize();
if (Off > UINT32_MAX)
error("section larger than 4 GiB: " + Name);
Header.VirtualSize = Off;
if (C->hasData())
Header.SizeOfRawData = alignTo(Off, SectorSize);
@ -203,7 +211,8 @@ void OutputSection::writeHeaderTo(uint8_t *Buf) {
// If name is too long, write offset into the string table as a name.
sprintf(Hdr->Name, "/%d", StringTableOff);
} else {
assert(!Config->Debug || Name.size() <= COFF::NameSize);
assert(!Config->Debug || Name.size() <= COFF::NameSize ||
(Hdr->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0);
strncpy(Hdr->Name, Name.data(),
std::min(Name.size(), (size_t)COFF::NameSize));
}
@ -212,6 +221,67 @@ void OutputSection::writeHeaderTo(uint8_t *Buf) {
} // namespace coff
} // namespace lld
// PDBs are matched against executables using a build id which consists of three
// components:
// 1. A 16-byte GUID
// 2. An age
// 3. A time stamp.
//
// Debuggers and symbol servers match executables against debug info by checking
// each of these components of the EXE/DLL against the corresponding value in
// the PDB and failing a match if any of the components differ. In the case of
// symbol servers, symbols are cached in a folder that is a function of the
// GUID. As a result, in order to avoid symbol cache pollution where every
// incremental build copies a new PDB to the symbol cache, we must try to re-use
// the existing GUID if one exists, but bump the age. This way the match will
// fail, so the symbol cache knows to use the new PDB, but the GUID matches, so
// it overwrites the existing item in the symbol cache rather than making a new
// one.
//
// Reads the file at Path and returns a copy of its CodeView debug info record
// if one can be recovered. Returns None when debug output is disabled, when
// the file cannot be opened or is not a COFF binary, when its bitness or
// machine type differs from the current configuration, when it has no
// CodeView debug directory entry, or when the record is not in PDB70 format.
// Failures are swallowed on purpose: a missing or unreadable previous output
// just means there is no build id to re-use.
static Optional<codeview::DebugInfo> loadExistingBuildId(StringRef Path) {
// We don't need to incrementally update a previous build id if we're not
// writing codeview debug info.
if (!Config->Debug)
return None;
auto ExpectedBinary = llvm::object::createBinary(Path);
if (!ExpectedBinary) {
// The error must be consumed explicitly before it is dropped.
consumeError(ExpectedBinary.takeError());
return None;
}
auto Binary = std::move(*ExpectedBinary);
if (!Binary.getBinary()->isCOFF())
return None;
std::error_code EC;
COFFObjectFile File(Binary.getBinary()->getMemoryBufferRef(), EC);
if (EC)
return None;
// If the machine of the binary we're outputting doesn't match the machine
// of the existing binary, don't try to re-use the build id.
if (File.is64() != Config->is64() || File.getMachine() != Config->Machine)
return None;
// The loop only establishes that a CodeView entry exists; DebugDir itself is
// not passed to getDebugPDBInfo below.
// NOTE(review): presumably that overload locates the CodeView entry on its
// own — confirm.
for (const auto &DebugDir : File.debug_directories()) {
if (DebugDir.Type != IMAGE_DEBUG_TYPE_CODEVIEW)
continue;
const codeview::DebugInfo *ExistingDI = nullptr;
StringRef PDBFileName;
// This EC shadows the outer std::error_code of the same name; the cast to
// void only silences the unused-variable warning.
if (auto EC = File.getDebugPDBInfo(ExistingDI, PDBFileName)) {
(void)EC;
return None;
}
// We only support writing PDBs in v70 format. So if this is not a build
// id that we recognize / support, ignore it.
if (ExistingDI->Signature.CVSignature != OMF::Signature::PDB70)
return None;
// Return by value: the record points into Binary's buffer, which goes away
// when this function returns.
return *ExistingDI;
}
return None;
}
// The main function of the writer.
void Writer::run() {
createSections();
@ -224,32 +294,39 @@ void Writer::run() {
removeEmptySections();
setSectionPermissions();
createSymbolAndStringTable();
// We must do this before opening the output file, as it depends on being able
// to read the contents of the existing output file.
PreviousBuildId = loadExistingBuildId(Config->OutputFile);
openFile(Config->OutputFile);
if (Config->is64()) {
writeHeader<pe32plus_header>();
} else {
writeHeader<pe32_header>();
}
fixSafeSEHSymbols();
writeSections();
sortExceptionTable();
writeBuildId();
if (!Config->PDBPath.empty() && Config->Debug) {
const llvm::codeview::DebugInfo *DI = nullptr;
if (Config->DebugTypes & static_cast<unsigned>(coff::DebugType::CV))
DI = BuildId->DI;
createPDB(Symtab, SectionTable, DI);
assert(BuildId);
createPDB(Symtab, OutputSections, SectionTable, *BuildId->BuildId);
}
writeMapFile(OutputSections);
if (auto EC = Buffer->commit())
fatal(EC, "failed to write the output file");
if (auto E = Buffer->commit())
fatal("failed to write the output file: " + toString(std::move(E)));
}
static StringRef getOutputSection(StringRef Name) {
StringRef S = Name.split('$').first;
// Treat a later period as a separator for MinGW, for sections like
// ".ctors.01234".
S = S.substr(0, S.find('.', 1));
auto It = Config->Merge.find(S);
if (It == Config->Merge.end())
return S;
@ -303,41 +380,20 @@ void Writer::createMiscChunks() {
if (Config->Debug) {
DebugDirectory = make<DebugDirectoryChunk>(DebugRecords);
// TODO(compnerd) create a coffgrp entry if DebugType::CV is not enabled
if (Config->DebugTypes & static_cast<unsigned>(coff::DebugType::CV)) {
auto *Chunk = make<CVDebugRecordChunk>();
BuildId = Chunk;
DebugRecords.push_back(Chunk);
}
// Make a CVDebugRecordChunk even when /DEBUG:CV is not specified. We
// output a PDB no matter what, and this chunk provides the only means of
// allowing a debugger to match a PDB and an executable. So we need it even
// if we're ultimately not going to write CodeView data to the PDB.
auto *CVChunk = make<CVDebugRecordChunk>();
BuildId = CVChunk;
DebugRecords.push_back(CVChunk);
RData->addChunk(DebugDirectory);
for (Chunk *C : DebugRecords)
RData->addChunk(C);
}
// Create SEH table. x86-only.
if (Config->Machine != I386)
return;
std::set<Defined *> Handlers;
for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) {
if (!File->SEHCompat)
return;
for (SymbolBody *B : File->SEHandlers) {
// Make sure the handler is still live. Assume all handlers are regular
// symbols.
auto *D = dyn_cast<DefinedRegular>(B);
if (D && D->getChunk()->isLive())
Handlers.insert(D);
}
}
if (!Handlers.empty()) {
SEHTable = make<SEHTableChunk>(Handlers);
RData->addChunk(SEHTable);
}
createSEHTable(RData);
}
// Create .idata section for the DLL-imported symbol table.
@ -345,13 +401,13 @@ void Writer::createMiscChunks() {
// IdataContents class abstracted away the details for us,
// so we just let it create chunks and add them to the section.
void Writer::createImportTables() {
if (Symtab->ImportFiles.empty())
if (ImportFile::Instances.empty())
return;
// Initialize DLLOrder so that import entries are ordered in
// the same order as in the command line. (That affects DLL
// initialization order, and this ordering is MSVC-compatible.)
for (ImportFile *File : Symtab->ImportFiles) {
for (ImportFile *File : ImportFile::Instances) {
if (!File->Live)
continue;
@ -361,7 +417,7 @@ void Writer::createImportTables() {
}
OutputSection *Text = createSection(".text");
for (ImportFile *File : Symtab->ImportFiles) {
for (ImportFile *File : ImportFile::Instances) {
if (!File->Live)
continue;
@ -432,19 +488,12 @@ Optional<coff_symbol16> Writer::createSymbol(Defined *Def) {
if (isa<DefinedSynthetic>(Def))
return None;
if (auto *D = dyn_cast<DefinedRegular>(Def)) {
// Don't write dead symbols or symbols in codeview sections to the symbol
// table.
if (!D->getChunk()->isLive() || D->getChunk()->isCodeView())
return None;
}
if (auto *Sym = dyn_cast<DefinedImportData>(Def))
if (!Sym->File->Live)
return None;
if (auto *Sym = dyn_cast<DefinedImportThunk>(Def))
if (!Sym->WrappedSym->File->Live)
// Don't write dead symbols or symbols in codeview sections to the symbol
// table.
if (!Def->isLive())
return None;
if (auto *D = dyn_cast<DefinedRegular>(Def))
if (D->getChunk()->isCodeView())
return None;
coff_symbol16 Sym;
@ -468,7 +517,7 @@ Optional<coff_symbol16> Writer::createSymbol(Defined *Def) {
Sym.NumberOfAuxSymbols = 0;
switch (Def->kind()) {
case SymbolBody::DefinedAbsoluteKind:
case Symbol::DefinedAbsoluteKind:
Sym.Value = Def->getRVA();
Sym.SectionNumber = IMAGE_SYM_ABSOLUTE;
break;
@ -489,40 +538,46 @@ Optional<coff_symbol16> Writer::createSymbol(Defined *Def) {
}
void Writer::createSymbolAndStringTable() {
if (!Config->Debug || !Config->WriteSymtab)
return;
// Name field in the section table is 8 byte long. Longer names need
// to be written to the string table. First, construct string table.
for (OutputSection *Sec : OutputSections) {
StringRef Name = Sec->getName();
if (Name.size() <= COFF::NameSize)
continue;
// If a section isn't discardable (i.e. will be mapped at runtime),
// prefer a truncated section name over a long section name in
// the string table that is unavailable at runtime. This is different from
// what link.exe does, but finding ".eh_fram" instead of "/4" is useful
// to libunwind.
if ((Sec->getPermissions() & IMAGE_SCN_MEM_DISCARDABLE) == 0)
continue;
Sec->setStringTableOff(addEntryToStringTable(Name));
}
for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) {
for (SymbolBody *B : File->getSymbols()) {
auto *D = dyn_cast<Defined>(B);
if (!D || D->WrittenToSymtab)
continue;
D->WrittenToSymtab = true;
if (Config->DebugDwarf) {
for (ObjFile *File : ObjFile::Instances) {
for (Symbol *B : File->getSymbols()) {
auto *D = dyn_cast_or_null<Defined>(B);
if (!D || D->WrittenToSymtab)
continue;
D->WrittenToSymtab = true;
if (Optional<coff_symbol16> Sym = createSymbol(D))
OutputSymtab.push_back(*Sym);
if (Optional<coff_symbol16> Sym = createSymbol(D))
OutputSymtab.push_back(*Sym);
}
}
}
if (OutputSymtab.empty() && Strtab.empty())
return;
OutputSection *LastSection = OutputSections.back();
// We position the symbol table to be adjacent to the end of the last section.
uint64_t FileOff = LastSection->getFileOff() +
alignTo(LastSection->getRawSize(), SectorSize);
if (!OutputSymtab.empty()) {
PointerToSymbolTable = FileOff;
FileOff += OutputSymtab.size() * sizeof(coff_symbol16);
}
if (!Strtab.empty())
FileOff += Strtab.size() + 4;
PointerToSymbolTable = FileOff;
FileOff += OutputSymtab.size() * sizeof(coff_symbol16);
FileOff += 4 + Strtab.size();
FileSize = alignTo(FileOff, SectorSize);
}
@ -551,7 +606,7 @@ void Writer::assignAddresses() {
RVA += alignTo(Sec->getVirtualSize(), PageSize);
FileSize += alignTo(Sec->getRawSize(), SectorSize);
}
SizeOfImage = SizeOfHeaders + alignTo(RVA - 0x1000, PageSize);
SizeOfImage = alignTo(RVA, PageSize);
}
template <typename PEHeaderTy> void Writer::writeHeader() {
@ -621,23 +676,21 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
PE->SizeOfStackCommit = Config->StackCommit;
PE->SizeOfHeapReserve = Config->HeapReserve;
PE->SizeOfHeapCommit = Config->HeapCommit;
// Import Descriptor Tables and Import Address Tables are merged
// in our output. That's not compatible with the Binding feature
// that is sort of prelinking. Setting this flag to make it clear
// that our outputs are not for the Binding.
PE->DLLCharacteristics = IMAGE_DLL_CHARACTERISTICS_NO_BIND;
if (Config->AppContainer)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_APPCONTAINER;
if (Config->DynamicBase)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE;
if (Config->HighEntropyVA)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA;
if (!Config->AllowBind)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_BIND;
if (Config->NxCompat)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT;
if (!Config->AllowIsolation)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION;
if (Config->Machine == I386 && !SEHTable &&
!Symtab->findUnderscore("_load_config_used"))
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_SEH;
if (Config->TerminalServerAware)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE;
PE->NumberOfRvaAndSize = NumberfOfDataDirectory;
@ -673,7 +726,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize();
}
if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) {
if (Defined *B = dyn_cast<Defined>(Sym->body())) {
if (Defined *B = dyn_cast<Defined>(Sym)) {
Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA();
Dir[TLS_TABLE].Size = Config->is64()
? sizeof(object::coff_tls_directory64)
@ -685,7 +738,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
Dir[DEBUG_DIRECTORY].Size = DebugDirectory->getSize();
}
if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) {
if (auto *B = dyn_cast<DefinedRegular>(Sym->body())) {
if (auto *B = dyn_cast<DefinedRegular>(Sym)) {
SectionChunk *SC = B->getChunk();
assert(B->getRVA() >= SC->getRVA());
uint64_t OffsetInChunk = B->getRVA() - SC->getRVA();
@ -715,7 +768,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
SectionTable = ArrayRef<uint8_t>(
Buf - OutputSections.size() * sizeof(coff_section), Buf);
if (OutputSymtab.empty())
if (OutputSymtab.empty() && Strtab.empty())
return;
COFF->PointerToSymbolTable = PointerToSymbolTable;
@ -734,21 +787,40 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
}
void Writer::openFile(StringRef Path) {
Buffer = check(
Buffer = CHECK(
FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable),
"failed to open " + Path);
}
void Writer::fixSafeSEHSymbols() {
if (!SEHTable)
void Writer::createSEHTable(OutputSection *RData) {
// Create SEH table. x86-only.
if (Config->Machine != I386)
return;
std::set<Defined *> Handlers;
for (ObjFile *File : ObjFile::Instances) {
if (!File->SEHCompat)
return;
for (uint32_t I : File->SXData)
if (Symbol *B = File->getSymbol(I))
if (B->isLive())
Handlers.insert(cast<Defined>(B));
}
if (Handlers.empty())
return;
SEHTable = make<SEHTableChunk>(Handlers);
RData->addChunk(SEHTable);
// Replace the absolute table symbol with a synthetic symbol pointing to the
// SEHTable chunk so that we can emit base relocations for it and resolve
// section relative relocations.
Symbol *T = Symtab->find("___safe_se_handler_table");
Symbol *C = Symtab->find("___safe_se_handler_count");
replaceBody<DefinedSynthetic>(T, T->body()->getName(), SEHTable);
cast<DefinedAbsolute>(C->body())->setVA(SEHTable->getSize() / 4);
replaceSymbol<DefinedSynthetic>(T, T->getName(), SEHTable);
cast<DefinedAbsolute>(C)->setVA(SEHTable->getSize() / 4);
}
// Handles /section options to allow users to overwrite
@ -781,6 +853,25 @@ void Writer::writeSections() {
}
}
// Fills in the CodeView build id record of the output image. When a build id
// was loaded from a previous output file it is copied and its age is bumped
// by one; otherwise a fresh PDB70 id with age 1 and 16 random signature bytes
// is generated.
void Writer::writeBuildId() {
  // No build id is written when debug info is disabled (e.g. /debug was not
  // specified).
  if (!Config->Debug)
    return;

  assert(BuildId && "BuildId is not set!");
  auto &Info = *BuildId->BuildId;

  if (!PreviousBuildId.hasValue()) {
    // First link of this output: create a brand new id.
    Info.Signature.CVSignature = OMF::Signature::PDB70;
    Info.PDB70.Age = 1;
    llvm::getRandomBytes(Info.PDB70.Signature, 16);
    return;
  }

  // Relink over an existing output: keep the old id and increment the age.
  Info = *PreviousBuildId;
  Info.PDB70.Age = Info.PDB70.Age + 1;
}
// Sort .pdata section contents according to PE/COFF spec 5.5.
void Writer::sortExceptionTable() {
OutputSection *Sec = findSection(".pdata");
@ -795,7 +886,7 @@ void Writer::sortExceptionTable() {
[](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
return;
}
if (Config->Machine == ARMNT) {
if (Config->Machine == ARMNT || Config->Machine == ARM64) {
struct Entry { ulittle32_t Begin, Unwind; };
sort(parallel::par, (Entry *)Begin, (Entry *)End,
[](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
@ -804,26 +895,6 @@ void Writer::sortExceptionTable() {
errs() << "warning: don't know how to handle .pdata.\n";
}
// Backfill the 16-byte signature and the age of the PDB70 debug record once
// the output buffer has been written. Deriving the signature from the output
// contents (rather than from random data) allows us to get reproducible
// builds.
void Writer::writeBuildId() {
// There is nothing to backfill if BuildId was not setup.
if (BuildId == nullptr)
return;
// The record is expected to already be tagged as PDB 7.0, and its signature
// field must be exactly as large as the 16 bytes copied into it below.
assert(BuildId->DI->Signature.CVSignature == OMF::Signature::PDB70 &&
"only PDB 7.0 is supported");
assert(sizeof(BuildId->DI->PDB70.Signature) == 16 &&
"signature size mismatch");
// Compute an MD5 hash over the whole output buffer and store its 16 bytes as
// the signature.
ArrayRef<uint8_t> Buf(Buffer->getBufferStart(), Buffer->getBufferEnd());
memcpy(BuildId->DI->PDB70.Signature, MD5::hash(Buf).data(), 16);
// TODO(compnerd) track the Age
// Until then the age is always reset to 1.
BuildId->DI->PDB70.Age = 1;
}
OutputSection *Writer::findSection(StringRef Name) {
for (OutputSection *Sec : OutputSections)
if (Sec->getName() == Name)

View File

@ -18,11 +18,9 @@
namespace lld {
namespace coff {
class SymbolTable;
static const int PageSize = 4096;
void writeResult(SymbolTable *T);
void writeResult();
// OutputSection represents a section in an output file. It's a
// container of chunks. OutputSection and Chunk are 1:N relationship.
@ -36,7 +34,7 @@ public:
void setFileOffset(uint64_t);
void addChunk(Chunk *C);
llvm::StringRef getName() { return Name; }
std::vector<Chunk *> &getChunks() { return Chunks; }
ArrayRef<Chunk *> getChunks() { return Chunks; }
void addPermissions(uint32_t C);
void setPermissions(uint32_t C);
uint32_t getPermissions() { return Header.Characteristics & PermMask; }

62
deps/lld/Common/Args.cpp vendored Normal file
View File

@ -0,0 +1,62 @@
//===- Args.cpp -----------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "lld/Common/Args.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Option/ArgList.h"
using namespace llvm;
using namespace lld;
// Returns the value of the last occurrence of option Key parsed as a base-10
// integer, or Default when the option is absent. A value that does not parse
// is reported through error().
int lld::args::getInteger(opt::InputArgList &Args, unsigned Key, int Default) {
  auto *Last = Args.getLastArg(Key);
  if (!Last)
    return Default;

  int Parsed = Default;
  StringRef Text = Last->getValue();
  if (!to_integer(Text, Parsed, 10))
    error(Last->getSpelling() + ": number expected, but got '" + Text + "'");
  return Parsed;
}
// Collects the values of every occurrence of option Id, in the order the
// argument list yields them.
std::vector<StringRef> lld::args::getStrings(opt::InputArgList &Args, int Id) {
  std::vector<StringRef> Values;
  for (auto *Occurrence : Args.filtered(Id)) {
    StringRef Value = Occurrence->getValue();
    Values.push_back(Value);
  }
  return Values;
}
// Scans the occurrences of option Id for one whose value has the form
// "Key=<number>" and returns that number. Only the first occurrence whose key
// matches is considered; if its number does not parse, an error is reported.
// Returns Default when no occurrence matches Key.
uint64_t lld::args::getZOptionValue(opt::InputArgList &Args, int Id,
                                    StringRef Key, uint64_t Default) {
  for (auto *Occurrence : Args.filtered(Id)) {
    std::pair<StringRef, StringRef> Parts =
        StringRef(Occurrence->getValue()).split('=');
    if (Parts.first != Key)
      continue;

    uint64_t Parsed = Default;
    if (!to_integer(Parts.second, Parsed))
      error("invalid " + Key + ": " + Parts.second);
    return Parsed;
  }
  return Default;
}
// Splits the buffer into lines, trims surrounding whitespace from each one,
// and returns the lines that are neither empty nor start with '#'.
std::vector<StringRef> lld::args::getLines(MemoryBufferRef MB) {
  SmallVector<StringRef, 0> RawLines;
  MB.getBuffer().split(RawLines, '\n');

  std::vector<StringRef> Kept;
  for (StringRef Raw : RawLines) {
    StringRef Line = Raw.trim();
    // Skip blank lines and '#' comment lines.
    if (Line.empty() || Line[0] == '#')
      continue;
    Kept.push_back(Line);
  }
  return Kept;
}

32
deps/lld/Common/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,32 @@
# When LLD is not built standalone, make lldCommon depend on the
# intrinsics_gen target (see DEPENDS below).
# NOTE(review): presumably this guarantees LLVM's generated headers exist
# before these sources are compiled -- confirm against the LLVM build.
if(NOT LLD_BUILT_STANDALONE)
set(tablegen_deps intrinsics_gen)
endif()
# lldCommon: the library built from the sources in this directory (argument
# helpers, error handling, memory arenas, reproduce support, string helpers,
# target option flags, threading flag and version info).
add_lld_library(lldCommon
Args.cpp
ErrorHandler.cpp
Memory.cpp
Reproduce.cpp
Strings.cpp
TargetOptionsCommandFlags.cpp
Threads.cpp
Version.cpp
ADDITIONAL_HEADER_DIRS
${LLD_INCLUDE_DIR}/lld/Common
LINK_COMPONENTS
Codegen
Core
Demangle
MC
Option
Support
Target
LINK_LIBS
${LLVM_PTHREAD_LIB}
DEPENDS
${tablegen_deps}
)

View File

@ -1,4 +1,4 @@
//===- Error.cpp ----------------------------------------------------------===//
//===- ErrorHandler.cpp ---------------------------------------------------===//
//
// The LLVM Linker
//
@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
#include "Error.h"
#include "Config.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Threads.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
@ -21,12 +22,7 @@
#endif
using namespace llvm;
using namespace lld;
using namespace lld::elf;
uint64_t elf::ErrorCount;
raw_ostream *elf::ErrorOS;
// The functions defined in this file can be called from multiple threads,
// but outs() or errs() are not thread-safe. We protect them using a mutex.
@ -34,72 +30,25 @@ static std::mutex Mu;
// Prints "\n" or does nothing, depending on Msg contents of
// the previous call of this function.
static void newline(const Twine &Msg) {
static void newline(raw_ostream *ErrorOS, const Twine &Msg) {
// True if the previous error message contained "\n".
// We want to separate multi-line error messages with a newline.
static bool Flag;
if (Flag)
*ErrorOS << "\n";
Flag = (StringRef(Msg.str()).find('\n') != StringRef::npos);
Flag = StringRef(Msg.str()).contains('\n');
}
static void print(StringRef S, raw_ostream::Colors C) {
*ErrorOS << Config->Argv[0] << ": ";
if (Config->ColorDiagnostics) {
ErrorOS->changeColor(C, true);
*ErrorOS << S;
ErrorOS->resetColor();
} else {
*ErrorOS << S;
}
ErrorHandler &lld::errorHandler() {
static ErrorHandler Handler;
return Handler;
}
void elf::log(const Twine &Msg) {
if (Config->Verbose) {
std::lock_guard<std::mutex> Lock(Mu);
outs() << Config->Argv[0] << ": " << Msg << "\n";
outs().flush();
}
}
void lld::exitLld(int Val) {
// Delete the output buffer so that any temporary file is deleted.
errorHandler().OutputBuffer.reset();
void elf::message(const Twine &Msg) {
std::lock_guard<std::mutex> Lock(Mu);
outs() << Msg << "\n";
outs().flush();
}
void elf::warn(const Twine &Msg) {
if (Config->FatalWarnings) {
error(Msg);
return;
}
std::lock_guard<std::mutex> Lock(Mu);
newline(Msg);
print("warning: ", raw_ostream::MAGENTA);
*ErrorOS << Msg << "\n";
}
void elf::error(const Twine &Msg) {
std::lock_guard<std::mutex> Lock(Mu);
newline(Msg);
if (Config->ErrorLimit == 0 || ErrorCount < Config->ErrorLimit) {
print("error: ", raw_ostream::RED);
*ErrorOS << Msg << "\n";
} else if (ErrorCount == Config->ErrorLimit) {
print("error: ", raw_ostream::RED);
*ErrorOS << "too many errors emitted, stopping now"
<< " (use -error-limit=0 to see all errors)\n";
if (Config->ExitEarly)
exitLld(1);
}
++ErrorCount;
}
void elf::exitLld(int Val) {
// Dealloc/destroy ManagedStatic variables before calling
// _exit(). In a non-LTO build, this is a nop. In an LTO
// build allows us to get the output of -time-passes.
@ -110,7 +59,60 @@ void elf::exitLld(int Val) {
_exit(Val);
}
void elf::fatal(const Twine &Msg) {
// Writes "<LogName>: " followed by S to the error stream. When color
// diagnostics are enabled, S (but not the prefix) is printed in color C.
void ErrorHandler::print(StringRef S, raw_ostream::Colors C) {
  *ErrorOS << LogName << ": ";

  if (!ColorDiagnostics) {
    *ErrorOS << S;
    return;
  }

  ErrorOS->changeColor(C, true);
  *ErrorOS << S;
  ErrorOS->resetColor();
}
// Writes "<LogName>: <Msg>" to the error stream, but only in verbose mode.
void ErrorHandler::log(const Twine &Msg) {
  if (!Verbose)
    return;

  // Serialize with the other reporting functions.
  std::lock_guard<std::mutex> Guard(Mu);
  *ErrorOS << LogName << ": " << Msg << "\n";
}
void ErrorHandler::message(const Twine &Msg) {
std::lock_guard<std::mutex> Lock(Mu);
outs() << Msg << "\n";
outs().flush();
}
// Reports a warning on the error stream. With FatalWarnings set, the message
// is reported through error() instead.
void ErrorHandler::warn(const Twine &Msg) {
  if (!FatalWarnings) {
    std::lock_guard<std::mutex> Guard(Mu);
    newline(ErrorOS, Msg);
    print("warning: ", raw_ostream::MAGENTA);
    *ErrorOS << Msg << "\n";
    return;
  }

  // error() takes the mutex itself, so it must be called without holding it.
  error(Msg);
}
// Reports an error on the error stream and bumps ErrorCount. An ErrorLimit of
// zero means "no limit". Once the limit is reached, ErrorLimitExceededMsg is
// printed once (and the process exits if ExitEarly is set); errors beyond
// that point are only counted.
void ErrorHandler::error(const Twine &Msg) {
  std::lock_guard<std::mutex> Guard(Mu);
  newline(ErrorOS, Msg);

  const bool WithinLimit = ErrorLimit == 0 || ErrorCount < ErrorLimit;
  if (WithinLimit) {
    print("error: ", raw_ostream::RED);
    *ErrorOS << Msg << "\n";
  } else if (ErrorCount == ErrorLimit) {
    print("error: ", raw_ostream::RED);
    *ErrorOS << ErrorLimitExceededMsg << "\n";
    if (ExitEarly)
      exitLld(1);
  }

  ++ErrorCount;
}
// Reports Msg through error() and then terminates via exitLld(1).
void ErrorHandler::fatal(const Twine &Msg) {
error(Msg);
exitLld(1);
}

23
deps/lld/Common/Memory.cpp vendored Normal file
View File

@ -0,0 +1,23 @@
//===- Memory.cpp ---------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "lld/Common/Memory.h"
using namespace llvm;
using namespace lld;
// Global bump allocator; reset by freeArena().
BumpPtrAllocator lld::BAlloc;
// Global string saver constructed on top of BAlloc.
StringSaver lld::Saver{BAlloc};
// List of SpecificAllocBase objects that freeArena() resets.
std::vector<SpecificAllocBase *> lld::SpecificAllocBase::Instances;
void lld::freeArena() {
for (SpecificAllocBase *Alloc : SpecificAllocBase::Instances)
Alloc->reset();
BAlloc.Reset();
}

View File

@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
#include "lld/Core/Reproduce.h"
#include "lld/Common/Reproduce.h"
#include "llvm/Option/Arg.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
@ -44,9 +44,9 @@ std::string lld::relativeToRoot(StringRef Path) {
// Quote a given string if it contains a space character.
std::string lld::quote(StringRef S) {
if (S.find(' ') == StringRef::npos)
return S;
return ("\"" + S + "\"").str();
if (S.contains(' '))
return ("\"" + S + "\"").str();
return S;
}
std::string lld::rewritePath(StringRef S) {
@ -55,12 +55,12 @@ std::string lld::rewritePath(StringRef S) {
return S;
}
std::string lld::toString(opt::Arg *Arg) {
std::string K = Arg->getSpelling();
if (Arg->getNumValues() == 0)
std::string lld::toString(const opt::Arg &Arg) {
std::string K = Arg.getSpelling();
if (Arg.getNumValues() == 0)
return K;
std::string V = quote(Arg->getValue());
if (Arg->getOption().getRenderStyle() == opt::Option::RenderJoinedStyle)
std::string V = quote(Arg.getValue());
if (Arg.getOption().getRenderStyle() == opt::Option::RenderJoinedStyle)
return K + V;
return K + " " + V;
}

32
deps/lld/Common/Strings.cpp vendored Normal file
View File

@ -0,0 +1,32 @@
//===- Strings.cpp -------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "lld/Common/Strings.h"
#include "llvm/Demangle/Demangle.h"
using namespace llvm;
using namespace lld;
// Returns the demangled C++ symbol name for Name.
Optional<std::string> lld::demangleItanium(StringRef Name) {
  // itaniumDemangle also accepts strings that are not symbol names and do not
  // start with "_Z". Name may be a plain C symbol, so only hand it to the
  // demangler when it looks like a mangled C++ name; otherwise a C symbol that
  // happens to match a mangled type name would produce a surprising result.
  if (Name.startswith("_Z")) {
    if (char *Demangled =
            itaniumDemangle(Name.str().c_str(), nullptr, nullptr, nullptr)) {
      // Copy the text out before releasing the buffer with free().
      std::string Result(Demangled);
      free(Demangled);
      return Result;
    }
  }
  return None;
}

View File

@ -8,25 +8,25 @@
//===----------------------------------------------------------------------===//
//
// This file exists as a place for global variables defined in LLVM's
// CodeGen/CommandFlags.h. By putting the resulting object file in
// CodeGen/CommandFlags.def. By putting the resulting object file in
// an archive and linking with it, the definitions will automatically be
// included when needed and skipped when already present.
//
//===----------------------------------------------------------------------===//
#include "lld/Core/TargetOptionsCommandFlags.h"
#include "lld/Common/TargetOptionsCommandFlags.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/CommandFlags.def"
#include "llvm/Target/TargetOptions.h"
// Define an externally visible version of
// InitTargetOptionsFromCodeGenFlags, so that its functionality can be
// used without having to include llvm/CodeGen/CommandFlags.h, which
// used without having to include llvm/CodeGen/CommandFlags.def, which
// would lead to multiple definitions of the command line flags.
llvm::TargetOptions lld::InitTargetOptionsFromCodeGenFlags() {
return ::InitTargetOptionsFromCodeGenFlags();
}
llvm::CodeModel::Model lld::GetCodeModelFromCMModel() {
return CMModel;
llvm::Optional<llvm::CodeModel::Model> lld::GetCodeModelFromCMModel() {
return getCodeModel();
}

12
deps/lld/Common/Threads.cpp vendored Normal file
View File

@ -0,0 +1,12 @@
//===- Threads.cpp --------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "lld/Common/Threads.h"
// Global threading flag, on by default.
// NOTE(review): presumably consulted by the helpers in lld/Common/Threads.h to
// choose between parallel and serial execution -- confirm against the header.
bool lld::ThreadsEnabled = true;

View File

@ -1,4 +1,4 @@
//===- lib/Config/Version.cpp - LLD Version Number ---------------*- C++-=====//
//===- lib/Common/Version.cpp - LLD Version Number ---------------*- C++-=====//
//
// The LLVM Compiler Infrastructure
//
@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
#include "lld/Config/Version.h"
#include "lld/Common/Version.h"
using namespace llvm;

Some files were not shown because too many files have changed in this diff Show More