mirror of
https://github.com/ziglang/zig.git
synced 2026-02-21 16:54:52 +00:00
Merge remote-tracking branch 'origin/llvm8'
This commit is contained in:
commit
3c7555cb67
@ -1,43 +1,11 @@
|
||||
image: freebsd/latest
|
||||
packages:
|
||||
- cmake
|
||||
- ninja
|
||||
- llvm70
|
||||
- py27-s3cmd
|
||||
- wget
|
||||
secrets:
|
||||
- 6c60aaee-92e7-4e7d-812c-114817689b4d
|
||||
sources:
|
||||
- https://github.com/ziglang/zig
|
||||
tasks:
|
||||
- build: |
|
||||
cd zig && mkdir build && cd build
|
||||
cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release
|
||||
ninja install
|
||||
- test: |
|
||||
cd zig/build
|
||||
bin/zig test ../test/stage1/behavior.zig
|
||||
bin/zig test ../std/special/compiler_rt.zig
|
||||
|
||||
bin/zig test ../test/stage1/behavior.zig --library c
|
||||
bin/zig test ../std/special/compiler_rt.zig --library c
|
||||
|
||||
bin/zig test ../test/stage1/behavior.zig --release-fast
|
||||
bin/zig test ../std/special/compiler_rt.zig --release-fast
|
||||
|
||||
bin/zig test ../test/stage1/behavior.zig --release-fast --library c
|
||||
bin/zig test ../std/special/compiler_rt.zig --release-fast --library c
|
||||
|
||||
bin/zig test ../test/stage1/behavior.zig --release-small --library c
|
||||
bin/zig test ../std/special/compiler_rt.zig --release-small --library c
|
||||
|
||||
bin/zig test ../test/stage1/behavior.zig --release-small
|
||||
bin/zig test ../std/special/compiler_rt.zig --release-small
|
||||
|
||||
bin/zig test ../test/stage1/behavior.zig --release-safe
|
||||
bin/zig test ../std/special/compiler_rt.zig --release-safe
|
||||
|
||||
bin/zig test ../test/stage1/behavior.zig --release-safe --library c
|
||||
bin/zig test ../std/special/compiler_rt.zig --release-safe --library c
|
||||
# TODO enable all tests
|
||||
#bin/zig build --build-file ../build.zig test
|
||||
# TODO integrate with the download page updater and make a
|
||||
# static build available to download for FreeBSD.
|
||||
# This will require setting up a cache of LLVM/Clang built
|
||||
# statically.
|
||||
- build: cd zig && ./ci/srht/freebsd_script
|
||||
|
||||
@ -56,10 +56,8 @@ endif()
|
||||
|
||||
if(APPLE AND ZIG_STATIC)
|
||||
list(REMOVE_ITEM LLVM_LIBRARIES "-lz")
|
||||
list(REMOVE_ITEM LLVM_LIBRARIES "-lcurses")
|
||||
find_library(ZLIB NAMES z zlib libz)
|
||||
find_library(LIBNCURSES NAMES libncurses.a)
|
||||
list(APPEND LLVM_LIBRARIES "${LIBNCURSES}" "${ZLIB}")
|
||||
list(APPEND LLVM_LIBRARIES "${ZLIB}")
|
||||
endif()
|
||||
|
||||
set(ZIG_CPP_LIB_DIR "${CMAKE_BINARY_DIR}/zig_cpp")
|
||||
@ -117,6 +115,7 @@ else()
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/WriterMachO.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp"
|
||||
)
|
||||
|
||||
set(EMBEDDED_LLD_ELF_SOURCES
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/AArch64ErrataFix.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/AArch64.cpp"
|
||||
@ -124,19 +123,21 @@ else()
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/ARM.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/AVR.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/Hexagon.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/MSP430.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/Mips.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/MipsArchTree.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/PPC.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/PPC64.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/RISCV.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/SPARCV9.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/X86.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/X86_64.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/CallGraphSort.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/DWARF.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Driver.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/DriverUtils.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/EhFrame.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Filesystem.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/GdbIndex.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/ICF.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/InputFiles.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/InputSection.cpp"
|
||||
@ -2800,13 +2801,6 @@ set(ZIG_LIBC_FILES
|
||||
"glibc/sysdeps/mips/mips64/n64/crtn.S"
|
||||
"glibc/sysdeps/mips/nptl/bits/pthreadtypes-arch.h"
|
||||
"glibc/sysdeps/mips/start.S"
|
||||
"glibc/sysdeps/nios2/bits/endian.h"
|
||||
"glibc/sysdeps/nios2/crti.S"
|
||||
"glibc/sysdeps/nios2/crtn.S"
|
||||
"glibc/sysdeps/nios2/dl-sysdep.h"
|
||||
"glibc/sysdeps/nios2/nptl/bits/pthreadtypes-arch.h"
|
||||
"glibc/sysdeps/nios2/start.S"
|
||||
"glibc/sysdeps/nios2/sysdep.h"
|
||||
"glibc/sysdeps/nptl/bits/pthreadtypes.h"
|
||||
"glibc/sysdeps/nptl/bits/thread-shared-types.h"
|
||||
"glibc/sysdeps/nptl/libc-lock.h"
|
||||
@ -2909,7 +2903,6 @@ set(ZIG_LIBC_FILES
|
||||
"glibc/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h"
|
||||
"glibc/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h"
|
||||
"glibc/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h"
|
||||
"glibc/sysdeps/unix/sysv/linux/nios2/sysdep.h"
|
||||
"glibc/sysdeps/unix/sysv/linux/powerpc/bits/stat.h"
|
||||
"glibc/sysdeps/unix/sysv/linux/powerpc/kernel-features.h"
|
||||
"glibc/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h"
|
||||
|
||||
18
README.md
18
README.md
@ -101,7 +101,6 @@ clarity.
|
||||
|i386 | Tier 2 | Tier 2 | Tier 4 | Tier 2 | Tier 3 | Tier 3 | Tier 3 | Tier 3 |
|
||||
|arm | Tier 2 | Tier 3 | Tier 3 | Tier 3 | Tier 3 | Tier 3 | Tier 3 | Tier 3 |
|
||||
|arm64 | Tier 2 | Tier 2 | Tier 3 | Tier 3 | Tier 3 | Tier 3 | Tier 3 | Tier 3 |
|
||||
|avr | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|
||||
|bpf | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|
||||
|hexagon | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|
||||
|mips | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|
||||
@ -110,8 +109,9 @@ clarity.
|
||||
|sparc | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|
||||
|s390x | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|
||||
|lanai | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|
||||
|wasm32 | Tier 4 | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
|
||||
|wasm64 | Tier 4 | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
|
||||
|wasm32 | Tier 3 | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
|
||||
|wasm64 | Tier 3 | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
|
||||
|avr | Tier 4 | Tier 4 | N/A | N/A | Tier 4 | Tier 4 | N/A | Tier 4 |
|
||||
|riscv32 | Tier 4 | Tier 4 | N/A | N/A | Tier 4 | Tier 4 | Tier 4 | Tier 4 |
|
||||
|riscv64 | Tier 4 | Tier 4 | N/A | N/A | Tier 4 | Tier 4 | Tier 4 | Tier 4 |
|
||||
|xcore | Tier 4 | Tier 4 | N/A | N/A | Tier 4 | Tier 4 | N/A | Tier 4 |
|
||||
@ -149,13 +149,13 @@ Note that you can
|
||||
|
||||
* cmake >= 2.8.5
|
||||
* gcc >= 5.0.0 or clang >= 3.6.0
|
||||
* LLVM, Clang, LLD development libraries == 7.x, compiled with the same gcc or clang version above
|
||||
* LLVM, Clang, LLD development libraries == 8.x, compiled with the same gcc or clang version above
|
||||
|
||||
##### Windows
|
||||
|
||||
* cmake >= 2.8.5
|
||||
* Microsoft Visual Studio 2017
|
||||
* LLVM, Clang, LLD development libraries == 7.x, compiled with the same MSVC version above
|
||||
* Microsoft Visual Studio 2017 (version 15.8)
|
||||
* LLVM, Clang, LLD development libraries == 8.x, compiled with the same MSVC version above
|
||||
|
||||
#### Instructions
|
||||
|
||||
@ -173,11 +173,11 @@ bin/zig build --build-file ../build.zig test
|
||||
##### MacOS
|
||||
|
||||
```
|
||||
brew install cmake llvm@7
|
||||
brew outdated llvm@7 || brew upgrade llvm@7
|
||||
brew install cmake llvm@8
|
||||
brew outdated llvm@8 || brew upgrade llvm@8
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@7/
|
||||
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@8/
|
||||
make install
|
||||
bin/zig build --build-file ../build.zig test
|
||||
```
|
||||
|
||||
8
c_headers/__clang_cuda_runtime_wrapper.h
vendored
8
c_headers/__clang_cuda_runtime_wrapper.h
vendored
@ -62,10 +62,15 @@
|
||||
#include "cuda.h"
|
||||
#if !defined(CUDA_VERSION)
|
||||
#error "cuda.h did not define CUDA_VERSION"
|
||||
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9020
|
||||
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10000
|
||||
#error "Unsupported CUDA version!"
|
||||
#endif
|
||||
|
||||
#pragma push_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__")
|
||||
#if CUDA_VERSION >= 10000
|
||||
#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
|
||||
#endif
|
||||
|
||||
// Make largest subset of device functions available during host
|
||||
// compilation -- SM_35 for the time being.
|
||||
#ifndef __CUDA_ARCH__
|
||||
@ -419,6 +424,7 @@ __device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
|
||||
#pragma pop_macro("dim3")
|
||||
#pragma pop_macro("uint3")
|
||||
#pragma pop_macro("__USE_FAST_MATH__")
|
||||
#pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__")
|
||||
|
||||
#endif // __CUDA__
|
||||
#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__
|
||||
|
||||
4
c_headers/adxintrin.h
vendored
4
c_headers/adxintrin.h
vendored
@ -53,7 +53,7 @@ static __inline unsigned char __DEFAULT_FN_ATTRS
|
||||
_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
|
||||
unsigned int *__p)
|
||||
{
|
||||
return __builtin_ia32_addcarry_u32(__cf, __x, __y, __p);
|
||||
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
@ -61,7 +61,7 @@ static __inline unsigned char __DEFAULT_FN_ATTRS
|
||||
_addcarry_u64(unsigned char __cf, unsigned long long __x,
|
||||
unsigned long long __y, unsigned long long *__p)
|
||||
{
|
||||
return __builtin_ia32_addcarry_u64(__cf, __x, __y, __p);
|
||||
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
125
c_headers/altivec.h
vendored
125
c_headers/altivec.h
vendored
@ -9492,49 +9492,51 @@ vec_splat_u32(signed char __a) {
|
||||
|
||||
/* vec_sr */
|
||||
|
||||
static __inline__ vector signed char __ATTRS_o_ai
|
||||
vec_sr(vector signed char __a, vector unsigned char __b) {
|
||||
vector unsigned char __res = (vector unsigned char)__a >> __b;
|
||||
return (vector signed char)__res;
|
||||
}
|
||||
|
||||
// vec_sr does modulo arithmetic on __b first, so __b is allowed to be more
|
||||
// than the length of __a.
|
||||
static __inline__ vector unsigned char __ATTRS_o_ai
|
||||
vec_sr(vector unsigned char __a, vector unsigned char __b) {
|
||||
return __a >> __b;
|
||||
return __a >>
|
||||
(__b % (vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__));
|
||||
}
|
||||
|
||||
static __inline__ vector signed short __ATTRS_o_ai
|
||||
vec_sr(vector signed short __a, vector unsigned short __b) {
|
||||
vector unsigned short __res = (vector unsigned short)__a >> __b;
|
||||
return (vector signed short)__res;
|
||||
static __inline__ vector signed char __ATTRS_o_ai
|
||||
vec_sr(vector signed char __a, vector unsigned char __b) {
|
||||
return (vector signed char)vec_sr((vector unsigned char)__a, __b);
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned short __ATTRS_o_ai
|
||||
vec_sr(vector unsigned short __a, vector unsigned short __b) {
|
||||
return __a >> __b;
|
||||
return __a >>
|
||||
(__b % (vector unsigned short)(sizeof(unsigned short) * __CHAR_BIT__));
|
||||
}
|
||||
|
||||
static __inline__ vector signed int __ATTRS_o_ai
|
||||
vec_sr(vector signed int __a, vector unsigned int __b) {
|
||||
vector unsigned int __res = (vector unsigned int)__a >> __b;
|
||||
return (vector signed int)__res;
|
||||
static __inline__ vector short __ATTRS_o_ai vec_sr(vector short __a,
|
||||
vector unsigned short __b) {
|
||||
return (vector short)vec_sr((vector unsigned short)__a, __b);
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned int __ATTRS_o_ai
|
||||
vec_sr(vector unsigned int __a, vector unsigned int __b) {
|
||||
return __a >> __b;
|
||||
return __a >>
|
||||
(__b % (vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__));
|
||||
}
|
||||
|
||||
static __inline__ vector int __ATTRS_o_ai vec_sr(vector int __a,
|
||||
vector unsigned int __b) {
|
||||
return (vector int)vec_sr((vector unsigned int)__a, __b);
|
||||
}
|
||||
|
||||
#ifdef __POWER8_VECTOR__
|
||||
static __inline__ vector signed long long __ATTRS_o_ai
|
||||
vec_sr(vector signed long long __a, vector unsigned long long __b) {
|
||||
vector unsigned long long __res = (vector unsigned long long)__a >> __b;
|
||||
return (vector signed long long)__res;
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned long long __ATTRS_o_ai
|
||||
vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
|
||||
return __a >> __b;
|
||||
return __a >> (__b % (vector unsigned long long)(sizeof(unsigned long long) *
|
||||
__CHAR_BIT__));
|
||||
}
|
||||
|
||||
static __inline__ vector long long __ATTRS_o_ai
|
||||
vec_sr(vector long long __a, vector unsigned long long __b) {
|
||||
return (vector long long)vec_sr((vector unsigned long long)__a, __b);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -9544,12 +9546,12 @@ vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
|
||||
|
||||
static __inline__ vector signed char __ATTRS_o_ai
|
||||
vec_vsrb(vector signed char __a, vector unsigned char __b) {
|
||||
return __a >> (vector signed char)__b;
|
||||
return vec_sr(__a, __b);
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned char __ATTRS_o_ai
|
||||
vec_vsrb(vector unsigned char __a, vector unsigned char __b) {
|
||||
return __a >> __b;
|
||||
return vec_sr(__a, __b);
|
||||
}
|
||||
|
||||
/* vec_vsrh */
|
||||
@ -9558,12 +9560,12 @@ vec_vsrb(vector unsigned char __a, vector unsigned char __b) {
|
||||
|
||||
static __inline__ vector short __ATTRS_o_ai
|
||||
vec_vsrh(vector short __a, vector unsigned short __b) {
|
||||
return __a >> (vector short)__b;
|
||||
return vec_sr(__a, __b);
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned short __ATTRS_o_ai
|
||||
vec_vsrh(vector unsigned short __a, vector unsigned short __b) {
|
||||
return __a >> __b;
|
||||
return vec_sr(__a, __b);
|
||||
}
|
||||
|
||||
/* vec_vsrw */
|
||||
@ -9572,12 +9574,12 @@ vec_vsrh(vector unsigned short __a, vector unsigned short __b) {
|
||||
|
||||
static __inline__ vector int __ATTRS_o_ai vec_vsrw(vector int __a,
|
||||
vector unsigned int __b) {
|
||||
return __a >> (vector int)__b;
|
||||
return vec_sr(__a, __b);
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned int __ATTRS_o_ai
|
||||
vec_vsrw(vector unsigned int __a, vector unsigned int __b) {
|
||||
return __a >> __b;
|
||||
return vec_sr(__a, __b);
|
||||
}
|
||||
|
||||
/* vec_sra */
|
||||
@ -16353,67 +16355,82 @@ vec_revb(vector unsigned __int128 __a) {
|
||||
|
||||
/* vec_xl */
|
||||
|
||||
typedef vector signed char unaligned_vec_schar __attribute__((aligned(1)));
|
||||
typedef vector unsigned char unaligned_vec_uchar __attribute__((aligned(1)));
|
||||
typedef vector signed short unaligned_vec_sshort __attribute__((aligned(1)));
|
||||
typedef vector unsigned short unaligned_vec_ushort __attribute__((aligned(1)));
|
||||
typedef vector signed int unaligned_vec_sint __attribute__((aligned(1)));
|
||||
typedef vector unsigned int unaligned_vec_uint __attribute__((aligned(1)));
|
||||
typedef vector float unaligned_vec_float __attribute__((aligned(1)));
|
||||
|
||||
static inline __ATTRS_o_ai vector signed char vec_xl(signed long long __offset,
|
||||
signed char *__ptr) {
|
||||
return *(vector signed char *)(__ptr + __offset);
|
||||
return *(unaligned_vec_schar *)(__ptr + __offset);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector unsigned char
|
||||
vec_xl(signed long long __offset, unsigned char *__ptr) {
|
||||
return *(vector unsigned char *)(__ptr + __offset);
|
||||
return *(unaligned_vec_uchar*)(__ptr + __offset);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset,
|
||||
signed short *__ptr) {
|
||||
return *(vector signed short *)(__ptr + __offset);
|
||||
return *(unaligned_vec_sshort *)(__ptr + __offset);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector unsigned short
|
||||
vec_xl(signed long long __offset, unsigned short *__ptr) {
|
||||
return *(vector unsigned short *)(__ptr + __offset);
|
||||
return *(unaligned_vec_ushort *)(__ptr + __offset);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector signed int vec_xl(signed long long __offset,
|
||||
signed int *__ptr) {
|
||||
return *(vector signed int *)(__ptr + __offset);
|
||||
return *(unaligned_vec_sint *)(__ptr + __offset);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset,
|
||||
unsigned int *__ptr) {
|
||||
return *(vector unsigned int *)(__ptr + __offset);
|
||||
return *(unaligned_vec_uint *)(__ptr + __offset);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset,
|
||||
float *__ptr) {
|
||||
return *(vector float *)(__ptr + __offset);
|
||||
return *(unaligned_vec_float *)(__ptr + __offset);
|
||||
}
|
||||
|
||||
#ifdef __VSX__
|
||||
typedef vector signed long long unaligned_vec_sll __attribute__((aligned(1)));
|
||||
typedef vector unsigned long long unaligned_vec_ull __attribute__((aligned(1)));
|
||||
typedef vector double unaligned_vec_double __attribute__((aligned(1)));
|
||||
|
||||
static inline __ATTRS_o_ai vector signed long long
|
||||
vec_xl(signed long long __offset, signed long long *__ptr) {
|
||||
return *(vector signed long long *)(__ptr + __offset);
|
||||
return *(unaligned_vec_sll *)(__ptr + __offset);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector unsigned long long
|
||||
vec_xl(signed long long __offset, unsigned long long *__ptr) {
|
||||
return *(vector unsigned long long *)(__ptr + __offset);
|
||||
return *(unaligned_vec_ull *)(__ptr + __offset);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset,
|
||||
double *__ptr) {
|
||||
return *(vector double *)(__ptr + __offset);
|
||||
return *(unaligned_vec_double *)(__ptr + __offset);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
|
||||
typedef vector signed __int128 unaligned_vec_si128 __attribute__((aligned(1)));
|
||||
typedef vector unsigned __int128 unaligned_vec_ui128
|
||||
__attribute__((aligned(1)));
|
||||
static inline __ATTRS_o_ai vector signed __int128
|
||||
vec_xl(signed long long __offset, signed __int128 *__ptr) {
|
||||
return *(vector signed __int128 *)(__ptr + __offset);
|
||||
return *(unaligned_vec_si128 *)(__ptr + __offset);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector unsigned __int128
|
||||
vec_xl(signed long long __offset, unsigned __int128 *__ptr) {
|
||||
return *(vector unsigned __int128 *)(__ptr + __offset);
|
||||
return *(unaligned_vec_ui128 *)(__ptr + __offset);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -16498,62 +16515,62 @@ vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) {
|
||||
static inline __ATTRS_o_ai void vec_xst(vector signed char __vec,
|
||||
signed long long __offset,
|
||||
signed char *__ptr) {
|
||||
*(vector signed char *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_schar *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai void vec_xst(vector unsigned char __vec,
|
||||
signed long long __offset,
|
||||
unsigned char *__ptr) {
|
||||
*(vector unsigned char *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_uchar *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai void vec_xst(vector signed short __vec,
|
||||
signed long long __offset,
|
||||
signed short *__ptr) {
|
||||
*(vector signed short *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_sshort *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai void vec_xst(vector unsigned short __vec,
|
||||
signed long long __offset,
|
||||
unsigned short *__ptr) {
|
||||
*(vector unsigned short *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_ushort *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai void vec_xst(vector signed int __vec,
|
||||
signed long long __offset,
|
||||
signed int *__ptr) {
|
||||
*(vector signed int *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_sint *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai void vec_xst(vector unsigned int __vec,
|
||||
signed long long __offset,
|
||||
unsigned int *__ptr) {
|
||||
*(vector unsigned int *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_uint *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai void vec_xst(vector float __vec,
|
||||
signed long long __offset,
|
||||
float *__ptr) {
|
||||
*(vector float *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_float *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
|
||||
#ifdef __VSX__
|
||||
static inline __ATTRS_o_ai void vec_xst(vector signed long long __vec,
|
||||
signed long long __offset,
|
||||
signed long long *__ptr) {
|
||||
*(vector signed long long *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_sll *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai void vec_xst(vector unsigned long long __vec,
|
||||
signed long long __offset,
|
||||
unsigned long long *__ptr) {
|
||||
*(vector unsigned long long *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_ull *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai void vec_xst(vector double __vec,
|
||||
signed long long __offset,
|
||||
double *__ptr) {
|
||||
*(vector double *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_double *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -16561,13 +16578,13 @@ static inline __ATTRS_o_ai void vec_xst(vector double __vec,
|
||||
static inline __ATTRS_o_ai void vec_xst(vector signed __int128 __vec,
|
||||
signed long long __offset,
|
||||
signed __int128 *__ptr) {
|
||||
*(vector signed __int128 *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_si128 *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec,
|
||||
signed long long __offset,
|
||||
unsigned __int128 *__ptr) {
|
||||
*(vector unsigned __int128 *)(__ptr + __offset) = __vec;
|
||||
*(unaligned_vec_ui128 *)(__ptr + __offset) = __vec;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
2
c_headers/arm_fp16.h
vendored
2
c_headers/arm_fp16.h
vendored
@ -27,7 +27,7 @@
|
||||
#include <stdint.h>
|
||||
|
||||
typedef __fp16 float16_t;
|
||||
#define __ai static inline __attribute__((__always_inline__, __nodebug__))
|
||||
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
|
||||
2958
c_headers/arm_neon.h
vendored
2958
c_headers/arm_neon.h
vendored
File diff suppressed because it is too large
Load Diff
839
c_headers/avx512bwintrin.h
vendored
839
c_headers/avx512bwintrin.h
vendored
File diff suppressed because it is too large
Load Diff
302
c_headers/avx512dqintrin.h
vendored
302
c_headers/avx512dqintrin.h
vendored
@ -29,180 +29,309 @@
|
||||
#define __AVX512DQINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline __mmask8 __DEFAULT_FN_ATTRS
|
||||
_knot_mask8(__mmask8 __M)
|
||||
{
|
||||
return __builtin_ia32_knotqi(__M);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_kand_mask8(__mmask8 __A, __mmask8 __B)
|
||||
{
|
||||
return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_kandn_mask8(__mmask8 __A, __mmask8 __B)
|
||||
{
|
||||
return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_kor_mask8(__mmask8 __A, __mmask8 __B)
|
||||
{
|
||||
return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_kxnor_mask8(__mmask8 __A, __mmask8 __B)
|
||||
{
|
||||
return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_kxor_mask8(__mmask8 __A, __mmask8 __B)
|
||||
{
|
||||
return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
|
||||
{
|
||||
return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
|
||||
{
|
||||
return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
|
||||
*__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
|
||||
return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
|
||||
{
|
||||
return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
|
||||
{
|
||||
return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
|
||||
*__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
|
||||
return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
|
||||
*__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
|
||||
return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_kadd_mask8(__mmask8 __A, __mmask8 __B)
|
||||
{
|
||||
return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_kadd_mask16(__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B);
|
||||
}
|
||||
|
||||
#define _kshiftli_mask8(A, I) \
|
||||
(__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I))
|
||||
|
||||
#define _kshiftri_mask8(A, I) \
|
||||
(__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I))
|
||||
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
_cvtmask8_u32(__mmask8 __A) {
|
||||
return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_cvtu32_mask8(unsigned int __A) {
|
||||
return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_load_mask8(__mmask8 *__A) {
|
||||
return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_store_mask8(__mmask8 *__A, __mmask8 __B) {
|
||||
*(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
|
||||
return (__m512i) ((__v8du) __A * (__v8du) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
|
||||
(__v8di)_mm512_mullo_epi64(__A, __B),
|
||||
(__v8di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
|
||||
(__v8di)_mm512_mullo_epi64(__A, __B),
|
||||
(__v8di)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_xor_pd(__m512d __A, __m512d __B) {
|
||||
return (__m512d)((__v8du)__A ^ (__v8du)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_xor_pd(__A, __B),
|
||||
(__v8df)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_xor_pd(__A, __B),
|
||||
(__v8df)_mm512_setzero_pd());
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_xor_ps (__m512 __A, __m512 __B) {
|
||||
return (__m512)((__v16su)__A ^ (__v16su)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
|
||||
(__v16sf)_mm512_xor_ps(__A, __B),
|
||||
(__v16sf)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
|
||||
(__v16sf)_mm512_xor_ps(__A, __B),
|
||||
(__v16sf)_mm512_setzero_ps());
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_or_pd(__m512d __A, __m512d __B) {
|
||||
return (__m512d)((__v8du)__A | (__v8du)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_or_pd(__A, __B),
|
||||
(__v8df)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_or_pd(__A, __B),
|
||||
(__v8df)_mm512_setzero_pd());
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_or_ps(__m512 __A, __m512 __B) {
|
||||
return (__m512)((__v16su)__A | (__v16su)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
|
||||
(__v16sf)_mm512_or_ps(__A, __B),
|
||||
(__v16sf)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
|
||||
(__v16sf)_mm512_or_ps(__A, __B),
|
||||
(__v16sf)_mm512_setzero_ps());
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_and_pd(__m512d __A, __m512d __B) {
|
||||
return (__m512d)((__v8du)__A & (__v8du)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_and_pd(__A, __B),
|
||||
(__v8df)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_and_pd(__A, __B),
|
||||
(__v8df)_mm512_setzero_pd());
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_and_ps(__m512 __A, __m512 __B) {
|
||||
return (__m512)((__v16su)__A & (__v16su)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
|
||||
(__v16sf)_mm512_and_ps(__A, __B),
|
||||
(__v16sf)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
|
||||
(__v16sf)_mm512_and_ps(__A, __B),
|
||||
(__v16sf)_mm512_setzero_ps());
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_andnot_pd(__m512d __A, __m512d __B) {
|
||||
return (__m512d)(~(__v8du)__A & (__v8du)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_andnot_pd(__A, __B),
|
||||
(__v8df)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_andnot_pd(__A, __B),
|
||||
(__v8df)_mm512_setzero_pd());
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_andnot_ps(__m512 __A, __m512 __B) {
|
||||
return (__m512)(~(__v16su)__A & (__v16su)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
|
||||
(__v16sf)_mm512_andnot_ps(__A, __B),
|
||||
(__v16sf)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
|
||||
(__v16sf)_mm512_andnot_ps(__A, __B),
|
||||
(__v16sf)_mm512_setzero_ps());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvtpd_epi64 (__m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -210,7 +339,7 @@ _mm512_cvtpd_epi64 (__m512d __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
|
||||
(__v8di) __W,
|
||||
@ -218,7 +347,7 @@ _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -241,7 +370,7 @@ _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
|
||||
(__v8di)_mm512_setzero_si512(), \
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvtpd_epu64 (__m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -249,7 +378,7 @@ _mm512_cvtpd_epu64 (__m512d __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
|
||||
(__v8di) __W,
|
||||
@ -257,7 +386,7 @@ _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -280,7 +409,7 @@ _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
|
||||
(__v8di)_mm512_setzero_si512(), \
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvtps_epi64 (__m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -288,7 +417,7 @@ _mm512_cvtps_epi64 (__m256 __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
|
||||
(__v8di) __W,
|
||||
@ -296,7 +425,7 @@ _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -319,7 +448,7 @@ _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
|
||||
(__v8di)_mm512_setzero_si512(), \
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvtps_epu64 (__m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -327,7 +456,7 @@ _mm512_cvtps_epu64 (__m256 __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
|
||||
(__v8di) __W,
|
||||
@ -335,7 +464,7 @@ _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -359,19 +488,19 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvtepi64_pd (__m512i __A) {
|
||||
return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_cvtepi64_pd(__A),
|
||||
(__v8df)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_cvtepi64_pd(__A),
|
||||
@ -393,7 +522,7 @@ _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvtepi64_ps (__m512i __A) {
|
||||
return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
|
||||
(__v8sf) _mm256_setzero_ps(),
|
||||
@ -401,7 +530,7 @@ _mm512_cvtepi64_ps (__m512i __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
|
||||
return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
|
||||
(__v8sf) __W,
|
||||
@ -409,7 +538,7 @@ _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
|
||||
return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
|
||||
(__v8sf) _mm256_setzero_ps(),
|
||||
@ -433,7 +562,7 @@ _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvttpd_epi64 (__m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -441,7 +570,7 @@ _mm512_cvttpd_epi64 (__m512d __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
|
||||
(__v8di) __W,
|
||||
@ -449,7 +578,7 @@ _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -472,7 +601,7 @@ _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
|
||||
(__v8di)_mm512_setzero_si512(), \
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvttpd_epu64 (__m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -480,7 +609,7 @@ _mm512_cvttpd_epu64 (__m512d __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
|
||||
(__v8di) __W,
|
||||
@ -488,7 +617,7 @@ _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
|
||||
return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -511,7 +640,7 @@ _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
|
||||
(__v8di)_mm512_setzero_si512(), \
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvttps_epi64 (__m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -519,7 +648,7 @@ _mm512_cvttps_epi64 (__m256 __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
|
||||
(__v8di) __W,
|
||||
@ -527,7 +656,7 @@ _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -550,7 +679,7 @@ _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
|
||||
(__v8di)_mm512_setzero_si512(), \
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvttps_epu64 (__m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -558,7 +687,7 @@ _mm512_cvttps_epu64 (__m256 __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
|
||||
(__v8di) __W,
|
||||
@ -566,7 +695,7 @@ _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
|
||||
return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
|
||||
(__v8di) _mm512_setzero_si512(),
|
||||
@ -589,19 +718,19 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
|
||||
(__v8di)_mm512_setzero_si512(), \
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvtepu64_pd (__m512i __A) {
|
||||
return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_cvtepu64_pd(__A),
|
||||
(__v8df)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
|
||||
(__v8df)_mm512_cvtepu64_pd(__A),
|
||||
@ -625,7 +754,7 @@ _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS512
|
||||
_mm512_cvtepu64_ps (__m512i __A) {
|
||||
return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
|
||||
(__v8sf) _mm256_setzero_ps(),
|
||||
@ -633,7 +762,7 @@ _mm512_cvtepu64_ps (__m512i __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
|
||||
return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
|
||||
(__v8sf) __W,
|
||||
@ -641,7 +770,7 @@ _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
|
||||
return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
|
||||
(__v8sf) _mm256_setzero_ps(),
|
||||
@ -935,32 +1064,32 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)(U), (int)(C), (int)(R))
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
|
||||
_mm512_movepi32_mask (__m512i __A)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_movm_epi32 (__mmask16 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_movm_epi64 (__mmask8 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
|
||||
_mm512_movepi64_mask (__m512i __A)
|
||||
{
|
||||
return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
|
||||
}
|
||||
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_broadcast_f32x2 (__m128 __A)
|
||||
{
|
||||
return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
|
||||
@ -968,7 +1097,7 @@ _mm512_broadcast_f32x2 (__m128 __A)
|
||||
0, 1, 0, 1, 0, 1, 0, 1);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
|
||||
{
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
|
||||
@ -976,7 +1105,7 @@ _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
|
||||
(__v16sf)__O);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
|
||||
{
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
|
||||
@ -984,7 +1113,7 @@ _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
|
||||
(__v16sf)_mm512_setzero_ps());
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_broadcast_f32x8(__m256 __A)
|
||||
{
|
||||
return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
|
||||
@ -992,7 +1121,7 @@ _mm512_broadcast_f32x8(__m256 __A)
|
||||
0, 1, 2, 3, 4, 5, 6, 7);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
|
||||
{
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
|
||||
@ -1000,7 +1129,7 @@ _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
|
||||
(__v16sf)__O);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
|
||||
{
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
|
||||
@ -1008,14 +1137,14 @@ _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
|
||||
(__v16sf)_mm512_setzero_ps());
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_broadcast_f64x2(__m128d __A)
|
||||
{
|
||||
return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
|
||||
0, 1, 0, 1, 0, 1, 0, 1);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
|
||||
{
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
|
||||
@ -1023,7 +1152,7 @@ _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
|
||||
(__v8df)__O);
|
||||
}
|
||||
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512d __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
|
||||
{
|
||||
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
|
||||
@ -1031,7 +1160,7 @@ _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
|
||||
(__v8df)_mm512_setzero_pd());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_broadcast_i32x2 (__m128i __A)
|
||||
{
|
||||
return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
|
||||
@ -1039,7 +1168,7 @@ _mm512_broadcast_i32x2 (__m128i __A)
|
||||
0, 1, 0, 1, 0, 1, 0, 1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
|
||||
@ -1047,7 +1176,7 @@ _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
|
||||
(__v16si)__O);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
|
||||
@ -1055,7 +1184,7 @@ _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
|
||||
(__v16si)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_broadcast_i32x8(__m256i __A)
|
||||
{
|
||||
return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
|
||||
@ -1063,7 +1192,7 @@ _mm512_broadcast_i32x8(__m256i __A)
|
||||
0, 1, 2, 3, 4, 5, 6, 7);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
|
||||
@ -1071,7 +1200,7 @@ _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
|
||||
(__v16si)__O);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
|
||||
@ -1079,14 +1208,14 @@ _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
|
||||
(__v16si)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_broadcast_i64x2(__m128i __A)
|
||||
{
|
||||
return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
|
||||
0, 1, 0, 1, 0, 1, 0, 1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
|
||||
@ -1094,7 +1223,7 @@ _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
|
||||
(__v8di)__O);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
|
||||
@ -1256,6 +1385,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
|
||||
(__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
|
||||
(__mmask8)(U))
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS512
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
|
||||
177
c_headers/avx512fintrin.h
vendored
177
c_headers/avx512fintrin.h
vendored
@ -175,6 +175,7 @@ typedef enum
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
|
||||
|
||||
/* Create vectors with repeated elements */
|
||||
|
||||
@ -508,13 +509,13 @@ _mm512_castsi512_si256 (__m512i __A)
|
||||
return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_int2mask(int __a)
|
||||
{
|
||||
return (__mmask16)__a;
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS512
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm512_mask2int(__mmask16 __a)
|
||||
{
|
||||
return (int)__a;
|
||||
@ -4328,6 +4329,15 @@ _mm512_loadu_si512 (void const *__P)
|
||||
return ((struct __loadu_si512*)__P)->__v;
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_loadu_epi32 (void const *__P)
|
||||
{
|
||||
struct __loadu_epi32 {
|
||||
__m512i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
return ((struct __loadu_epi32*)__P)->__v;
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
|
||||
{
|
||||
@ -4346,6 +4356,15 @@ _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_loadu_epi64 (void const *__P)
|
||||
{
|
||||
struct __loadu_epi64 {
|
||||
__m512i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
return ((struct __loadu_epi64*)__P)->__v;
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
|
||||
{
|
||||
@ -4481,6 +4500,15 @@ _mm512_load_epi64 (void const *__P)
|
||||
|
||||
/* SIMD store ops */
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS512
|
||||
_mm512_storeu_epi64 (void *__P, __m512i __A)
|
||||
{
|
||||
struct __storeu_epi64 {
|
||||
__m512i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
((struct __storeu_epi64*)__P)->__v = __A;
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
@ -4497,6 +4525,15 @@ _mm512_storeu_si512 (void *__P, __m512i __A)
|
||||
((struct __storeu_si512*)__P)->__v = __A;
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS512
|
||||
_mm512_storeu_epi32 (void *__P, __m512i __A)
|
||||
{
|
||||
struct __storeu_epi32 {
|
||||
__m512i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
((struct __storeu_epi32*)__P)->__v = __A;
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
@ -4580,7 +4617,7 @@ _mm512_store_epi64 (void *__P, __m512i __A)
|
||||
|
||||
/* Mask ops */
|
||||
|
||||
static __inline __mmask16 __DEFAULT_FN_ATTRS512
|
||||
static __inline __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_knot(__mmask16 __M)
|
||||
{
|
||||
return __builtin_ia32_knothi(__M);
|
||||
@ -5622,7 +5659,7 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)(U), (int)(R))
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_kmov (__mmask16 __A)
|
||||
{
|
||||
return __A;
|
||||
@ -7593,177 +7630,177 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
|
||||
|
||||
#define _mm512_i64gather_ps(index, addr, scale) \
|
||||
(__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
|
||||
(float const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8di)(__m512i)(index), (__mmask8)-1, \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
|
||||
(__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
|
||||
(float const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm512_i64gather_epi32(index, addr, scale) \
|
||||
(__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
|
||||
(int const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__mmask8)-1, (int)(scale))
|
||||
|
||||
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
|
||||
(__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
|
||||
(int const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm512_i64gather_pd(index, addr, scale) \
|
||||
(__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
|
||||
(double const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8di)(__m512i)(index), (__mmask8)-1, \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
|
||||
(__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
|
||||
(double const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm512_i64gather_epi64(index, addr, scale) \
|
||||
(__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
|
||||
(long long const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8di)(__m512i)(index), (__mmask8)-1, \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
|
||||
(__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
|
||||
(long long const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm512_i32gather_ps(index, addr, scale) \
|
||||
(__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
|
||||
(float const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v16sf)(__m512)(index), \
|
||||
(__mmask16)-1, (int)(scale))
|
||||
|
||||
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
|
||||
(__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
|
||||
(float const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v16sf)(__m512)(index), \
|
||||
(__mmask16)(mask), (int)(scale))
|
||||
|
||||
#define _mm512_i32gather_epi32(index, addr, scale) \
|
||||
(__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
|
||||
(int const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v16si)(__m512i)(index), \
|
||||
(__mmask16)-1, (int)(scale))
|
||||
|
||||
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
|
||||
(__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
|
||||
(int const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v16si)(__m512i)(index), \
|
||||
(__mmask16)(mask), (int)(scale))
|
||||
|
||||
#define _mm512_i32gather_pd(index, addr, scale) \
|
||||
(__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
|
||||
(double const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8si)(__m256i)(index), (__mmask8)-1, \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
|
||||
(__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
|
||||
(double const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm512_i32gather_epi64(index, addr, scale) \
|
||||
(__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
|
||||
(long long const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8si)(__m256i)(index), (__mmask8)-1, \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
|
||||
(__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
|
||||
(long long const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__v8sf)(__m256)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__v8sf)(__m256)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__v8si)(__m256i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__v8si)(__m256i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__v8df)(__m512d)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__v8df)(__m512d)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__v8di)(__m512i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
|
||||
(__v8di)(__m512i)(index), \
|
||||
(__v8di)(__m512i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
|
||||
__builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
|
||||
(__v16si)(__m512i)(index), \
|
||||
(__v16sf)(__m512)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
|
||||
__builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
|
||||
(__v16si)(__m512i)(index), \
|
||||
(__v16sf)(__m512)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
|
||||
__builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
|
||||
(__v16si)(__m512i)(index), \
|
||||
(__v16si)(__m512i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
|
||||
__builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
|
||||
(__v16si)(__m512i)(index), \
|
||||
(__v16si)(__m512i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__v8df)(__m512d)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__v8df)(__m512d)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__v8di)(__m512i)(v1), (int)(scale))
|
||||
|
||||
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__v8di)(__m512i)(v1), (int)(scale))
|
||||
|
||||
@ -8320,54 +8357,105 @@ _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
|
||||
|
||||
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_kand (__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_kandn (__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_kor (__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS512
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS512
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
|
||||
*__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
|
||||
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_kxor (__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
|
||||
}
|
||||
|
||||
#define _kand_mask16 _mm512_kand
|
||||
#define _kandn_mask16 _mm512_kandn
|
||||
#define _knot_mask16 _mm512_knot
|
||||
#define _kor_mask16 _mm512_kor
|
||||
#define _kxnor_mask16 _mm512_kxnor
|
||||
#define _kxor_mask16 _mm512_kxor
|
||||
|
||||
#define _kshiftli_mask16(A, I) \
|
||||
(__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))
|
||||
|
||||
#define _kshiftri_mask16(A, I) \
|
||||
(__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))
|
||||
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
_cvtmask16_u32(__mmask16 __A) {
|
||||
return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_cvtu32_mask16(unsigned int __A) {
|
||||
return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_load_mask16(__mmask16 *__A) {
|
||||
return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_store_mask16(__mmask16 *__A, __mmask16 __B) {
|
||||
*(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS512
|
||||
_mm512_stream_si512 (__m512i * __P, __m512i __A)
|
||||
{
|
||||
@ -9594,5 +9682,6 @@ _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS512
|
||||
#undef __DEFAULT_FN_ATTRS128
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* __AVX512FINTRIN_H */
|
||||
|
||||
32
c_headers/avx512pfintrin.h
vendored
32
c_headers/avx512pfintrin.h
vendored
@ -33,78 +33,78 @@
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
|
||||
(long long const *)(addr), (int)(scale), \
|
||||
(void const *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
|
||||
(long long const *)(addr), (int)(scale), \
|
||||
(void const *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdps((__mmask16)(mask), \
|
||||
(__v16si)(__m512i)(index), (int const *)(addr), \
|
||||
(__v16si)(__m512i)(index), (void const *)(addr), \
|
||||
(int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdps((__mmask16) -1, \
|
||||
(__v16si)(__m512i)(index), (int const *)(addr), \
|
||||
(__v16si)(__m512i)(index), (void const *)(addr), \
|
||||
(int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(long long const *)(addr), (int)(scale), \
|
||||
(void const *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
|
||||
(long long const *)(addr), (int)(scale), \
|
||||
(void const *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(int const *)(addr), (int)(scale), (int)(hint))
|
||||
(void const *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
|
||||
(int const *)(addr), (int)(scale), (int)(hint))
|
||||
(void const *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
|
||||
(long long *)(addr), (int)(scale), \
|
||||
(void *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
|
||||
(long long *)(addr), (int)(scale), \
|
||||
(void *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
|
||||
(int *)(addr), (int)(scale), (int)(hint))
|
||||
(void *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdps((__mmask16)(mask), \
|
||||
(__v16si)(__m512i)(index), (int *)(addr), \
|
||||
(__v16si)(__m512i)(index), (void *)(addr), \
|
||||
(int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
|
||||
(long long *)(addr), (int)(scale), \
|
||||
(void *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(long long *)(addr), (int)(scale), \
|
||||
(void *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
|
||||
(int *)(addr), (int)(scale), (int)(hint))
|
||||
(void *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(int *)(addr), (int)(scale), (int)(hint))
|
||||
(void *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
|
||||
174
c_headers/avx512vbmi2intrin.h
vendored
174
c_headers/avx512vbmi2intrin.h
vendored
@ -227,167 +227,141 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
|
||||
(__v32hi)_mm512_setzero_si512())
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
|
||||
_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B,
|
||||
(__v8di)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
return (__m512i)__builtin_ia32_selectq_512(__U,
|
||||
(__v8di)_mm512_shldv_epi64(__A, __B, __C),
|
||||
(__v8di)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m512i)__builtin_ia32_selectq_512(__U,
|
||||
(__v8di)_mm512_shldv_epi64(__A, __B, __C),
|
||||
(__v8di)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
|
||||
_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
return (__m512i)__builtin_ia32_selectd_512(__U,
|
||||
(__v16si)_mm512_shldv_epi32(__A, __B, __C),
|
||||
(__v16si)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
return (__m512i)__builtin_ia32_selectd_512(__U,
|
||||
(__v16si)_mm512_shldv_epi32(__A, __B, __C),
|
||||
(__v16si)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B,
|
||||
(__v32hi)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
(__mmask32) -1);
|
||||
return (__m512i)__builtin_ia32_selectw_512(__U,
|
||||
(__v32hi)_mm512_shldv_epi16(__A, __B, __C),
|
||||
(__v32hi)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
|
||||
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
return (__m512i)__builtin_ia32_selectw_512(__U,
|
||||
(__v32hi)_mm512_shldv_epi16(__A, __B, __C),
|
||||
(__v32hi)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B,
|
||||
(__v8di)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m512i)__builtin_ia32_selectq_512(__U,
|
||||
(__v8di)_mm512_shrdv_epi64(__A, __B, __C),
|
||||
(__v8di)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
|
||||
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
return (__m512i)__builtin_ia32_selectq_512(__U,
|
||||
(__v8di)_mm512_shrdv_epi64(__A, __B, __C),
|
||||
(__v8di)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
return (__m512i) __builtin_ia32_selectd_512(__U,
|
||||
(__v16si)_mm512_shrdv_epi32(__A, __B, __C),
|
||||
(__v16si)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
return (__m512i) __builtin_ia32_selectd_512(__U,
|
||||
(__v16si)_mm512_shrdv_epi32(__A, __B, __C),
|
||||
(__v16si)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
|
||||
_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
(__mmask32) -1);
|
||||
return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B,
|
||||
(__v32hi)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_selectw_512(__U,
|
||||
(__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
|
||||
(__v32hi)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_selectw_512(__U,
|
||||
(__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
|
||||
(__v32hi)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
|
||||
|
||||
26
c_headers/avx512vbmiintrin.h
vendored
26
c_headers/avx512vbmiintrin.h
vendored
@ -91,30 +91,26 @@ _mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
|
||||
_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __M);
|
||||
return (__m512i)__builtin_ia32_vpmultishiftqb512((__v64qi)__X, (__v64qi) __Y);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
|
||||
_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X,
|
||||
__m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi) _mm512_setzero_si512 (),
|
||||
(__mmask64) __M);
|
||||
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
|
||||
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
|
||||
(__v64qi)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
|
||||
_mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi) _mm512_undefined_epi32 (),
|
||||
(__mmask64) -1);
|
||||
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
|
||||
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
|
||||
(__v64qi)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
|
||||
|
||||
56
c_headers/avx512vbmivlintrin.h
vendored
56
c_headers/avx512vbmivlintrin.h
vendored
@ -150,61 +150,49 @@ _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
|
||||
_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __M);
|
||||
return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
|
||||
_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X,
|
||||
__m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __M);
|
||||
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
|
||||
(__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
|
||||
(__v16qi)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
|
||||
_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi)
|
||||
_mm_undefined_si128 (),
|
||||
(__mmask16) -1);
|
||||
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
|
||||
(__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
|
||||
(__v16qi)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
|
||||
_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __M);
|
||||
return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
|
||||
_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X,
|
||||
__m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __M);
|
||||
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
|
||||
(__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
|
||||
(__v32qi)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
|
||||
_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi)
|
||||
_mm256_undefined_si256 (),
|
||||
(__mmask32) -1);
|
||||
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
|
||||
(__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
|
||||
(__v32qi)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
|
||||
|
||||
75
c_headers/avx512vlbwintrin.h
vendored
75
c_headers/avx512vlbwintrin.h
vendored
@ -2297,6 +2297,15 @@ _mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
|
||||
(__v32qi) _mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_loadu_epi16 (void const *__P)
|
||||
{
|
||||
struct __loadu_epi16 {
|
||||
__m128i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
return ((struct __loadu_epi16*)__P)->__v;
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
|
||||
{
|
||||
@ -2314,6 +2323,15 @@ _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_loadu_epi16 (void const *__P)
|
||||
{
|
||||
struct __loadu_epi16 {
|
||||
__m256i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
return ((struct __loadu_epi16*)__P)->__v;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
|
||||
{
|
||||
@ -2331,6 +2349,15 @@ _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_loadu_epi8 (void const *__P)
|
||||
{
|
||||
struct __loadu_epi8 {
|
||||
__m128i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
return ((struct __loadu_epi8*)__P)->__v;
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
|
||||
{
|
||||
@ -2348,6 +2375,15 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_loadu_epi8 (void const *__P)
|
||||
{
|
||||
struct __loadu_epi8 {
|
||||
__m256i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
return ((struct __loadu_epi8*)__P)->__v;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
|
||||
{
|
||||
@ -2364,7 +2400,17 @@ _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __U);
|
||||
}
|
||||
static __inline__ void __DEFAULT_FN_ATTRS256
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS128
|
||||
_mm_storeu_epi16 (void *__P, __m128i __A)
|
||||
{
|
||||
struct __storeu_epi16 {
|
||||
__m128i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
((struct __storeu_epi16*)__P)->__v = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
__builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
|
||||
@ -2372,6 +2418,15 @@ _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS256
|
||||
_mm256_storeu_epi16 (void *__P, __m256i __A)
|
||||
{
|
||||
struct __storeu_epi16 {
|
||||
__m256i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
((struct __storeu_epi16*)__P)->__v = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
|
||||
{
|
||||
@ -2380,6 +2435,15 @@ _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS128
|
||||
_mm_storeu_epi8 (void *__P, __m128i __A)
|
||||
{
|
||||
struct __storeu_epi8 {
|
||||
__m128i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
((struct __storeu_epi8*)__P)->__v = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
|
||||
{
|
||||
@ -2388,6 +2452,15 @@ _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS256
|
||||
_mm256_storeu_epi8 (void *__P, __m256i __A)
|
||||
{
|
||||
struct __storeu_epi8 {
|
||||
__m256i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
((struct __storeu_epi8*)__P)->__v = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
|
||||
{
|
||||
|
||||
349
c_headers/avx512vlintrin.h
vendored
349
c_headers/avx512vlintrin.h
vendored
@ -461,11 +461,17 @@ _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
|
||||
(__v4si)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_and_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v8su)__a & (__v8su)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
|
||||
(__v8si)_mm256_and_si256(__A, __B),
|
||||
(__v8si)_mm256_and_epi32(__A, __B),
|
||||
(__v8si)__W);
|
||||
}
|
||||
|
||||
@ -475,11 +481,17 @@ _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
|
||||
return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_and_epi32(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)((__v4su)__a & (__v4su)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
|
||||
(__v4si)_mm_and_si128(__A, __B),
|
||||
(__v4si)_mm_and_epi32(__A, __B),
|
||||
(__v4si)__W);
|
||||
}
|
||||
|
||||
@ -489,11 +501,17 @@ _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
|
||||
return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_andnot_epi32(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)(~(__v8su)__A & (__v8su)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
|
||||
(__v8si)_mm256_andnot_si256(__A, __B),
|
||||
(__v8si)_mm256_andnot_epi32(__A, __B),
|
||||
(__v8si)__W);
|
||||
}
|
||||
|
||||
@ -504,25 +522,37 @@ _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
|
||||
__U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_andnot_epi32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)(~(__v4su)__A & (__v4su)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
|
||||
(__v4si)_mm_andnot_si128(__A, __B),
|
||||
(__v4si)_mm_andnot_epi32(__A, __B),
|
||||
(__v4si)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_or_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v8su)__a | (__v8su)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
|
||||
(__v8si)_mm256_or_si256(__A, __B),
|
||||
(__v8si)_mm256_or_epi32(__A, __B),
|
||||
(__v8si)__W);
|
||||
}
|
||||
|
||||
@ -532,11 +562,17 @@ _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
|
||||
return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_or_epi32(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)((__v4su)__a | (__v4su)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
|
||||
(__v4si)_mm_or_si128(__A, __B),
|
||||
(__v4si)_mm_or_epi32(__A, __B),
|
||||
(__v4si)__W);
|
||||
}
|
||||
|
||||
@ -546,11 +582,17 @@ _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
|
||||
return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_xor_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v8su)__a ^ (__v8su)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
|
||||
(__v8si)_mm256_xor_si256(__A, __B),
|
||||
(__v8si)_mm256_xor_epi32(__A, __B),
|
||||
(__v8si)__W);
|
||||
}
|
||||
|
||||
@ -561,11 +603,16 @@ _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
|
||||
__m128i __B)
|
||||
_mm_xor_epi32(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)((__v4su)__a ^ (__v4su)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
|
||||
(__v4si)_mm_xor_si128(__A, __B),
|
||||
(__v4si)_mm_xor_epi32(__A, __B),
|
||||
(__v4si)__W);
|
||||
}
|
||||
|
||||
@ -575,11 +622,17 @@ _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
|
||||
return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_and_epi64(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v4du)__a & (__v4du)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
|
||||
(__v4di)_mm256_and_si256(__A, __B),
|
||||
(__v4di)_mm256_and_epi64(__A, __B),
|
||||
(__v4di)__W);
|
||||
}
|
||||
|
||||
@ -589,11 +642,17 @@ _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
|
||||
return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_and_epi64(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)((__v2du)__a & (__v2du)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
|
||||
(__v2di)_mm_and_si128(__A, __B),
|
||||
(__v2di)_mm_and_epi64(__A, __B),
|
||||
(__v2di)__W);
|
||||
}
|
||||
|
||||
@ -603,11 +662,17 @@ _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
|
||||
return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_andnot_epi64(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)(~(__v4du)__A & (__v4du)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
|
||||
(__v4di)_mm256_andnot_si256(__A, __B),
|
||||
(__v4di)_mm256_andnot_epi64(__A, __B),
|
||||
(__v4di)__W);
|
||||
}
|
||||
|
||||
@ -618,11 +683,17 @@ _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
|
||||
__U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_andnot_epi64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)(~(__v2du)__A & (__v2du)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
|
||||
(__v2di)_mm_andnot_si128(__A, __B),
|
||||
(__v2di)_mm_andnot_epi64(__A, __B),
|
||||
(__v2di)__W);
|
||||
}
|
||||
|
||||
@ -632,11 +703,17 @@ _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
|
||||
return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_or_epi64(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v4du)__a | (__v4du)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
|
||||
(__v4di)_mm256_or_si256(__A, __B),
|
||||
(__v4di)_mm256_or_epi64(__A, __B),
|
||||
(__v4di)__W);
|
||||
}
|
||||
|
||||
@ -646,11 +723,17 @@ _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
|
||||
return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_or_epi64(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)((__v2du)__a | (__v2du)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
|
||||
(__v2di)_mm_or_si128(__A, __B),
|
||||
(__v2di)_mm_or_epi64(__A, __B),
|
||||
(__v2di)__W);
|
||||
}
|
||||
|
||||
@ -660,11 +743,17 @@ _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
|
||||
return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_xor_epi64(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v4du)__a ^ (__v4du)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
|
||||
(__v4di)_mm256_xor_si256(__A, __B),
|
||||
(__v4di)_mm256_xor_epi64(__A, __B),
|
||||
(__v4di)__W);
|
||||
}
|
||||
|
||||
@ -674,12 +763,18 @@ _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
|
||||
return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_xor_epi64(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)((__v2du)__a ^ (__v2du)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
|
||||
(__v2di)_mm_xor_si128(__A, __B),
|
||||
(__v2di)_mm_xor_epi64(__A, __B),
|
||||
(__v2di)__W);
|
||||
}
|
||||
|
||||
@ -3389,162 +3484,162 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
|
||||
}
|
||||
|
||||
#define _mm_i64scatter_pd(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
|
||||
(__v2di)(__m128i)(index), \
|
||||
(__v2df)(__m128d)(v1), (int)(scale))
|
||||
|
||||
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
|
||||
(__v2di)(__m128i)(index), \
|
||||
(__v2df)(__m128d)(v1), (int)(scale))
|
||||
|
||||
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
|
||||
(__v2di)(__m128i)(index), \
|
||||
(__v2di)(__m128i)(v1), (int)(scale))
|
||||
|
||||
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
|
||||
(__v2di)(__m128i)(index), \
|
||||
(__v2di)(__m128i)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
|
||||
(__v4di)(__m256i)(index), \
|
||||
(__v4df)(__m256d)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
|
||||
(__v4di)(__m256i)(index), \
|
||||
(__v4df)(__m256d)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
|
||||
(__v4di)(__m256i)(index), \
|
||||
(__v4di)(__m256i)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
|
||||
(__v4di)(__m256i)(index), \
|
||||
(__v4di)(__m256i)(v1), (int)(scale))
|
||||
|
||||
#define _mm_i64scatter_ps(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
|
||||
(__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
|
||||
(__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
|
||||
(__v2di)(__m128i)(index), \
|
||||
(__v4si)(__m128i)(v1), (int)(scale))
|
||||
|
||||
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
|
||||
(__v2di)(__m128i)(index), \
|
||||
(__v4si)(__m128i)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
|
||||
(__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
|
||||
(__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
|
||||
(__v4di)(__m256i)(index), \
|
||||
(__v4si)(__m128i)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
|
||||
(__v4di)(__m256i)(index), \
|
||||
(__v4si)(__m128i)(v1), (int)(scale))
|
||||
|
||||
#define _mm_i32scatter_pd(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__v2df)(__m128d)(v1), (int)(scale))
|
||||
|
||||
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__v2df)(__m128d)(v1), (int)(scale))
|
||||
|
||||
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__v2di)(__m128i)(v1), (int)(scale))
|
||||
|
||||
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__v2di)(__m128i)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__v4df)(__m256d)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__v4df)(__m256d)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__v4di)(__m256i)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__v4di)(__m256i)(v1), (int)(scale))
|
||||
|
||||
#define _mm_i32scatter_ps(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
|
||||
(__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
|
||||
(__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__v4si)(__m128i)(v1), (int)(scale))
|
||||
|
||||
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__v4si)(__m128i)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
|
||||
(__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
|
||||
(__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
|
||||
(int)(scale))
|
||||
|
||||
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
|
||||
__builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__v8si)(__m256i)(v1), (int)(scale))
|
||||
|
||||
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
|
||||
__builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
|
||||
__builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__v8si)(__m256i)(v1), (int)(scale))
|
||||
|
||||
@ -4989,6 +5084,12 @@ _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
|
||||
(__v8si) _mm256_setzero_si256 ());
|
||||
}
|
||||
|
||||
static __inline __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_load_epi32 (void const *__P)
|
||||
{
|
||||
return *(__m128i *) __P;
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
|
||||
{
|
||||
@ -5008,6 +5109,12 @@ _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_load_epi32 (void const *__P)
|
||||
{
|
||||
return *(__m256i *) __P;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
|
||||
{
|
||||
@ -5027,6 +5134,12 @@ _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS128
|
||||
_mm_store_epi32 (void *__P, __m128i __A)
|
||||
{
|
||||
*(__m128i *) __P = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
@ -5035,6 +5148,12 @@ _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS256
|
||||
_mm256_store_epi32 (void *__P, __m256i __A)
|
||||
{
|
||||
*(__m256i *) __P = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
@ -5075,6 +5194,12 @@ _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
|
||||
(__v4di) _mm256_setzero_si256 ());
|
||||
}
|
||||
|
||||
static __inline __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_load_epi64 (void const *__P)
|
||||
{
|
||||
return *(__m128i *) __P;
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
|
||||
{
|
||||
@ -5094,6 +5219,12 @@ _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_load_epi64 (void const *__P)
|
||||
{
|
||||
return *(__m256i *) __P;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
|
||||
{
|
||||
@ -5113,6 +5244,12 @@ _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS128
|
||||
_mm_store_epi64 (void *__P, __m128i __A)
|
||||
{
|
||||
*(__m128i *) __P = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
@ -5121,6 +5258,12 @@ _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS256
|
||||
_mm256_store_epi64 (void *__P, __m256i __A)
|
||||
{
|
||||
*(__m256i *) __P = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
@ -5366,6 +5509,15 @@ _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_loadu_epi64 (void const *__P)
|
||||
{
|
||||
struct __loadu_epi64 {
|
||||
__m128i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
return ((struct __loadu_epi64*)__P)->__v;
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
|
||||
{
|
||||
@ -5383,6 +5535,15 @@ _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_loadu_epi64 (void const *__P)
|
||||
{
|
||||
struct __loadu_epi64 {
|
||||
__m256i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
return ((struct __loadu_epi64*)__P)->__v;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
|
||||
{
|
||||
@ -5400,6 +5561,15 @@ _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_loadu_epi32 (void const *__P)
|
||||
{
|
||||
struct __loadu_epi32 {
|
||||
__m128i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
return ((struct __loadu_epi32*)__P)->__v;
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
|
||||
{
|
||||
@ -5417,6 +5587,15 @@ _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_loadu_epi32 (void const *__P)
|
||||
{
|
||||
struct __loadu_epi32 {
|
||||
__m256i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
return ((struct __loadu_epi32*)__P)->__v;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
|
||||
{
|
||||
@ -5534,6 +5713,15 @@ _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS128
|
||||
_mm_storeu_epi64 (void *__P, __m128i __A)
|
||||
{
|
||||
struct __storeu_epi64 {
|
||||
__m128i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
((struct __storeu_epi64*)__P)->__v = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
@ -5542,6 +5730,15 @@ _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS256
|
||||
_mm256_storeu_epi64 (void *__P, __m256i __A)
|
||||
{
|
||||
struct __storeu_epi64 {
|
||||
__m256i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
((struct __storeu_epi64*)__P)->__v = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
@ -5550,6 +5747,15 @@ _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS128
|
||||
_mm_storeu_epi32 (void *__P, __m128i __A)
|
||||
{
|
||||
struct __storeu_epi32 {
|
||||
__m128i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
((struct __storeu_epi32*)__P)->__v = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
@ -5558,6 +5764,15 @@ _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline void __DEFAULT_FN_ATTRS256
|
||||
_mm256_storeu_epi32 (void *__P, __m256i __A)
|
||||
{
|
||||
struct __storeu_epi32 {
|
||||
__m256i __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
((struct __storeu_epi32*)__P)->__v = __A;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
@ -7769,97 +7984,97 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
|
||||
|
||||
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
|
||||
(__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
|
||||
(double const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v2di)(__m128i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
|
||||
(__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
|
||||
(long long const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v2di)(__m128i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
|
||||
(__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
|
||||
(double const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v4di)(__m256i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
|
||||
(__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
|
||||
(long long const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v4di)(__m256i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
|
||||
(__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
|
||||
(float const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v2di)(__m128i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
|
||||
(__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
|
||||
(int const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v2di)(__m128i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
|
||||
(__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
|
||||
(float const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v4di)(__m256i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
|
||||
(__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
|
||||
(int const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v4di)(__m256i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
|
||||
(__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
|
||||
(double const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
|
||||
(__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
|
||||
(long long const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
|
||||
(__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
|
||||
(double const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
|
||||
(__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
|
||||
(long long const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
|
||||
(__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
|
||||
(float const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
|
||||
(__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
|
||||
(int const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v4si)(__m128i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
|
||||
(__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
|
||||
(float const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
|
||||
(__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
|
||||
(int const *)(addr), \
|
||||
(void const *)(addr), \
|
||||
(__v8si)(__m256i)(index), \
|
||||
(__mmask8)(mask), (int)(scale))
|
||||
|
||||
|
||||
312
c_headers/avx512vlvbmi2intrin.h
vendored
312
c_headers/avx512vlvbmi2intrin.h
vendored
@ -421,327 +421,279 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
|
||||
(__v8hi)_mm_setzero_si128())
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
_mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B,
|
||||
(__v4di)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_selectq_256(__U,
|
||||
(__v4di)_mm256_shldv_epi64(__A, __B, __C),
|
||||
(__v4di)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m256i)__builtin_ia32_selectq_256(__U,
|
||||
(__v4di)_mm256_shldv_epi64(__A, __B, __C),
|
||||
(__v4di)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B,
|
||||
(__v2di)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_selectq_128(__U,
|
||||
(__v2di)_mm_shldv_epi64(__A, __B, __C),
|
||||
(__v2di)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m128i)__builtin_ia32_selectq_128(__U,
|
||||
(__v2di)_mm_shldv_epi64(__A, __B, __C),
|
||||
(__v2di)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
_mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B,
|
||||
(__v8si)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_selectd_256(__U,
|
||||
(__v8si)_mm256_shldv_epi32(__A, __B, __C),
|
||||
(__v8si)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m256i)__builtin_ia32_selectd_256(__U,
|
||||
(__v8si)_mm256_shldv_epi32(__A, __B, __C),
|
||||
(__v8si)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B,
|
||||
(__v4si)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_selectd_128(__U,
|
||||
(__v4si)_mm_shldv_epi32(__A, __B, __C),
|
||||
(__v4si)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m128i)__builtin_ia32_selectd_128(__U,
|
||||
(__v4si)_mm_shldv_epi32(__A, __B, __C),
|
||||
(__v4si)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
|
||||
_mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B,
|
||||
(__v16hi)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_selectw_256(__U,
|
||||
(__v16hi)_mm256_shldv_epi16(__A, __B, __C),
|
||||
(__v16hi)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
(__mmask16) -1);
|
||||
return (__m256i)__builtin_ia32_selectw_256(__U,
|
||||
(__v16hi)_mm256_shldv_epi16(__A, __B, __C),
|
||||
(__v16hi)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B,
|
||||
(__v8hi)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_selectw_128(__U,
|
||||
(__v8hi)_mm_shldv_epi16(__A, __B, __C),
|
||||
(__v8hi)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m128i)__builtin_ia32_selectw_128(__U,
|
||||
(__v8hi)_mm_shldv_epi16(__A, __B, __C),
|
||||
(__v8hi)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
_mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B,
|
||||
(__v4di)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_selectq_256(__U,
|
||||
(__v4di)_mm256_shrdv_epi64(__A, __B, __C),
|
||||
(__v4di)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m256i)__builtin_ia32_selectq_256(__U,
|
||||
(__v4di)_mm256_shrdv_epi64(__A, __B, __C),
|
||||
(__v4di)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B,
|
||||
(__v2di)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_selectq_128(__U,
|
||||
(__v2di)_mm_shrdv_epi64(__A, __B, __C),
|
||||
(__v2di)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m128i)__builtin_ia32_selectq_128(__U,
|
||||
(__v2di)_mm_shrdv_epi64(__A, __B, __C),
|
||||
(__v2di)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
_mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B,
|
||||
(__v8si)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_selectd_256(__U,
|
||||
(__v8si)_mm256_shrdv_epi32(__A, __B, __C),
|
||||
(__v8si)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m256i)__builtin_ia32_selectd_256(__U,
|
||||
(__v8si)_mm256_shrdv_epi32(__A, __B, __C),
|
||||
(__v8si)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B,
|
||||
(__v4si)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_selectd_128(__U,
|
||||
(__v4si)_mm_shrdv_epi32(__A, __B, __C),
|
||||
(__v4si)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m128i)__builtin_ia32_selectd_128(__U,
|
||||
(__v4si)_mm_shrdv_epi32(__A, __B, __C),
|
||||
(__v4si)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
|
||||
_mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B,
|
||||
(__v16hi)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_selectw_256(__U,
|
||||
(__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
|
||||
(__v16hi)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
|
||||
_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
(__mmask16) -1);
|
||||
return (__m256i)__builtin_ia32_selectw_256(__U,
|
||||
(__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
|
||||
(__v16hi)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B,
|
||||
(__v8hi)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_selectw_128(__U,
|
||||
(__v8hi)_mm_shrdv_epi16(__A, __B, __C),
|
||||
(__v8hi)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
(__mmask8) -1);
|
||||
return (__m128i)__builtin_ia32_selectw_128(__U,
|
||||
(__v8hi)_mm_shrdv_epi16(__A, __B, __C),
|
||||
(__v8hi)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
|
||||
|
||||
10
c_headers/bmiintrin.h
vendored
10
c_headers/bmiintrin.h
vendored
@ -62,7 +62,7 @@
|
||||
static __inline__ unsigned short __RELAXED_FN_ATTRS
|
||||
__tzcnt_u16(unsigned short __X)
|
||||
{
|
||||
return __X ? __builtin_ctzs(__X) : 16;
|
||||
return __builtin_ia32_tzcnt_u16(__X);
|
||||
}
|
||||
|
||||
/// Performs a bitwise AND of the second operand with the one's
|
||||
@ -196,7 +196,7 @@ __blsr_u32(unsigned int __X)
|
||||
static __inline__ unsigned int __RELAXED_FN_ATTRS
|
||||
__tzcnt_u32(unsigned int __X)
|
||||
{
|
||||
return __X ? __builtin_ctz(__X) : 32;
|
||||
return __builtin_ia32_tzcnt_u32(__X);
|
||||
}
|
||||
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
@ -212,7 +212,7 @@ __tzcnt_u32(unsigned int __X)
|
||||
static __inline__ int __RELAXED_FN_ATTRS
|
||||
_mm_tzcnt_32(unsigned int __X)
|
||||
{
|
||||
return __X ? __builtin_ctz(__X) : 32;
|
||||
return __builtin_ia32_tzcnt_u32(__X);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
@ -359,7 +359,7 @@ __blsr_u64(unsigned long long __X)
|
||||
static __inline__ unsigned long long __RELAXED_FN_ATTRS
|
||||
__tzcnt_u64(unsigned long long __X)
|
||||
{
|
||||
return __X ? __builtin_ctzll(__X) : 64;
|
||||
return __builtin_ia32_tzcnt_u64(__X);
|
||||
}
|
||||
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
@ -375,7 +375,7 @@ __tzcnt_u64(unsigned long long __X)
|
||||
static __inline__ long long __RELAXED_FN_ATTRS
|
||||
_mm_tzcnt_64(unsigned long long __X)
|
||||
{
|
||||
return __X ? __builtin_ctzll(__X) : 64;
|
||||
return __builtin_ia32_tzcnt_u64(__X);
|
||||
}
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
@ -73,10 +73,12 @@ __device__ inline void operator delete[](void *ptr,
|
||||
|
||||
// Sized delete, C++14 only.
|
||||
#if __cplusplus >= 201402L
|
||||
__device__ void operator delete(void *ptr, __SIZE_TYPE__ size) CUDA_NOEXCEPT {
|
||||
__device__ inline void operator delete(void *ptr,
|
||||
__SIZE_TYPE__ size) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
__device__ void operator delete[](void *ptr, __SIZE_TYPE__ size) CUDA_NOEXCEPT {
|
||||
__device__ inline void operator delete[](void *ptr,
|
||||
__SIZE_TYPE__ size) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
107
c_headers/emmintrin.h
vendored
107
c_headers/emmintrin.h
vendored
@ -1675,7 +1675,49 @@ _mm_loadu_si64(void const *__a)
|
||||
long long __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
long long __u = ((struct __loadu_si64*)__a)->__v;
|
||||
return __extension__ (__m128i)(__v2di){__u, 0L};
|
||||
return __extension__ (__m128i)(__v2di){__u, 0LL};
|
||||
}
|
||||
|
||||
/// Loads a 32-bit integer value to the low element of a 128-bit integer
|
||||
/// vector and clears the upper element.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A pointer to a 32-bit memory location. The address of the memory
|
||||
/// location does not have to be aligned.
|
||||
/// \returns A 128-bit vector of [4 x i32] containing the loaded value.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_loadu_si32(void const *__a)
|
||||
{
/* The int is read through a __packed__, __may_alias__ wrapper struct rather
 * than through a plain int pointer, so __a need not be aligned. */
|
||||
struct __loadu_si32 {
|
||||
int __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
int __u = ((struct __loadu_si32*)__a)->__v;
/* The loaded value becomes element 0; the other three elements are zero. */
|
||||
return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};
|
||||
}
|
||||
|
||||
/// Loads a 16-bit integer value to the low element of a 128-bit integer
|
||||
/// vector and clears the upper element.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic does not correspond to a specific instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A pointer to a 16-bit memory location. The address of the memory
|
||||
/// location does not have to be aligned.
|
||||
/// \returns A 128-bit vector of [8 x i16] containing the loaded value.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_loadu_si16(void const *__a)
|
||||
{
/* The short is read through a __packed__, __may_alias__ wrapper struct rather
 * than through a plain short pointer, so __a need not be aligned. */
|
||||
struct __loadu_si16 {
|
||||
short __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
short __u = ((struct __loadu_si16*)__a)->__v;
/* The loaded value becomes element 0; the other seven elements are zero. */
|
||||
return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
|
||||
}
|
||||
|
||||
/// Loads a 64-bit double-precision value to the low element of a
|
||||
@ -3993,6 +4035,69 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
|
||||
((struct __storeu_si128*)__p)->__v = __b;
|
||||
}
|
||||
|
||||
/// Stores a 64-bit integer value from the low element of a 128-bit integer
|
||||
/// vector.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
|
||||
///
|
||||
/// \param __p
|
||||
/// A pointer to a 64-bit memory location. The address of the memory
|
||||
/// location does not have to be aligned.
|
||||
/// \param __b
|
||||
/// A 128-bit integer vector containing the value to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_storeu_si64(void const *__p, __m128i __b)
|
||||
{
|
||||
/* NOTE(review): __p is declared `void const *`, yet the cast below drops the
 * const qualifier and the function writes through the pointer. */
struct __storeu_si64 {
|
||||
long long __v;
|
||||
} __attribute__((__packed__, __may_alias__));
/* Only element 0 of __b, viewed as two 64-bit lanes, is written. */
|
||||
((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];
|
||||
}
|
||||
|
||||
/// Stores a 32-bit integer value from the low element of a 128-bit integer
|
||||
/// vector.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
|
||||
///
|
||||
/// \param __p
|
||||
/// A pointer to a 32-bit memory location. The address of the memory
|
||||
/// location does not have to be aligned.
|
||||
/// \param __b
|
||||
/// A 128-bit integer vector containing the value to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_storeu_si32(void const *__p, __m128i __b)
|
||||
{
|
||||
/* NOTE(review): __p is declared `void const *`, yet the cast below drops the
 * const qualifier and the function writes through the pointer. */
struct __storeu_si32 {
|
||||
int __v;
|
||||
} __attribute__((__packed__, __may_alias__));
/* Only element 0 of __b, viewed as four 32-bit lanes, is written. */
|
||||
((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];
|
||||
}
|
||||
|
||||
/// Stores a 16-bit integer value from the low element of a 128-bit integer
|
||||
/// vector.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic does not correspond to a specific instruction.
|
||||
///
|
||||
/// \param __p
|
||||
/// A pointer to a 16-bit memory location. The address of the memory
|
||||
/// location does not have to be aligned.
|
||||
/// \param __b
|
||||
/// A 128-bit integer vector containing the value to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_storeu_si16(void const *__p, __m128i __b)
|
||||
{
|
||||
/* NOTE(review): __p is declared `void const *`, yet the cast below drops the
 * const qualifier and the function writes through the pointer. */
struct __storeu_si16 {
|
||||
short __v;
|
||||
} __attribute__((__packed__, __may_alias__));
/* Only element 0 of __b, viewed as eight 16-bit lanes, is written. */
|
||||
((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];
|
||||
}
|
||||
|
||||
/// Moves bytes selected by the mask from the first operand to the
|
||||
/// specified unaligned memory location. When a mask bit is 1, the
|
||||
/// corresponding byte is written, otherwise it is not written.
|
||||
|
||||
12
c_headers/float.h
vendored
12
c_headers/float.h
vendored
@ -21,8 +21,8 @@
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __FLOAT_H
|
||||
#define __FLOAT_H
|
||||
#ifndef __CLANG_FLOAT_H
|
||||
#define __CLANG_FLOAT_H
|
||||
|
||||
/* If we're on MinGW, fall back to the system's float.h, which might have
|
||||
* additional definitions provided for Windows.
|
||||
@ -85,6 +85,9 @@
|
||||
# undef FLT_DECIMAL_DIG
|
||||
# undef DBL_DECIMAL_DIG
|
||||
# undef LDBL_DECIMAL_DIG
|
||||
# undef FLT_HAS_SUBNORM
|
||||
# undef DBL_HAS_SUBNORM
|
||||
# undef LDBL_HAS_SUBNORM
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@ -141,6 +144,9 @@
|
||||
# define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__
|
||||
# define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
|
||||
# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
|
||||
# define FLT_HAS_SUBNORM __FLT_HAS_DENORM__
|
||||
# define DBL_HAS_SUBNORM __DBL_HAS_DENORM__
|
||||
# define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__
|
||||
#endif
|
||||
|
||||
#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
@ -157,4 +163,4 @@
|
||||
# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__
|
||||
#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */
|
||||
|
||||
#endif /* __FLOAT_H */
|
||||
#endif /* __CLANG_FLOAT_H */
|
||||
|
||||
59
c_headers/immintrin.h
vendored
59
c_headers/immintrin.h
vendored
@ -306,6 +306,65 @@ _writegsbase_u64(unsigned long long __V)
|
||||
#endif
|
||||
#endif /* __FSGSBASE__ */
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)
|
||||
|
||||
/* The structs used below are to force the load/store to be unaligned. This
|
||||
* is accomplished with the __packed__ attribute. The __may_alias__ prevents
|
||||
* tbaa metadata from being generated based on the struct and the type of the
|
||||
* field inside of it.
|
||||
*/
|
||||
|
||||
/* Reads a 16-bit value from __P through a packed, may-alias struct (so __P may
 * be unaligned) and returns it with its two bytes swapped. */
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
|
||||
_loadbe_i16(void const * __P) {
|
||||
struct __loadu_i16 {
|
||||
short __v;
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);
|
||||
}
|
||||
|
||||
/* Stores the 16-bit value __D, with its two bytes swapped, at address __P.
 *
 * The destination is accessed through a local __packed__, __may_alias__
 * struct so the store is treated as unaligned and carries no type-based
 * alias metadata.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  struct __storeu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i16 *__dst = (struct __storeu_i16*)__P;
  __dst->__v = __builtin_bswap16(__D);
}
|
||||
|
||||
/* Loads a 32-bit value from the address __P and returns it with its four
 * bytes reversed.
 *
 * The access goes through a local struct that is __packed__ (so the load is
 * treated as unaligned) and __may_alias__ (so no type-based alias metadata
 * is derived from the struct or its field).
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  struct __loadu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  /* Keep the pointee const-qualified instead of casting the qualifier away,
   * and spell out the unsigned-to-signed conversion of the swapped value. */
  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
|
||||
|
||||
/* Stores the 32-bit value __D, with its four bytes reversed, at address __P.
 *
 * The destination is accessed through a local __packed__, __may_alias__
 * struct so the store is treated as unaligned and carries no type-based
 * alias metadata.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  struct __storeu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i32 *__dst = (struct __storeu_i32*)__P;
  __dst->__v = __builtin_bswap32(__D);
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Loads a 64-bit value from the address __P and returns it with its eight
 * bytes reversed.
 *
 * The access goes through a local struct that is __packed__ (so the load is
 * treated as unaligned) and __may_alias__ (so no type-based alias metadata
 * is derived from the struct or its field).
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  struct __loadu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  /* Keep the pointee const-qualified instead of casting the qualifier away,
   * and spell out the unsigned-to-signed conversion of the swapped value. */
  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
|
||||
|
||||
/* Stores the 64-bit value __D, with its eight bytes reversed, at address __P.
 *
 * The destination is accessed through a local __packed__, __may_alias__
 * struct so the store is treated as unaligned and carries no type-based
 * alias metadata.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  struct __storeu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  struct __storeu_i64 *__dst = (struct __storeu_i64*)__P;
  __dst->__v = __builtin_bswap64(__D);
}
|
||||
#endif
|
||||
#endif /* __MOVBE__ */
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
|
||||
#include <rtmintrin.h>
|
||||
#include <xtestintrin.h>
|
||||
|
||||
538
c_headers/intrin.h
vendored
538
c_headers/intrin.h
vendored
@ -90,8 +90,6 @@ void __inwordstring(unsigned short, unsigned short *, unsigned long);
|
||||
void __lidt(void *);
|
||||
unsigned __int64 __ll_lshift(unsigned __int64, int);
|
||||
__int64 __ll_rshift(__int64, int);
|
||||
unsigned int __lzcnt(unsigned int);
|
||||
unsigned short __lzcnt16(unsigned short);
|
||||
static __inline__
|
||||
void __movsb(unsigned char *, unsigned char const *, size_t);
|
||||
static __inline__
|
||||
@ -219,7 +217,6 @@ void __incgsbyte(unsigned long);
|
||||
void __incgsdword(unsigned long);
|
||||
void __incgsqword(unsigned long);
|
||||
void __incgsword(unsigned long);
|
||||
unsigned __int64 __lzcnt64(unsigned __int64);
|
||||
static __inline__
|
||||
void __movsq(unsigned long long *, unsigned long long const *, size_t);
|
||||
static __inline__
|
||||
@ -329,189 +326,63 @@ __int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);
|
||||
|* Interlocked Exchange Add
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value) {
|
||||
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);
|
||||
}
|
||||
char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value);
|
||||
char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value);
|
||||
char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value);
|
||||
short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value);
|
||||
short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value);
|
||||
short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value);
|
||||
long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value);
|
||||
long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value);
|
||||
long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value);
|
||||
__int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value);
|
||||
__int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value);
|
||||
__int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Increment
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedIncrement16_acq(short volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedIncrement16_nf(short volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedIncrement16_rel(short volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedIncrement_acq(long volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedIncrement_nf(long volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedIncrement_rel(long volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedIncrement64_acq(__int64 volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedIncrement64_nf(__int64 volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedIncrement64_rel(__int64 volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);
|
||||
}
|
||||
short _InterlockedIncrement16_acq(short volatile *_Value);
|
||||
short _InterlockedIncrement16_nf(short volatile *_Value);
|
||||
short _InterlockedIncrement16_rel(short volatile *_Value);
|
||||
long _InterlockedIncrement_acq(long volatile *_Value);
|
||||
long _InterlockedIncrement_nf(long volatile *_Value);
|
||||
long _InterlockedIncrement_rel(long volatile *_Value);
|
||||
__int64 _InterlockedIncrement64_acq(__int64 volatile *_Value);
|
||||
__int64 _InterlockedIncrement64_nf(__int64 volatile *_Value);
|
||||
__int64 _InterlockedIncrement64_rel(__int64 volatile *_Value);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Decrement
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedDecrement16_acq(short volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedDecrement16_nf(short volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedDecrement16_rel(short volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedDecrement_acq(long volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedDecrement_nf(long volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedDecrement_rel(long volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedDecrement64_acq(__int64 volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedDecrement64_nf(__int64 volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedDecrement64_rel(__int64 volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);
|
||||
}
|
||||
short _InterlockedDecrement16_acq(short volatile *_Value);
|
||||
short _InterlockedDecrement16_nf(short volatile *_Value);
|
||||
short _InterlockedDecrement16_rel(short volatile *_Value);
|
||||
long _InterlockedDecrement_acq(long volatile *_Value);
|
||||
long _InterlockedDecrement_nf(long volatile *_Value);
|
||||
long _InterlockedDecrement_rel(long volatile *_Value);
|
||||
__int64 _InterlockedDecrement64_acq(__int64 volatile *_Value);
|
||||
__int64 _InterlockedDecrement64_nf(__int64 volatile *_Value);
|
||||
__int64 _InterlockedDecrement64_rel(__int64 volatile *_Value);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked And
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd8_acq(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd8_nf(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd8_rel(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd16_acq(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd16_nf(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd16_rel(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd_acq(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd_nf(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd_rel(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
char _InterlockedAnd8_acq(char volatile *_Value, char _Mask);
|
||||
char _InterlockedAnd8_nf(char volatile *_Value, char _Mask);
|
||||
char _InterlockedAnd8_rel(char volatile *_Value, char _Mask);
|
||||
short _InterlockedAnd16_acq(short volatile *_Value, short _Mask);
|
||||
short _InterlockedAnd16_nf(short volatile *_Value, short _Mask);
|
||||
short _InterlockedAnd16_rel(short volatile *_Value, short _Mask);
|
||||
long _InterlockedAnd_acq(long volatile *_Value, long _Mask);
|
||||
long _InterlockedAnd_nf(long volatile *_Value, long _Mask);
|
||||
long _InterlockedAnd_rel(long volatile *_Value, long _Mask);
|
||||
__int64 _InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Bit Counting and Testing
|
||||
@ -534,261 +405,81 @@ unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,
|
||||
|* Interlocked Or
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr8_acq(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr8_nf(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr8_rel(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr16_acq(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr16_nf(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr16_rel(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr_acq(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr_nf(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr_rel(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
char _InterlockedOr8_acq(char volatile *_Value, char _Mask);
|
||||
char _InterlockedOr8_nf(char volatile *_Value, char _Mask);
|
||||
char _InterlockedOr8_rel(char volatile *_Value, char _Mask);
|
||||
short _InterlockedOr16_acq(short volatile *_Value, short _Mask);
|
||||
short _InterlockedOr16_nf(short volatile *_Value, short _Mask);
|
||||
short _InterlockedOr16_rel(short volatile *_Value, short _Mask);
|
||||
long _InterlockedOr_acq(long volatile *_Value, long _Mask);
|
||||
long _InterlockedOr_nf(long volatile *_Value, long _Mask);
|
||||
long _InterlockedOr_rel(long volatile *_Value, long _Mask);
|
||||
__int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Xor
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor8_acq(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor8_nf(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor8_rel(char volatile *_Value, char _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor16_acq(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor16_nf(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor16_rel(short volatile *_Value, short _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor_acq(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor_nf(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor_rel(long volatile *_Value, long _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
|
||||
}
|
||||
char _InterlockedXor8_acq(char volatile *_Value, char _Mask);
|
||||
char _InterlockedXor8_nf(char volatile *_Value, char _Mask);
|
||||
char _InterlockedXor8_rel(char volatile *_Value, char _Mask);
|
||||
short _InterlockedXor16_acq(short volatile *_Value, short _Mask);
|
||||
short _InterlockedXor16_nf(short volatile *_Value, short _Mask);
|
||||
short _InterlockedXor16_rel(short volatile *_Value, short _Mask);
|
||||
long _InterlockedXor_acq(long volatile *_Value, long _Mask);
|
||||
long _InterlockedXor_nf(long volatile *_Value, long _Mask);
|
||||
long _InterlockedXor_rel(long volatile *_Value, long _Mask);
|
||||
__int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Exchange
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange8_acq(char volatile *_Target, char _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange8_nf(char volatile *_Target, char _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange8_rel(char volatile *_Target, char _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange16_acq(short volatile *_Target, short _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange16_nf(short volatile *_Target, short _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange16_rel(short volatile *_Target, short _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange_acq(long volatile *_Target, long _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange_nf(long volatile *_Target, long _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange_rel(long volatile *_Target, long _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
|
||||
return _Value;
|
||||
}
|
||||
char _InterlockedExchange8_acq(char volatile *_Target, char _Value);
|
||||
char _InterlockedExchange8_nf(char volatile *_Target, char _Value);
|
||||
char _InterlockedExchange8_rel(char volatile *_Target, char _Value);
|
||||
short _InterlockedExchange16_acq(short volatile *_Target, short _Value);
|
||||
short _InterlockedExchange16_nf(short volatile *_Target, short _Value);
|
||||
short _InterlockedExchange16_rel(short volatile *_Target, short _Value);
|
||||
long _InterlockedExchange_acq(long volatile *_Target, long _Value);
|
||||
long _InterlockedExchange_nf(long volatile *_Target, long _Value);
|
||||
long _InterlockedExchange_rel(long volatile *_Target, long _Value);
|
||||
__int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value);
|
||||
__int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value);
|
||||
__int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Compare Exchange
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange8_acq(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange8_nf(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ char __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange8_rel(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange16_acq(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange16_nf(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange16_rel(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange_acq(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange_nf(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange_rel(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange64_acq(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ __int64 __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
|
||||
return _Comparand;
|
||||
}
|
||||
char _InterlockedCompareExchange8_acq(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand);
|
||||
char _InterlockedCompareExchange8_nf(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand);
|
||||
char _InterlockedCompareExchange8_rel(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand);
|
||||
short _InterlockedCompareExchange16_acq(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
short _InterlockedCompareExchange16_nf(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
short _InterlockedCompareExchange16_rel(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
long _InterlockedCompareExchange_acq(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand);
|
||||
long _InterlockedCompareExchange_nf(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand);
|
||||
long _InterlockedCompareExchange_rel(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand);
|
||||
__int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand);
|
||||
__int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand);
|
||||
__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand);
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
@ -841,7 +532,7 @@ __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__cpuid(int __info[4], int __level) {
|
||||
__asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3])
|
||||
: "a"(__level));
|
||||
: "a"(__level), "c"(0));
|
||||
}
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__cpuidex(int __info[4], int __level, int __ecx) {
|
||||
@ -858,12 +549,35 @@ static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__halt(void) {
|
||||
__asm__ volatile ("hlt");
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__nop(void) {
|
||||
__asm__ volatile ("nop");
|
||||
}
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* MS AArch64 specific
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__aarch64__)
|
||||
unsigned __int64 __getReg(int);
|
||||
long _InterlockedAdd(long volatile *Addend, long Value);
|
||||
__int64 _ReadStatusReg(int);
|
||||
void _WriteStatusReg(int, __int64);
|
||||
|
||||
static inline unsigned short _byteswap_ushort (unsigned short val) {
|
||||
return __builtin_bswap16(val);
|
||||
}
|
||||
static inline unsigned long _byteswap_ulong (unsigned long val) {
|
||||
return __builtin_bswap32(val);
|
||||
}
|
||||
static inline unsigned __int64 _byteswap_uint64 (unsigned __int64 val) {
|
||||
return __builtin_bswap64(val);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Privileged intrinsics
|
||||
\*----------------------------------------------------------------------------*/
|
||||
|
||||
22
c_headers/lzcntintrin.h
vendored
22
c_headers/lzcntintrin.h
vendored
@ -31,6 +31,7 @@
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
|
||||
|
||||
#ifndef _MSC_VER
|
||||
/// Counts the number of leading zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@ -41,11 +42,8 @@
|
||||
/// An unsigned 16-bit integer whose leading zeros are to be counted.
|
||||
/// \returns An unsigned 16-bit integer containing the number of leading zero
|
||||
/// bits in the operand.
|
||||
static __inline__ unsigned short __DEFAULT_FN_ATTRS
|
||||
__lzcnt16(unsigned short __X)
|
||||
{
|
||||
return __X ? __builtin_clzs(__X) : 16;
|
||||
}
|
||||
#define __lzcnt16(X) __builtin_ia32_lzcnt_u16((unsigned short)(X))
|
||||
#endif // _MSC_VER
|
||||
|
||||
/// Counts the number of leading zero bits in the operand.
|
||||
///
|
||||
@ -61,7 +59,7 @@ __lzcnt16(unsigned short __X)
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__lzcnt32(unsigned int __X)
|
||||
{
|
||||
return __X ? __builtin_clz(__X) : 32;
|
||||
return __builtin_ia32_lzcnt_u32(__X);
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in the operand.
|
||||
@ -78,10 +76,11 @@ __lzcnt32(unsigned int __X)
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
_lzcnt_u32(unsigned int __X)
|
||||
{
|
||||
return __X ? __builtin_clz(__X) : 32;
|
||||
return __builtin_ia32_lzcnt_u32(__X);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
#ifndef _MSC_VER
|
||||
/// Counts the number of leading zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@ -93,11 +92,8 @@ _lzcnt_u32(unsigned int __X)
|
||||
/// \returns An unsigned 64-bit integer containing the number of leading zero
|
||||
/// bits in the operand.
|
||||
/// \see _lzcnt_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__lzcnt64(unsigned long long __X)
|
||||
{
|
||||
return __X ? __builtin_clzll(__X) : 64;
|
||||
}
|
||||
#define __lzcnt64(X) __builtin_ia32_lzcnt_u64((unsigned long long)(X))
|
||||
#endif // _MSC_VER
|
||||
|
||||
/// Counts the number of leading zero bits in the operand.
|
||||
///
|
||||
@ -113,7 +109,7 @@ __lzcnt64(unsigned long long __X)
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
_lzcnt_u64(unsigned long long __X)
|
||||
{
|
||||
return __X ? __builtin_clzll(__X) : 64;
|
||||
return __builtin_ia32_lzcnt_u64(__X);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
657
c_headers/opencl-c.h
vendored
657
c_headers/opencl-c.h
vendored
@ -22,6 +22,14 @@
|
||||
#endif //cl_khr_3d_image_writes
|
||||
#endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#ifndef cl_intel_planar_yuv
|
||||
#define cl_intel_planar_yuv
|
||||
#endif // cl_intel_planar_yuv
|
||||
#pragma OPENCL EXTENSION cl_intel_planar_yuv : begin
|
||||
#pragma OPENCL EXTENSION cl_intel_planar_yuv : end
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
|
||||
#define __ovld __attribute__((overloadable))
|
||||
#define __conv __attribute__((convergent))
|
||||
|
||||
@ -14602,6 +14610,7 @@ int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, f
|
||||
uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, int4 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord);
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);
|
||||
float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
|
||||
|
||||
@ -14609,6 +14618,7 @@ int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_
|
||||
int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
|
||||
float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, int coord);
|
||||
float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord);
|
||||
@ -14618,6 +14628,7 @@ int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, f
|
||||
uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, int coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord);
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);
|
||||
float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
|
||||
|
||||
@ -14625,6 +14636,7 @@ int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_
|
||||
int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
|
||||
#ifdef cl_khr_depth_images
|
||||
float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord);
|
||||
@ -14727,6 +14739,8 @@ uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler,
|
||||
#endif //cl_khr_mipmap_image
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
|
||||
/**
|
||||
* Sampler-less Image Access
|
||||
*/
|
||||
@ -14760,24 +14774,31 @@ float4 __purefn __ovld read_imagef(read_only image3d_t image, int4 coord);
|
||||
int4 __purefn __ovld read_imagei(read_only image3d_t image, int4 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image3d_t image, int4 coord);
|
||||
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
|
||||
// Image read functions returning half4 type
|
||||
#ifdef cl_khr_fp16
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, int coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, float coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, int2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, float2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, int4 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, float4 coord);
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, int4 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, float4 coord);
|
||||
/**
|
||||
* Sampler-less Image Access
|
||||
*/
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_t image, int coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image2d_t image, int2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image3d_t image, int4 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, int2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image2d_array_t image, int4 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#endif //cl_khr_fp16
|
||||
|
||||
// Image read functions for read_write images
|
||||
@ -15707,7 +15728,6 @@ double __ovld __conv work_group_scan_inclusive_max(double x);
|
||||
|
||||
// OpenCL v2.0 s6.13.16 - Pipe Functions
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#define PIPE_RESERVE_ID_VALID_BIT (1U << 30)
|
||||
#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))
|
||||
bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
@ -16193,6 +16213,637 @@ void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, u
|
||||
void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
|
||||
#endif // cl_intel_subgroups_short
|
||||
|
||||
#ifdef cl_intel_device_side_avc_motion_estimation
|
||||
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin
|
||||
|
||||
#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0
|
||||
#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1
|
||||
#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2
|
||||
#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3
|
||||
|
||||
#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0
|
||||
#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1
|
||||
#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2
|
||||
#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3
|
||||
|
||||
#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0
|
||||
#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
|
||||
#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
|
||||
|
||||
#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
|
||||
#define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
|
||||
#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
|
||||
#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
|
||||
#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
|
||||
#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
|
||||
#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
|
||||
#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
|
||||
|
||||
#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
|
||||
#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
|
||||
#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
|
||||
|
||||
#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
|
||||
#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
|
||||
#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
|
||||
#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
|
||||
#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
|
||||
#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
|
||||
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
|
||||
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
|
||||
#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
|
||||
|
||||
#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
|
||||
#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
|
||||
|
||||
#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
|
||||
#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
|
||||
#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
|
||||
|
||||
#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
|
||||
#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
|
||||
#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
|
||||
#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
|
||||
|
||||
#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
|
||||
#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
|
||||
#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
|
||||
#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
|
||||
#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
|
||||
|
||||
#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
|
||||
#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
|
||||
#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
|
||||
#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
|
||||
|
||||
#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0
|
||||
#define CLK_AVC_ME_INTRA_8x8_INTEL 0x1
|
||||
#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2
|
||||
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
|
||||
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30)
|
||||
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30)
|
||||
|
||||
#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
|
||||
#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
|
||||
|
||||
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0
|
||||
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
|
||||
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
|
||||
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
|
||||
|
||||
#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
|
||||
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
|
||||
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
|
||||
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
|
||||
|
||||
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
|
||||
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
|
||||
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
|
||||
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
|
||||
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
|
||||
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
|
||||
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
|
||||
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
|
||||
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
|
||||
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
|
||||
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
|
||||
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
|
||||
|
||||
#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1
|
||||
#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2
|
||||
#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3
|
||||
|
||||
#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
|
||||
#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
|
||||
|
||||
#define CLK_AVC_ME_INITIALIZE_INTEL 0x0
|
||||
|
||||
#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0
|
||||
#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0
|
||||
#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0
|
||||
|
||||
#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0
|
||||
#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0
|
||||
#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0
|
||||
|
||||
#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
|
||||
#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0
|
||||
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
|
||||
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0
|
||||
|
||||
// MCE built-in functions
|
||||
uchar __ovld
|
||||
intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty(
|
||||
uchar slice_type, uchar qp);
|
||||
ulong __ovld intel_sub_group_avc_mce_get_default_inter_shape_penalty(
|
||||
uchar slice_type, uchar qp);
|
||||
uchar __ovld intel_sub_group_avc_mce_get_default_inter_direction_penalty(
|
||||
uchar slice_type, uchar qp);
|
||||
uint __ovld intel_sub_group_avc_mce_get_default_intra_luma_shape_penalty(
|
||||
uchar slice_type, uchar qp);
|
||||
uint2 __ovld
|
||||
intel_sub_group_avc_mce_get_default_inter_motion_vector_cost_table(
|
||||
uchar slice_type, uchar qp);
|
||||
uchar __ovld intel_sub_group_avc_mce_get_default_intra_luma_mode_penalty(
|
||||
uchar slice_type, uchar qp);
|
||||
|
||||
uint2 __ovld intel_sub_group_avc_mce_get_default_high_penalty_cost_table();
|
||||
uint2 __ovld intel_sub_group_avc_mce_get_default_medium_penalty_cost_table();
|
||||
uint2 __ovld intel_sub_group_avc_mce_get_default_low_penalty_cost_table();
|
||||
uint __ovld intel_sub_group_avc_mce_get_default_non_dc_luma_intra_penalty();
|
||||
uchar __ovld
|
||||
intel_sub_group_avc_mce_get_default_intra_chroma_mode_base_penalty();
|
||||
|
||||
intel_sub_group_avc_mce_payload_t __ovld
|
||||
intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty(
|
||||
uchar reference_base_penalty, intel_sub_group_avc_mce_payload_t payload);
|
||||
intel_sub_group_avc_mce_payload_t __ovld
|
||||
intel_sub_group_avc_mce_set_inter_shape_penalty(
|
||||
ulong packed_shape_penalty, intel_sub_group_avc_mce_payload_t payload);
|
||||
intel_sub_group_avc_mce_payload_t __ovld
|
||||
intel_sub_group_avc_mce_set_inter_direction_penalty(
|
||||
uchar direction_cost, intel_sub_group_avc_mce_payload_t payload);
|
||||
intel_sub_group_avc_mce_payload_t __ovld
|
||||
intel_sub_group_avc_mce_set_motion_vector_cost_function(
|
||||
ulong packed_cost_center_delta, uint2 packed_cost_table,
|
||||
uchar cost_precision, intel_sub_group_avc_mce_payload_t payload);
|
||||
intel_sub_group_avc_mce_payload_t __ovld
|
||||
intel_sub_group_avc_mce_set_ac_only_haar(
|
||||
intel_sub_group_avc_mce_payload_t payload);
|
||||
intel_sub_group_avc_mce_payload_t __ovld
|
||||
intel_sub_group_avc_mce_set_source_interlaced_field_polarity(
|
||||
uchar src_field_polarity, intel_sub_group_avc_mce_payload_t payload);
|
||||
intel_sub_group_avc_mce_payload_t __ovld
|
||||
intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity(
|
||||
uchar ref_field_polarity, intel_sub_group_avc_mce_payload_t payload);
|
||||
intel_sub_group_avc_mce_payload_t __ovld
|
||||
intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities(
|
||||
uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
|
||||
intel_sub_group_avc_mce_payload_t payload);
|
||||
|
||||
ulong __ovld intel_sub_group_avc_mce_get_motion_vectors(
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
ushort __ovld intel_sub_group_avc_mce_get_inter_distortions(
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
ushort __ovld intel_sub_group_avc_mce_get_best_inter_distortion(
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
uchar __ovld intel_sub_group_avc_mce_get_inter_major_shape(
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
uchar __ovld intel_sub_group_avc_mce_get_inter_minor_shapes(
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
uchar __ovld intel_sub_group_avc_mce_get_inter_directions(
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
uchar __ovld intel_sub_group_avc_mce_get_inter_motion_vector_count(
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
uint __ovld intel_sub_group_avc_mce_get_inter_reference_ids(
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
uchar __ovld
|
||||
intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities(
|
||||
uint packed_reference_ids, uint packed_reference_parameter_field_polarities,
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
|
||||
// IME built-in functions
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_initialize(
|
||||
ushort2 src_coord, uchar partition_mask, uchar sad_adjustment);
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_single_reference(
|
||||
short2 ref_offset, uchar search_window_config,
|
||||
intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_dual_reference(
|
||||
short2 fwd_ref_offset, short2 bwd_ref_offset, uchar search_window_config,
|
||||
intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_max_motion_vector_count(
|
||||
uchar max_motion_vector_count, intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_unidirectional_mix_disable(
|
||||
intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_early_search_termination_threshold(
|
||||
uchar threshold, intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_weighted_sad(
|
||||
uint packed_sad_weights, intel_sub_group_avc_ime_payload_t payload);
|
||||
|
||||
__attribute__((deprecated("If you use the latest Intel driver, please use "
|
||||
"intel_sub_group_avc_ime_ref_window_size instead",
|
||||
"intel_sub_group_avc_ime_ref_window_size")))
|
||||
ushort2 __ovld
|
||||
intel_sub_group_ime_ref_window_size(uchar search_window_config, char dual_ref);
|
||||
ushort2 __ovld intel_sub_group_avc_ime_ref_window_size(
|
||||
uchar search_window_config, char dual_ref);
|
||||
short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset(
|
||||
short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size,
|
||||
ushort2 image_size);
|
||||
|
||||
intel_sub_group_avc_ime_result_t __ovld
|
||||
intel_sub_group_avc_ime_evaluate_with_single_reference(
|
||||
read_only image2d_t src_image, read_only image2d_t ref_image,
|
||||
sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ime_result_t __ovld
|
||||
intel_sub_group_avc_ime_evaluate_with_dual_reference(
|
||||
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
|
||||
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
|
||||
intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld
|
||||
intel_sub_group_avc_ime_evaluate_with_single_reference_streamout(
|
||||
read_only image2d_t src_image, read_only image2d_t ref_image,
|
||||
sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld
|
||||
intel_sub_group_avc_ime_evaluate_with_dual_reference_streamout(
|
||||
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
|
||||
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
|
||||
intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ime_result_t __ovld
|
||||
intel_sub_group_avc_ime_evaluate_with_single_reference_streamin(
|
||||
read_only image2d_t src_image, read_only image2d_t ref_image,
|
||||
sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,
|
||||
intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);
|
||||
intel_sub_group_avc_ime_result_t __ovld
|
||||
intel_sub_group_avc_ime_evaluate_with_dual_reference_streamin(
|
||||
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
|
||||
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
|
||||
intel_sub_group_avc_ime_payload_t payload,
|
||||
intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);
|
||||
intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld
|
||||
intel_sub_group_avc_ime_evaluate_with_single_reference_streaminout(
|
||||
read_only image2d_t src_image, read_only image2d_t ref_image,
|
||||
sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,
|
||||
intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);
|
||||
intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld
|
||||
intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout(
|
||||
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
|
||||
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
|
||||
intel_sub_group_avc_ime_payload_t payload,
|
||||
intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);
|
||||
|
||||
intel_sub_group_avc_ime_single_reference_streamin_t __ovld
|
||||
intel_sub_group_avc_ime_get_single_reference_streamin(
|
||||
intel_sub_group_avc_ime_result_single_reference_streamout_t result);
|
||||
intel_sub_group_avc_ime_dual_reference_streamin_t __ovld
|
||||
intel_sub_group_avc_ime_get_dual_reference_streamin(
|
||||
intel_sub_group_avc_ime_result_dual_reference_streamout_t result);
|
||||
intel_sub_group_avc_ime_result_t __ovld
|
||||
intel_sub_group_avc_ime_strip_single_reference_streamout(
|
||||
intel_sub_group_avc_ime_result_single_reference_streamout_t result);
|
||||
intel_sub_group_avc_ime_result_t __ovld
|
||||
intel_sub_group_avc_ime_strip_dual_reference_streamout(
|
||||
intel_sub_group_avc_ime_result_dual_reference_streamout_t result);
|
||||
|
||||
uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(
|
||||
intel_sub_group_avc_ime_result_single_reference_streamout_t result,
|
||||
uchar major_shape);
|
||||
ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(
|
||||
intel_sub_group_avc_ime_result_single_reference_streamout_t result,
|
||||
uchar major_shape);
|
||||
uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(
|
||||
intel_sub_group_avc_ime_result_single_reference_streamout_t result,
|
||||
uchar major_shape);
|
||||
uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(
|
||||
intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
|
||||
uchar major_shape, uchar direction);
|
||||
ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(
|
||||
intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
|
||||
uchar major_shape, uchar direction);
|
||||
uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(
|
||||
intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
|
||||
uchar major_shape, uchar direction);
|
||||
|
||||
uchar __ovld intel_sub_group_avc_ime_get_border_reached(
|
||||
uchar image_select, intel_sub_group_avc_ime_result_t result);
|
||||
uchar __ovld intel_sub_group_avc_ime_get_truncated_search_indication(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
uchar __ovld
|
||||
intel_sub_group_avc_ime_get_unidirectional_early_search_termination(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
uint __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_motion_vector(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
ushort __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_distortion(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
|
||||
// REF built-in functions
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_fme_initialize(
|
||||
ushort2 src_coord, ulong motion_vectors, uchar major_shapes,
|
||||
uchar minor_shapes, uchar directions, uchar pixel_resolution,
|
||||
uchar sad_adjustment);
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_bme_initialize(
|
||||
ushort2 src_coord, ulong motion_vectors, uchar major_shapes,
|
||||
uchar minor_shapes, uchar directions, uchar pixel_resolution,
|
||||
uchar bidirectional_weight, uchar sad_adjustment);
|
||||
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_ref_set_bidirectional_mix_disable(
|
||||
intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_ref_set_bilinear_filter_enable(
|
||||
intel_sub_group_avc_ref_payload_t payload);
|
||||
|
||||
intel_sub_group_avc_ref_result_t __ovld
|
||||
intel_sub_group_avc_ref_evaluate_with_single_reference(
|
||||
read_only image2d_t src_image, read_only image2d_t ref_image,
|
||||
sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_ref_result_t __ovld
|
||||
intel_sub_group_avc_ref_evaluate_with_dual_reference(
|
||||
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
|
||||
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
|
||||
intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_ref_result_t __ovld
|
||||
intel_sub_group_avc_ref_evaluate_with_multi_reference(
|
||||
read_only image2d_t src_image, uint packed_reference_ids,
|
||||
sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_ref_result_t __ovld
|
||||
intel_sub_group_avc_ref_evaluate_with_multi_reference(
|
||||
read_only image2d_t src_image, uint packed_reference_ids,
|
||||
uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
|
||||
intel_sub_group_avc_ref_payload_t payload);
|
||||
|
||||
// SIC built-in functions
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_initialize(
|
||||
ushort2 src_coord);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_configure_skc(
|
||||
uint skip_block_partition_type, uint skip_motion_vector_mask,
|
||||
ulong motion_vectors, uchar bidirectional_weight, uchar skip_sad_adjustment,
|
||||
intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_configure_ipe(
|
||||
uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,
|
||||
uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,
|
||||
uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,
|
||||
uchar intra_sad_adjustment, intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_configure_ipe(
|
||||
uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,
|
||||
uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,
|
||||
uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,
|
||||
ushort left_edge_chroma_pixels, ushort upper_left_corner_chroma_pixel,
|
||||
ushort upper_edge_chroma_pixels, uchar intra_sad_adjustment,
|
||||
intel_sub_group_avc_sic_payload_t payload);
|
||||
uint __ovld
|
||||
intel_sub_group_avc_sic_get_motion_vector_mask(
|
||||
uint skip_block_partition_type, uchar direction);
|
||||
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_intra_luma_shape_penalty(
|
||||
uint packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_intra_luma_mode_cost_function(
|
||||
uchar luma_mode_penalty, uint luma_packed_neighbor_modes,
|
||||
uint luma_packed_non_dc_penalty, intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_intra_chroma_mode_cost_function(
|
||||
uchar chroma_mode_penalty, intel_sub_group_avc_sic_payload_t payload);
|
||||
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_skc_bilinear_filter_enable(
|
||||
intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_skc_forward_transform_enable(
|
||||
ulong packed_sad_coefficients, intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_block_based_raw_skip_sad(
|
||||
uchar block_based_skip_type,
|
||||
intel_sub_group_avc_sic_payload_t payload);
|
||||
|
||||
intel_sub_group_avc_sic_result_t __ovld
|
||||
intel_sub_group_avc_sic_evaluate_ipe(
|
||||
read_only image2d_t src_image, sampler_t vme_media_sampler,
|
||||
intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_sic_result_t __ovld
|
||||
intel_sub_group_avc_sic_evaluate_with_single_reference(
|
||||
read_only image2d_t src_image, read_only image2d_t ref_image,
|
||||
sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_sic_result_t __ovld
|
||||
intel_sub_group_avc_sic_evaluate_with_dual_reference(
|
||||
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
|
||||
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
|
||||
intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_sic_result_t __ovld
|
||||
intel_sub_group_avc_sic_evaluate_with_multi_reference(
|
||||
read_only image2d_t src_image, uint packed_reference_ids,
|
||||
sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_sic_result_t __ovld
|
||||
intel_sub_group_avc_sic_evaluate_with_multi_reference(
|
||||
read_only image2d_t src_image, uint packed_reference_ids,
|
||||
uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
|
||||
intel_sub_group_avc_sic_payload_t payload);
|
||||
|
||||
uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape(
|
||||
intel_sub_group_avc_sic_result_t result);
|
||||
ushort __ovld intel_sub_group_avc_sic_get_best_ipe_luma_distortion(
|
||||
intel_sub_group_avc_sic_result_t result);
|
||||
ushort __ovld intel_sub_group_avc_sic_get_best_ipe_chroma_distortion(
|
||||
intel_sub_group_avc_sic_result_t result);
|
||||
ulong __ovld intel_sub_group_avc_sic_get_packed_ipe_luma_modes(
|
||||
intel_sub_group_avc_sic_result_t result);
|
||||
uchar __ovld intel_sub_group_avc_sic_get_ipe_chroma_mode(
|
||||
intel_sub_group_avc_sic_result_t result);
|
||||
uint __ovld intel_sub_group_avc_sic_get_packed_skc_luma_count_threshold(
|
||||
intel_sub_group_avc_sic_result_t result);
|
||||
ulong __ovld intel_sub_group_avc_sic_get_packed_skc_luma_sum_threshold(
|
||||
intel_sub_group_avc_sic_result_t result);
|
||||
ushort __ovld intel_sub_group_avc_sic_get_inter_raw_sads(
|
||||
intel_sub_group_avc_sic_result_t result);
|
||||
|
||||
// Wrappers
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_inter_base_multi_reference_penalty(
|
||||
uchar reference_base_penalty, intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_ref_set_inter_base_multi_reference_penalty(
|
||||
uchar reference_base_penalty, intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_inter_base_multi_reference_penalty(
|
||||
uchar reference_base_penalty, intel_sub_group_avc_sic_payload_t payload);
|
||||
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_inter_shape_penalty(
|
||||
ulong packed_shape_cost, intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_ref_set_inter_shape_penalty(
|
||||
ulong packed_shape_cost, intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_inter_shape_penalty(
|
||||
ulong packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);
|
||||
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_inter_direction_penalty(
|
||||
uchar direction_cost, intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_ref_set_inter_direction_penalty(
|
||||
uchar direction_cost, intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_inter_direction_penalty(
|
||||
uchar direction_cost, intel_sub_group_avc_sic_payload_t payload);
|
||||
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_motion_vector_cost_function(
|
||||
ulong packed_cost_center_delta, uint2 packed_cost_table,
|
||||
uchar cost_precision, intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_ref_set_motion_vector_cost_function(
|
||||
ulong packed_cost_center_delta, uint2 packed_cost_table,
|
||||
uchar cost_precision, intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_motion_vector_cost_function(
|
||||
ulong packed_cost_center_delta, uint2 packed_cost_table,
|
||||
uchar cost_precision, intel_sub_group_avc_sic_payload_t payload);
|
||||
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_source_interlaced_field_polarity(
|
||||
uchar src_field_polarity, intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_ref_set_source_interlaced_field_polarity(
|
||||
uchar src_field_polarity, intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_source_interlaced_field_polarity(
|
||||
uchar src_field_polarity, intel_sub_group_avc_sic_payload_t payload);
|
||||
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_single_reference_interlaced_field_polarity(
|
||||
uchar ref_field_polarity, intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_ref_set_single_reference_interlaced_field_polarity(
|
||||
uchar ref_field_polarity, intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_single_reference_interlaced_field_polarity(
|
||||
uchar ref_field_polarity, intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_dual_reference_interlaced_field_polarities(
|
||||
uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
|
||||
intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_ref_set_dual_reference_interlaced_field_polarities(
|
||||
uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
|
||||
intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_dual_reference_interlaced_field_polarities(
|
||||
uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
|
||||
intel_sub_group_avc_sic_payload_t payload);
|
||||
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_ime_set_ac_only_haar(
|
||||
intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_ref_set_ac_only_haar(
|
||||
intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_sic_set_ac_only_haar(
|
||||
intel_sub_group_avc_sic_payload_t payload);
|
||||
|
||||
ulong __ovld intel_sub_group_avc_ime_get_motion_vectors(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
ulong __ovld intel_sub_group_avc_ref_get_motion_vectors(
|
||||
intel_sub_group_avc_ref_result_t result);
|
||||
|
||||
ushort __ovld intel_sub_group_avc_ime_get_inter_distortions(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
ushort __ovld intel_sub_group_avc_ref_get_inter_distortions(
|
||||
intel_sub_group_avc_ref_result_t result);
|
||||
ushort __ovld intel_sub_group_avc_sic_get_inter_distortions(
|
||||
intel_sub_group_avc_sic_result_t result);
|
||||
|
||||
ushort __ovld intel_sub_group_avc_ime_get_best_inter_distortion(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
ushort __ovld intel_sub_group_avc_ref_get_best_inter_distortion(
|
||||
intel_sub_group_avc_ref_result_t result);
|
||||
|
||||
uchar __ovld intel_sub_group_avc_ime_get_inter_major_shape(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
uchar __ovld intel_sub_group_avc_ref_get_inter_major_shape(
|
||||
intel_sub_group_avc_ref_result_t result);
|
||||
uchar __ovld intel_sub_group_avc_ime_get_inter_minor_shapes(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
uchar __ovld intel_sub_group_avc_ref_get_inter_minor_shapes(
|
||||
intel_sub_group_avc_ref_result_t result);
|
||||
|
||||
uchar __ovld intel_sub_group_avc_ime_get_inter_directions(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
uchar __ovld intel_sub_group_avc_ref_get_inter_directions(
|
||||
intel_sub_group_avc_ref_result_t result);
|
||||
|
||||
uchar __ovld intel_sub_group_avc_ime_get_inter_motion_vector_count(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
uchar __ovld intel_sub_group_avc_ref_get_inter_motion_vector_count(
|
||||
intel_sub_group_avc_ref_result_t result);
|
||||
|
||||
uint __ovld intel_sub_group_avc_ime_get_inter_reference_ids(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
uint __ovld intel_sub_group_avc_ref_get_inter_reference_ids(
|
||||
intel_sub_group_avc_ref_result_t result);
|
||||
|
||||
uchar __ovld
|
||||
intel_sub_group_avc_ime_get_inter_reference_interlaced_field_polarities(
|
||||
uint packed_reference_ids, uint packed_reference_parameter_field_polarities,
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
uchar __ovld
|
||||
intel_sub_group_avc_ref_get_inter_reference_interlaced_field_polarities(
|
||||
uint packed_reference_ids, uint packed_reference_parameter_field_polarities,
|
||||
intel_sub_group_avc_ref_result_t result);
|
||||
|
||||
// Type conversion functions
|
||||
intel_sub_group_avc_mce_payload_t __ovld
|
||||
intel_sub_group_avc_ime_convert_to_mce_payload(
|
||||
intel_sub_group_avc_ime_payload_t payload);
|
||||
intel_sub_group_avc_ime_payload_t __ovld
|
||||
intel_sub_group_avc_mce_convert_to_ime_payload(
|
||||
intel_sub_group_avc_mce_payload_t payload);
|
||||
intel_sub_group_avc_mce_payload_t __ovld
|
||||
intel_sub_group_avc_ref_convert_to_mce_payload(
|
||||
intel_sub_group_avc_ref_payload_t payload);
|
||||
intel_sub_group_avc_ref_payload_t __ovld
|
||||
intel_sub_group_avc_mce_convert_to_ref_payload(
|
||||
intel_sub_group_avc_mce_payload_t payload);
|
||||
intel_sub_group_avc_mce_payload_t __ovld
|
||||
intel_sub_group_avc_sic_convert_to_mce_payload(
|
||||
intel_sub_group_avc_sic_payload_t payload);
|
||||
intel_sub_group_avc_sic_payload_t __ovld
|
||||
intel_sub_group_avc_mce_convert_to_sic_payload(
|
||||
intel_sub_group_avc_mce_payload_t payload);
|
||||
|
||||
intel_sub_group_avc_mce_result_t __ovld
|
||||
intel_sub_group_avc_ime_convert_to_mce_result(
|
||||
intel_sub_group_avc_ime_result_t result);
|
||||
intel_sub_group_avc_ime_result_t __ovld
|
||||
intel_sub_group_avc_mce_convert_to_ime_result(
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
intel_sub_group_avc_mce_result_t __ovld
|
||||
intel_sub_group_avc_ref_convert_to_mce_result(
|
||||
intel_sub_group_avc_ref_result_t result);
|
||||
intel_sub_group_avc_ref_result_t __ovld
|
||||
intel_sub_group_avc_mce_convert_to_ref_result(
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
intel_sub_group_avc_mce_result_t __ovld
|
||||
intel_sub_group_avc_sic_convert_to_mce_result(
|
||||
intel_sub_group_avc_sic_result_t result);
|
||||
intel_sub_group_avc_sic_result_t __ovld
|
||||
intel_sub_group_avc_mce_convert_to_sic_result(
|
||||
intel_sub_group_avc_mce_result_t result);
|
||||
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end
|
||||
#endif // cl_intel_device_side_avc_motion_estimation
|
||||
|
||||
#ifdef cl_amd_media_ops
|
||||
uint __ovld amd_bitalign(uint a, uint b, uint c);
|
||||
uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);
|
||||
|
||||
6
c_headers/vecintrin.h
vendored
6
c_headers/vecintrin.h
vendored
@ -381,7 +381,7 @@ vec_insert_and_zero(const unsigned long long *__ptr) {
|
||||
static inline __ATTRS_o_ai vector float
|
||||
vec_insert_and_zero(const float *__ptr) {
|
||||
vector float __vec = (vector float)0;
|
||||
__vec[0] = *__ptr;
|
||||
__vec[1] = *__ptr;
|
||||
return __vec;
|
||||
}
|
||||
#endif
|
||||
@ -5942,13 +5942,13 @@ vec_orc(vector unsigned long long __a, vector unsigned long long __b) {
|
||||
|
||||
static inline __ATTRS_o_ai vector float
|
||||
vec_orc(vector float __a, vector float __b) {
|
||||
return (vector float)((vector unsigned int)__a &
|
||||
return (vector float)((vector unsigned int)__a |
|
||||
~(vector unsigned int)__b);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector double
|
||||
vec_orc(vector double __a, vector double __b) {
|
||||
return (vector double)((vector unsigned long long)__a &
|
||||
return (vector double)((vector unsigned long long)__a |
|
||||
~(vector unsigned long long)__b);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -5,14 +5,14 @@ set -e
|
||||
|
||||
BUILDDIR="$(pwd)"
|
||||
|
||||
sudo sh -c 'echo "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main" >> /etc/apt/sources.list'
|
||||
sudo sh -c 'echo "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-8 main" >> /etc/apt/sources.list'
|
||||
wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
|
||||
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
sudo apt-get update -q
|
||||
|
||||
sudo apt-get remove -y llvm-*
|
||||
sudo rm -rf /usr/local/*
|
||||
sudo apt-get install -y libxml2-dev libclang-7-dev llvm-7 llvm-7-dev cmake s3cmd gcc-7 g++-7
|
||||
sudo apt-get install -y libxml2-dev libclang-8-dev llvm-8 llvm-8-dev cmake s3cmd gcc-7 g++-7
|
||||
|
||||
export CC=gcc-7
|
||||
export CXX=g++-7
|
||||
@ -25,7 +25,7 @@ make -j2 install
|
||||
if [ "${BUILD_REASON}" != "PullRequest" ]; then
|
||||
ARTIFACTSDIR="$BUILDDIR/artifacts"
|
||||
mkdir "$ARTIFACTSDIR"
|
||||
docker run -i --mount type=bind,source="$ARTIFACTSDIR",target=/z ziglang/static-base:llvm7-1 -j2 $BUILD_SOURCEVERSION
|
||||
docker run -i --mount type=bind,source="$ARTIFACTSDIR",target=/z ziglang/static-base:llvm8-1 -j2 $BUILD_SOURCEVERSION
|
||||
TARBALL="$(ls $ARTIFACTSDIR)"
|
||||
mv "$DOWNLOADSECUREFILE_SECUREFILEPATH" "$HOME/.s3cfg"
|
||||
s3cmd put -P "$ARTIFACTSDIR/$TARBALL" s3://ziglang.org/builds/
|
||||
|
||||
@ -6,7 +6,7 @@ set -e
|
||||
brew install s3cmd gcc@8
|
||||
|
||||
ZIGDIR="$(pwd)"
|
||||
CACHE_BASENAME="llvm+clang-7.0.0-macos-x86_64-gcc8-release-static"
|
||||
CACHE_BASENAME="llvm+clang-8.0.0-macos-x86_64-gcc8-release-static"
|
||||
PREFIX="$HOME/$CACHE_BASENAME"
|
||||
TMPDIR="$HOME/tmpz"
|
||||
JOBS="-j2"
|
||||
@ -42,25 +42,18 @@ else
|
||||
rm $PREFIX/lib/libz*dylib
|
||||
|
||||
cd $TMPDIR
|
||||
wget ftp://ftp.invisible-island.net/ncurses/ncurses.tar.gz
|
||||
tar xf ncurses.tar.gz
|
||||
cd ncurses-6.1/
|
||||
./configure --without-shared --prefix=$PREFIX
|
||||
make $JOBS install
|
||||
|
||||
cd $TMPDIR
|
||||
wget https://releases.llvm.org/7.0.0/llvm-7.0.0.src.tar.xz
|
||||
tar xf llvm-7.0.0.src.tar.xz
|
||||
cd llvm-7.0.0.src/
|
||||
wget https://releases.llvm.org/8.0.0/llvm-8.0.0.src.tar.xz
|
||||
tar xf llvm-8.0.0.src.tar.xz
|
||||
cd llvm-8.0.0.src/
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_INSTALL_PREFIX=$PREFIX -DCMAKE_PREFIX_PATH=$PREFIX -DCMAKE_BUILD_TYPE=Release -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="WebAssembly;AVR;RISCV" -DLLVM_ENABLE_LIBXML2=OFF
|
||||
cmake .. -DCMAKE_INSTALL_PREFIX=$PREFIX -DCMAKE_PREFIX_PATH=$PREFIX -DCMAKE_BUILD_TYPE=Release -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="AVR;RISCV" -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_ENABLE_TERMINFO=OFF
|
||||
make $JOBS install
|
||||
|
||||
cd $TMPDIR
|
||||
wget https://releases.llvm.org/7.0.0/cfe-7.0.0.src.tar.xz
|
||||
tar xf cfe-7.0.0.src.tar.xz
|
||||
cd cfe-7.0.0.src/
|
||||
wget https://releases.llvm.org/8.0.0/cfe-8.0.0.src.tar.xz
|
||||
tar xf cfe-8.0.0.src.tar.xz
|
||||
cd cfe-8.0.0.src/
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_INSTALL_PREFIX=$PREFIX -DCMAKE_PREFIX_PATH=$PREFIX -DCMAKE_BUILD_TYPE=Release
|
||||
|
||||
@ -6,5 +6,5 @@ set -e
|
||||
pacman -Su --needed --noconfirm
|
||||
pacman -S --needed --noconfirm wget p7zip python3-pip
|
||||
pip install s3cmd
|
||||
wget -nv "https://ziglang.org/deps/llvm%2bclang-7.0.0-win64-msvc-release.tar.xz"
|
||||
tar xf llvm+clang-7.0.0-win64-msvc-release.tar.xz
|
||||
wget -nv "https://ziglang.org/deps/llvm%2bclang-8.0.0-win64-msvc-release.tar.xz"
|
||||
tar xf llvm+clang-8.0.0-win64-msvc-release.tar.xz
|
||||
|
||||
@ -11,7 +11,7 @@ SET "MSYSTEM=%PREVMSYSTEM%"
|
||||
|
||||
SET "ZIGBUILDDIR=%SRCROOT%\build"
|
||||
SET "ZIGINSTALLDIR=%ZIGBUILDDIR%\release"
|
||||
SET "ZIGPREFIXPATH=%SRCROOT%\llvm+clang-7.0.0-win64-msvc-release"
|
||||
SET "ZIGPREFIXPATH=%SRCROOT%\llvm+clang-8.0.0-win64-msvc-release"
|
||||
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||
|
||||
|
||||
53
ci/srht/freebsd_script
Executable file
53
ci/srht/freebsd_script
Executable file
@ -0,0 +1,53 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -x
|
||||
set -e
|
||||
|
||||
ZIGDIR="$(pwd)"
|
||||
CACHE_BASENAME="llvm+clang-8.0.0-freebsd-x86_64-release"
|
||||
PREFIX="$HOME/$CACHE_BASENAME"
|
||||
JOBS="-j$(sysctl -n hw.ncpu)"
|
||||
|
||||
cd $HOME
|
||||
wget -nv "https://ziglang.org/builds/$CACHE_BASENAME.tar.xz"
|
||||
tar xf "$CACHE_BASENAME.tar.xz"
|
||||
|
||||
cd $ZIGDIR
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=$PREFIX -DCMAKE_INSTALL_PREFIX=$(pwd)/release -DZIG_STATIC=ON
|
||||
make $JOBS install
|
||||
|
||||
# TODO test everything. right now it's skipping stuff including docs
|
||||
# because for some reason @cImport is failing on the CI server.
|
||||
release/bin/zig build --build-file ../build.zig test-behavior -Dskip-release
|
||||
|
||||
if [ -f ~/.s3cfg ]; then
|
||||
mv ../LICENSE release/
|
||||
# TODO re-enable this
|
||||
#mv ../zig-cache/langref.html release/
|
||||
mv release/bin/zig release/
|
||||
rmdir release/bin
|
||||
|
||||
GITBRANCH=$(git rev-parse --abbrev-ref HEAD)
|
||||
VERSION=$(release/zig version)
|
||||
DIRNAME="zig-freebsd-x86_64-$VERSION"
|
||||
TARBALL="$DIRNAME.tar.xz"
|
||||
mv release "$DIRNAME"
|
||||
tar cfJ "$TARBALL" "$DIRNAME"
|
||||
|
||||
s3cmd put -P "$TARBALL" s3://ziglang.org/builds/
|
||||
touch empty
|
||||
s3cmd put -P empty s3://ziglang.org/builds/zig-freebsd-x86_64-$GITBRANCH.tar.xz --add-header="Cache-Control: max-age=0, must-revalidate" --add-header="x-amz-website-redirect-location:/builds/$TARBALL"
|
||||
|
||||
SHASUM=$(shasum -a 256 $TARBALL | cut '-d ' -f1)
|
||||
BYTESIZE=$(wc -c < $TARBALL)
|
||||
|
||||
JSONFILE="freebsd-$GITBRANCH.json"
|
||||
touch $JSONFILE
|
||||
echo "{\"tarball\": \"$TARBALL\"," >>$JSONFILE
|
||||
echo "\"shasum\": \"$SHASUM\"," >>$JSONFILE
|
||||
echo "\"size\": \"$BYTESIZE\"}" >>$JSONFILE
|
||||
|
||||
s3cmd put -P "$JSONFILE" s3://ziglang.org/builds/$JSONFILE
|
||||
fi
|
||||
@ -38,10 +38,10 @@ if(MSVC)
|
||||
else()
|
||||
find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h
|
||||
PATHS
|
||||
/usr/lib/llvm/7/include
|
||||
/usr/lib/llvm-7/include
|
||||
/usr/lib/llvm-7.0/include
|
||||
/usr/local/llvm70/include
|
||||
/usr/lib/llvm/8/include
|
||||
/usr/lib/llvm-8/include
|
||||
/usr/lib/llvm-8.0/include
|
||||
/usr/local/llvm80/include
|
||||
/mingw64/include)
|
||||
|
||||
macro(FIND_AND_ADD_CLANG_LIB _libname_)
|
||||
@ -49,10 +49,10 @@ else()
|
||||
find_library(CLANG_${_prettylibname_}_LIB NAMES ${_libname_}
|
||||
PATHS
|
||||
${CLANG_LIBDIRS}
|
||||
/usr/lib/llvm/7/lib
|
||||
/usr/lib/llvm-7/lib
|
||||
/usr/lib/llvm-7.0/lib
|
||||
/usr/local/llvm70/lib
|
||||
/usr/lib/llvm/8/lib
|
||||
/usr/lib/llvm-8/lib
|
||||
/usr/lib/llvm-8.0/lib
|
||||
/usr/local/llvm80/lib
|
||||
/mingw64/lib
|
||||
/c/msys64/mingw64/lib
|
||||
c:\\msys64\\mingw64\\lib)
|
||||
|
||||
@ -8,14 +8,15 @@
|
||||
|
||||
find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h
|
||||
PATHS
|
||||
/usr/lib/llvm-7.0/include
|
||||
/usr/local/llvm70/include
|
||||
/usr/lib/llvm-8.0/include
|
||||
/usr/local/llvm80/include
|
||||
/mingw64/include)
|
||||
|
||||
find_library(LLD_LIBRARY NAMES lld-7.0 lld70 lld
|
||||
find_library(LLD_LIBRARY NAMES lld-8.0 lld80 lld
|
||||
PATHS
|
||||
/usr/lib/llvm-7.0/lib
|
||||
/usr/local/llvm70/lib)
|
||||
/usr/lib/llvm-8.0/lib
|
||||
/usr/local/llvm80/lib
|
||||
)
|
||||
if(EXISTS ${LLD_LIBRARY})
|
||||
set(LLD_LIBRARIES ${LLD_LIBRARY})
|
||||
else()
|
||||
@ -23,8 +24,8 @@ else()
|
||||
string(TOUPPER ${_libname_} _prettylibname_)
|
||||
find_library(LLD_${_prettylibname_}_LIB NAMES ${_libname_}
|
||||
PATHS
|
||||
/usr/lib/llvm-7.0/lib
|
||||
/usr/local/llvm70/lib
|
||||
/usr/lib/llvm-8.0/lib
|
||||
/usr/local/llvm80/lib
|
||||
/mingw64/lib
|
||||
/c/msys64/mingw64/lib
|
||||
c:/msys64/mingw64/lib)
|
||||
|
||||
@ -8,12 +8,16 @@
|
||||
# LLVM_LIBDIRS
|
||||
|
||||
find_program(LLVM_CONFIG_EXE
|
||||
NAMES llvm-config-7 llvm-config-7.0 llvm-config70 llvm-config
|
||||
NAMES llvm-config-8 llvm-config-8.0 llvm-config80 llvm-config
|
||||
PATHS
|
||||
"/mingw64/bin"
|
||||
"/c/msys64/mingw64/bin"
|
||||
"c:/msys64/mingw64/bin"
|
||||
"C:/Libraries/llvm-7.0.0/bin")
|
||||
"C:/Libraries/llvm-8.0.0/bin")
|
||||
|
||||
if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND")
|
||||
message(FATAL_ERROR "unable to find llvm-config")
|
||||
endif()
|
||||
|
||||
if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND")
|
||||
message(FATAL_ERROR "unable to find llvm-config")
|
||||
@ -24,14 +28,14 @@ execute_process(
|
||||
OUTPUT_VARIABLE LLVM_CONFIG_VERSION
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 7)
|
||||
message(FATAL_ERROR "expected LLVM 7.x but found ${LLVM_CONFIG_VERSION}")
|
||||
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 8)
|
||||
message(FATAL_ERROR "expected LLVM 8.x but found ${LLVM_CONFIG_VERSION}")
|
||||
endif()
|
||||
if("${LLVM_CONFIG_VERSION}" VERSION_EQUAL 8)
|
||||
message(FATAL_ERROR "expected LLVM 7.x but found ${LLVM_CONFIG_VERSION}")
|
||||
if("${LLVM_CONFIG_VERSION}" VERSION_EQUAL 9)
|
||||
message(FATAL_ERROR "expected LLVM 8.x but found ${LLVM_CONFIG_VERSION}")
|
||||
endif()
|
||||
if("${LLVM_CONFIG_VERSION}" VERSION_GREATER 8)
|
||||
message(FATAL_ERROR "expected LLVM 7.x but found ${LLVM_CONFIG_VERSION}")
|
||||
if("${LLVM_CONFIG_VERSION}" VERSION_GREATER 9)
|
||||
message(FATAL_ERROR "expected LLVM 8.x but found ${LLVM_CONFIG_VERSION}")
|
||||
endif()
|
||||
|
||||
execute_process(
|
||||
@ -57,6 +61,7 @@ NEED_TARGET("NVPTX")
|
||||
NEED_TARGET("PowerPC")
|
||||
NEED_TARGET("Sparc")
|
||||
NEED_TARGET("SystemZ")
|
||||
NEED_TARGET("WebAssembly")
|
||||
NEED_TARGET("X86")
|
||||
NEED_TARGET("XCore")
|
||||
|
||||
@ -107,7 +112,7 @@ execute_process(
|
||||
set(LLVM_LIBRARIES ${LLVM_LIBRARIES} ${LLVM_SYSTEM_LIBS})
|
||||
|
||||
if(NOT LLVM_LIBRARIES)
|
||||
find_library(LLVM_LIBRARIES NAMES LLVM LLVM-7 LLVM-7.0)
|
||||
find_library(LLVM_LIBRARIES NAMES LLVM LLVM-8 LLVM-8.0)
|
||||
endif()
|
||||
|
||||
link_directories("${CMAKE_PREFIX_PATH}/lib")
|
||||
|
||||
31
deps/lld-prebuilt/COFF/Options.inc
vendored
31
deps/lld-prebuilt/COFF/Options.inc
vendored
@ -55,10 +55,8 @@ OPTION(prefix_2, "color-diagnostics=", color_diagnostics_eq, Joined, INVALID, IN
|
||||
"Use colors in diagnostics; one of 'always', 'never', 'auto'", nullptr, nullptr)
|
||||
OPTION(prefix_2, "color-diagnostics", color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Use colors in diagnostics", nullptr, nullptr)
|
||||
OPTION(prefix_1, "debug:dwarf", debug_dwarf, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "debug:full", debug_full, Flag, INVALID, debug, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "debug:ghash", debug_ghash, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "debug:symtab", debug_symtab, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "debug:", debug_opt, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Embed a symbol table in the image with option", nullptr, nullptr)
|
||||
OPTION(prefix_1, "debugtype:", debugtype, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Debug Info Options", nullptr, nullptr)
|
||||
OPTION(prefix_1, "debug", debug, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
@ -95,9 +93,12 @@ OPTION(prefix_1, "fixed:no", fixed_no, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Enable base relocations (default)", nullptr, nullptr)
|
||||
OPTION(prefix_1, "fixed", fixed, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Disable base relocations", nullptr, nullptr)
|
||||
OPTION(prefix_1, "force:unresolved", force_unresolved, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "force", force, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
OPTION(prefix_1, "force:multiple", force_multiple, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Allow multiply defined symbols when creating executables", nullptr, nullptr)
|
||||
OPTION(prefix_1, "force:unresolved", force_unresolved, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Allow undefined symbols when creating executables", nullptr, nullptr)
|
||||
OPTION(prefix_1, "force", force, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Allow undefined and multiply defined symbols when creating executables", nullptr, nullptr)
|
||||
OPTION(prefix_1, "functionpadmin", functionpadmin, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "guard:", guard, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Control flow guard", nullptr, nullptr)
|
||||
@ -148,30 +149,32 @@ OPTION(prefix_1, "lldsavetemps", lldsavetemps, Flag, INVALID, INVALID, nullptr,
|
||||
OPTION(prefix_1, "machine:", machine, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Specify target platform", nullptr, nullptr)
|
||||
OPTION(prefix_1, "manifest:", manifest_colon, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Create manifest file", nullptr, nullptr)
|
||||
"NO disables manifest output; EMBED[,ID=#] embeds manifest as resource in the image", nullptr, nullptr)
|
||||
OPTION(prefix_1, "manifestdependency:", manifestdependency, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Attributes for <dependency> in manifest file", nullptr, nullptr)
|
||||
"Attributes for <dependency> element in manifest file; implies /manifest", nullptr, nullptr)
|
||||
OPTION(prefix_1, "manifestfile:", manifestfile, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Manifest file path", nullptr, nullptr)
|
||||
"Manifest output path, with /manifest", nullptr, nullptr)
|
||||
OPTION(prefix_1, "manifestinput:", manifestinput, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Specify manifest file", nullptr, nullptr)
|
||||
"Additional manifest inputs; only valid with /manifest:embed", nullptr, nullptr)
|
||||
OPTION(prefix_1, "manifestuac:", manifestuac, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"User access control", nullptr, nullptr)
|
||||
OPTION(prefix_1, "manifest", manifest, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "manifest", manifest, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Create .manifest file", nullptr, nullptr)
|
||||
OPTION(prefix_1, "maxilksize:", maxilksize, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "merge:", merge, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Combine sections", nullptr, nullptr)
|
||||
OPTION(prefix_1, "mllvm:", mllvm, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Options to pass to LLVM", nullptr, nullptr)
|
||||
OPTION(prefix_1, "msvclto", msvclto, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "natvis:", natvis, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Path to natvis file to embed in the PDB", nullptr, nullptr)
|
||||
OPTION(prefix_1, "no-color-diagnostics", no_color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not use colors in diagnostics", nullptr, nullptr)
|
||||
OPTION(prefix_1, "nodefaultlib:", nodefaultlib, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Remove a default library", nullptr, nullptr)
|
||||
OPTION(prefix_1, "nodefaultlib", nodefaultlib_all, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "noentry", noentry, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "nodefaultlib", nodefaultlib_all, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Remove all default libraries", nullptr, nullptr)
|
||||
OPTION(prefix_1, "noentry", noentry, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Don't add reference to DllMainCRTStartup; only valid with /dll", nullptr, nullptr)
|
||||
OPTION(prefix_1, "nologo", nologo, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "nxcompat:no", nxcompat_no, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Disable data execution provention", nullptr, nullptr)
|
||||
|
||||
22
deps/lld-prebuilt/ELF/Options.inc
vendored
22
deps/lld-prebuilt/ELF/Options.inc
vendored
@ -54,7 +54,7 @@ OPTION(prefix_2, "Bsymbolic-functions", Bsymbolic_functions, Flag, INVALID, INVA
|
||||
OPTION(prefix_2, "Bsymbolic", Bsymbolic, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Bind defined symbols locally", nullptr, nullptr)
|
||||
OPTION(prefix_2, "build-id=", build_id_eq, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Generate build ID note", "[fast,md5,sha,uuid,0x<hexstring>]", nullptr)
|
||||
"Generate build ID note", "[fast,md5,sha1,uuid,0x<hexstring>]", nullptr)
|
||||
OPTION(prefix_2, "build-id", build_id, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Alias for --build-id=fast", nullptr, nullptr)
|
||||
OPTION(prefix_1, "b", anonymous_16, Separate, INVALID, format, nullptr, 0, 0,
|
||||
@ -62,6 +62,8 @@ OPTION(prefix_1, "b", anonymous_16, Separate, INVALID, format, nullptr, 0, 0,
|
||||
OPTION(prefix_2, "call-graph-ordering-file=", call_graph_ordering_file_eq, Joined, INVALID, call_graph_ordering_file, nullptr, 0, 0,
|
||||
"Layout sections to optimize the given callgraph", nullptr, nullptr)
|
||||
OPTION(prefix_2, "call-graph-ordering-file", call_graph_ordering_file, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "call-graph-profile-sort", call_graph_profile_sort, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Reorder sections with call graph profile (default)", nullptr, nullptr)
|
||||
OPTION(prefix_2, "call_shared", anonymous_1, Flag, INVALID, Bdynamic, nullptr, 0, 0,
|
||||
"Alias for --Bdynamic", nullptr, nullptr)
|
||||
OPTION(prefix_2, "check-sections", check_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
@ -133,7 +135,7 @@ OPTION(prefix_2, "exclude-libs=", exclude_libs_eq, Joined, INVALID, exclude_libs
|
||||
"Exclude static libraries from automatic export", nullptr, nullptr)
|
||||
OPTION(prefix_2, "exclude-libs", exclude_libs, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "execute-only", execute_only, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not mark executable sections readable", nullptr, nullptr)
|
||||
"Mark executable sections unreadable", nullptr, nullptr)
|
||||
OPTION(prefix_2, "export-dynamic-symbol=", export_dynamic_symbol_eq, Joined, INVALID, export_dynamic_symbol, nullptr, 0, 0,
|
||||
"Put a symbol in the dynamic symbol table", nullptr, nullptr)
|
||||
OPTION(prefix_2, "export-dynamic-symbol", export_dynamic_symbol, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
@ -245,6 +247,8 @@ OPTION(prefix_2, "no-apply-dynamic-relocs", no_apply_dynamic_relocs, Flag, INVAL
|
||||
"Do not apply link-time values for dynamic relocations (default)", nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-as-needed", no_as_needed, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Always set DT_NEEDED for shared libraries (default)", nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-call-graph-profile-sort", no_call_graph_profile_sort, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not reorder sections with call graph profile", nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-check-sections", no_check_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not check section addresses for overlaps", nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-color-diagnostics", no_color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
@ -291,6 +295,8 @@ OPTION(prefix_2, "no-rosegment", no_rosegment, Flag, INVALID, INVALID, nullptr,
|
||||
"Do not put read-only non-executable sections in their own segment", nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-threads", no_threads, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not run the linker multi-threaded", nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-toc-optimize", no_toc_optimize, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"(PowerPC64) Disable TOC related optimizations", nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-undefined-version", no_undefined_version, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Report version scripts that refer undefined symbols", nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-undefined", no_undefined, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
@ -301,6 +307,8 @@ OPTION(prefix_2, "no-warn-backrefs", no_warn_backrefs, Flag, INVALID, INVALID, n
|
||||
"Do not warn about backward symbol references to fetch archive members (default)", nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-warn-common", no_warn_common, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not warn about duplicate common symbols (default)", nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-warn-ifunc-textrel", no_warn_ifunc_textrel, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not warn about using ifunc symbols with text relocations (default)", nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-warn-mismatch", anonymous_57, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "no-warn-symbol-ordering", no_warn_symbol_ordering, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not warn about problems with the symbol ordering file", nullptr, nullptr)
|
||||
@ -338,6 +346,8 @@ OPTION(prefix_2, "pack-dyn-relocs=", pack_dyn_relocs_eq, Joined, INVALID, pack_d
|
||||
OPTION(prefix_2, "pack-dyn-relocs", pack_dyn_relocs, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, "[none,android,relr,android+relr]", nullptr)
|
||||
OPTION(prefix_2, "pic-executable", anonymous_23, Flag, INVALID, pie, nullptr, 0, 0,
|
||||
"Alias for --pie", nullptr, nullptr)
|
||||
OPTION(prefix_2, "pic-veneer", pic_veneer, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Always generate position independent thunks (veneers)", nullptr, nullptr)
|
||||
OPTION(prefix_2, "pie", pie, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Create a position independent executable", nullptr, nullptr)
|
||||
OPTION(prefix_2, "plugin-opt=-fresolution=", plugin_opt_fresolution_eq, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
@ -349,6 +359,7 @@ OPTION(prefix_2, "plugin-opt=disable-verify", anonymous_41, Flag, INVALID, disab
|
||||
"Alias for -disable-verify", nullptr, nullptr)
|
||||
OPTION(prefix_2, "plugin-opt=dwo_dir=", plugin_opt_dwo_dir_eq, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Directory to store .dwo files when LTO and debug fission are used", nullptr, nullptr)
|
||||
OPTION(prefix_2, "plugin-opt=emit-llvm", plugin_opt_emit_llvm, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "plugin-opt=jobs=", anonymous_42, Joined, INVALID, thinlto_jobs, nullptr, 0, 0,
|
||||
"Alias for -thinlto-jobs", nullptr, nullptr)
|
||||
OPTION(prefix_2, "plugin-opt=lto-partitions=", anonymous_43, Joined, INVALID, lto_partitions, nullptr, 0, 0,
|
||||
@ -424,6 +435,9 @@ OPTION(prefix_2, "sort-common", anonymous_60, Flag, INVALID, INVALID, nullptr, 0
|
||||
OPTION(prefix_2, "sort-section=", sort_section_eq, Joined, INVALID, sort_section, nullptr, 0, 0,
|
||||
"Specifies sections sorting rule when linkerscript is used", nullptr, nullptr)
|
||||
OPTION(prefix_2, "sort-section", sort_section, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "split-stack-adjust-size=", split_stack_adjust_size_eq, Joined, INVALID, split_stack_adjust_size, nullptr, 0, 0,
|
||||
"Specify adjustment to stack size when a split-stack function calls a non-split-stack function", "<value>", nullptr)
|
||||
OPTION(prefix_2, "split-stack-adjust-size", split_stack_adjust_size, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, "<value>", nullptr)
|
||||
OPTION(prefix_2, "start-group", start_group, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Ignored for compatibility with GNU unless you pass --warn-backrefs", nullptr, nullptr)
|
||||
OPTION(prefix_2, "start-lib", start_lib, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
@ -467,6 +481,8 @@ OPTION(prefix_2, "thinlto-jobs=", thinlto_jobs, Joined, INVALID, INVALID, nullpt
|
||||
"Number of ThinLTO jobs", nullptr, nullptr)
|
||||
OPTION(prefix_2, "threads", threads, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Run the linker multi-threaded (default)", nullptr, nullptr)
|
||||
OPTION(prefix_2, "toc-optimize", toc_optimize, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"(PowerPC64) Enable TOC related optimizations (default)", nullptr, nullptr)
|
||||
OPTION(prefix_2, "trace-symbol=", trace_symbol_eq, Joined, INVALID, trace_symbol, nullptr, 0, 0,
|
||||
"Trace references to symbols", nullptr, nullptr)
|
||||
OPTION(prefix_2, "trace-symbol", trace_symbol, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
@ -511,6 +527,8 @@ OPTION(prefix_2, "warn-backrefs", warn_backrefs, Flag, INVALID, INVALID, nullptr
|
||||
OPTION(prefix_2, "warn-common", warn_common, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Warn about duplicate common symbols", nullptr, nullptr)
|
||||
OPTION(prefix_2, "warn-execstack", anonymous_62, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "warn-ifunc-textrel", warn_ifunc_textrel, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Warn about using ifunc symbols with text relocations", nullptr, nullptr)
|
||||
OPTION(prefix_2, "warn-once", anonymous_63, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "warn-shared-textrel", anonymous_64, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "warn-symbol-ordering", warn_symbol_ordering, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
|
||||
7
deps/lld-prebuilt/MinGW/Options.inc
vendored
7
deps/lld-prebuilt/MinGW/Options.inc
vendored
@ -90,6 +90,13 @@ OPTION(prefix_1, "o", o, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
|
||||
OPTION(prefix_2, "pdb", pdb, Separate, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Specify output PDB debug information file", nullptr, nullptr)
|
||||
OPTION(prefix_2, "pic-executable", pic_executable, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "plugin-opt=", anonymous_3, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "plugin-opt", anonymous_2, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "plugin=", anonymous_1, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "plugin", anonymous_0, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "require-defined=", require_defined_eq, Joined, INVALID, require_defined, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "require-defined", require_defined, Separate, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Force symbol to be added to symbol table as an undefined one", nullptr, nullptr)
|
||||
OPTION(prefix_2, "shared", shared, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Build a shared object", nullptr, nullptr)
|
||||
OPTION(prefix_2, "stack", stack, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
|
||||
6
deps/lld-prebuilt/lld/Common/Version.inc
vendored
6
deps/lld-prebuilt/lld/Common/Version.inc
vendored
@ -1,6 +1,6 @@
|
||||
#define LLD_VERSION 7.0.0
|
||||
#define LLD_VERSION_STRING "7.0.0"
|
||||
#define LLD_VERSION_MAJOR 7
|
||||
#define LLD_VERSION 8.0.0
|
||||
#define LLD_VERSION_STRING "8.0.0"
|
||||
#define LLD_VERSION_MAJOR 8
|
||||
#define LLD_VERSION_MINOR 0
|
||||
#define LLD_REVISION_STRING ""
|
||||
#define LLD_REPOSITORY_STRING ""
|
||||
|
||||
41
deps/lld-prebuilt/wasm/Options.inc
vendored
41
deps/lld-prebuilt/wasm/Options.inc
vendored
@ -36,23 +36,28 @@ OPTION(prefix_1, "color-diagnostics=", color_diagnostics_eq, Joined, INVALID, IN
|
||||
"Use colors in diagnostics; one of 'always', 'never', 'auto'", nullptr, nullptr)
|
||||
OPTION(prefix_1, "color-diagnostics", color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Use colors in diagnostics", nullptr, nullptr)
|
||||
OPTION(prefix_1, "compress-relocations", compress_relocations, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Compress the relocation targets in the code section.", nullptr, nullptr)
|
||||
OPTION(prefix_1, "demangle", demangle, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Demangle symbol names", nullptr, nullptr)
|
||||
OPTION(prefix_1, "disable-verify", disable_verify, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "entry=", alias_entry_entry, Joined, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "entry=", anonymous_1, Joined, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "entry", entry, Separate, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Name of entry point symbol", "<entry>", nullptr)
|
||||
OPTION(prefix_1, "error-limit=", error_limit, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Maximum number of errors to emit before stopping (0 = no limit)", nullptr, nullptr)
|
||||
OPTION(prefix_1, "export-all", export_all, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Export all symbols (normally combined with --no-gc-sections)", nullptr, nullptr)
|
||||
OPTION(prefix_1, "export-dynamic", export_dynamic, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Put symbols in the dynamic symbol table", nullptr, nullptr)
|
||||
OPTION(prefix_1, "export-table", export_table, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Export function table to the environment", nullptr, nullptr)
|
||||
OPTION(prefix_1, "export=", export_eq, Joined, INVALID, export, nullptr, 0, 0,
|
||||
"Force a symbol to be exported", nullptr, nullptr)
|
||||
OPTION(prefix_1, "export", export, Separate, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Force a symbol to be exported", nullptr, nullptr)
|
||||
OPTION(prefix_2, "e", alias_entry_e, JoinedOrSeparate, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "export", export, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "E", anonymous_2, Flag, INVALID, export_dynamic, nullptr, 0, 0,
|
||||
"Alias for --export-dynamic", nullptr, nullptr)
|
||||
OPTION(prefix_2, "e", anonymous_0, JoinedOrSeparate, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "fatal-warnings", fatal_warnings, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Treat warnings as errors", nullptr, nullptr)
|
||||
OPTION(prefix_1, "gc-sections", gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
@ -67,7 +72,7 @@ OPTION(prefix_1, "import-table", import_table, Flag, INVALID, INVALID, nullptr,
|
||||
"Import function table from the environment", nullptr, nullptr)
|
||||
OPTION(prefix_1, "initial-memory=", initial_memory, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Initial size of the linear memory", nullptr, nullptr)
|
||||
OPTION(prefix_2, "i", alias_initial_memory_i, Flag, INVALID, initial_memory, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "i", anonymous_3, Flag, INVALID, initial_memory, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "lto-O", lto_O, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Optimization level for LTO", "<opt-level>", nullptr)
|
||||
OPTION(prefix_1, "lto-partitions=", lto_partitions, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
@ -82,18 +87,22 @@ OPTION(prefix_1, "merge-data-segments", merge_data_segments, Flag, INVALID, INVA
|
||||
"Enable merging data segments", nullptr, nullptr)
|
||||
OPTION(prefix_1, "mllvm", mllvm, Separate, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Options to pass to LLVM", nullptr, nullptr)
|
||||
OPTION(prefix_2, "m", alias_max_memory_m, Flag, INVALID, max_memory, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "m", anonymous_4, Flag, INVALID, max_memory, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "no-color-diagnostics", no_color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not use colors in diagnostics", nullptr, nullptr)
|
||||
OPTION(prefix_1, "no-demangle", no_demangle, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not demangle symbol names", nullptr, nullptr)
|
||||
OPTION(prefix_1, "no-entry", no_entry, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not output any entry point", nullptr, nullptr)
|
||||
OPTION(prefix_1, "no-export-dynamic", no_export_dynamic, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not put symbols in the dynamic symbol table (default)", nullptr, nullptr)
|
||||
OPTION(prefix_1, "no-fatal-warnings", no_fatal_warnings, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "no-gc-sections", no_gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Disable garbage collection of unused sections", nullptr, nullptr)
|
||||
OPTION(prefix_1, "no-merge-data-segments", no_merge_data_segments, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Disable merging data segments", nullptr, nullptr)
|
||||
OPTION(prefix_1, "no-pie", no_pie, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not create a position independent executable (default)", nullptr, nullptr)
|
||||
OPTION(prefix_1, "no-print-gc-sections", no_print_gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Do not list removed unused sections", nullptr, nullptr)
|
||||
OPTION(prefix_1, "no-threads", no_threads, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
@ -104,33 +113,41 @@ OPTION(prefix_2, "O", O, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Optimize output file size", nullptr, nullptr)
|
||||
OPTION(prefix_2, "o", o, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Path to file to write output", "<path>", nullptr)
|
||||
OPTION(prefix_1, "pie", pie, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Create a position independent executable", nullptr, nullptr)
|
||||
OPTION(prefix_1, "print-gc-sections", print_gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"List removed unused sections", nullptr, nullptr)
|
||||
OPTION(prefix_1, "relocatable", relocatable, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Create relocatable object file", nullptr, nullptr)
|
||||
OPTION(prefix_2, "r", alias_relocatable_r, Flag, INVALID, relocatable, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "r", anonymous_5, Flag, INVALID, relocatable, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "save-temps", save_temps, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "shared-memory", shared_memory, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Use shared linear memory", nullptr, nullptr)
|
||||
OPTION(prefix_1, "shared", shared, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Build a shared object", nullptr, nullptr)
|
||||
OPTION(prefix_1, "stack-first", stack_first, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Place stack at start of linear memory rather than after data", nullptr, nullptr)
|
||||
OPTION(prefix_1, "strip-all", strip_all, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Strip all symbols", nullptr, nullptr)
|
||||
OPTION(prefix_1, "strip-debug", strip_debug, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Strip debugging information", nullptr, nullptr)
|
||||
OPTION(prefix_2, "S", anonymous_7, Flag, INVALID, strip_debug, nullptr, 0, 0,
|
||||
"Alias for --strip-debug", nullptr, nullptr)
|
||||
OPTION(prefix_2, "s", anonymous_6, Flag, INVALID, strip_all, nullptr, 0, 0,
|
||||
"Alias for --strip-all", nullptr, nullptr)
|
||||
OPTION(prefix_1, "thinlto-cache-dir=", thinlto_cache_dir, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Path to ThinLTO cached object file directory", nullptr, nullptr)
|
||||
OPTION(prefix_1, "thinlto-cache-policy=", thinlto_cache_policy_eq, Joined, INVALID, thinlto_cache_policy, nullptr, 0, 0,
|
||||
"Pruning policy for the ThinLTO cache", nullptr, nullptr)
|
||||
OPTION(prefix_1, "thinlto-cache-policy", thinlto_cache_policy, Separate, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Pruning policy for the ThinLTO cache", nullptr, nullptr)
|
||||
OPTION(prefix_1, "thinlto-cache-policy", thinlto_cache_policy, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "thinlto-jobs=", thinlto_jobs, Joined, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Number of ThinLTO jobs", nullptr, nullptr)
|
||||
OPTION(prefix_1, "threads", threads, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Run the linker multi-threaded", nullptr, nullptr)
|
||||
OPTION(prefix_1, "undefined=", undefined_eq, Joined, INVALID, undefined, nullptr, 0, 0,
|
||||
"Force undefined symbol during linking", nullptr, nullptr)
|
||||
OPTION(prefix_1, "undefined", undefined, Separate, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Force undefined symbol during linking", nullptr, nullptr)
|
||||
OPTION(prefix_2, "u", alias_undefined_u, JoinedOrSeparate, INVALID, undefined, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "undefined", undefined, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_2, "u", anonymous_8, JoinedOrSeparate, INVALID, undefined, nullptr, 0, 0, nullptr, nullptr, nullptr)
|
||||
OPTION(prefix_1, "verbose", verbose, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
"Verbose mode", nullptr, nullptr)
|
||||
OPTION(prefix_1, "version", version, Flag, INVALID, INVALID, nullptr, 0, 0,
|
||||
|
||||
347
deps/lld/COFF/Chunks.cpp
vendored
347
deps/lld/COFF/Chunks.cpp
vendored
@ -11,6 +11,7 @@
|
||||
#include "InputFiles.h"
|
||||
#include "Symbols.h"
|
||||
#include "Writer.h"
|
||||
#include "SymbolTable.h"
|
||||
#include "lld/Common/ErrorHandler.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/BinaryFormat/COFF.h"
|
||||
@ -44,6 +45,22 @@ SectionChunk::SectionChunk(ObjFile *F, const coff_section *H)
|
||||
Live = !Config->DoGC || !isCOMDAT();
|
||||
}
|
||||
|
||||
// Initialize the RelocTargets vector, to allow redirecting certain relocations
|
||||
// to a thunk instead of the actual symbol the relocation's symbol table index
|
||||
// indicates.
|
||||
void SectionChunk::readRelocTargets() {
|
||||
assert(RelocTargets.empty());
|
||||
RelocTargets.reserve(Relocs.size());
|
||||
for (const coff_relocation &Rel : Relocs)
|
||||
RelocTargets.push_back(File->getSymbol(Rel.SymbolTableIndex));
|
||||
}
|
||||
|
||||
// Reset RelocTargets to their original targets before thunks were added.
|
||||
void SectionChunk::resetRelocTargets() {
|
||||
for (size_t I = 0, E = Relocs.size(); I < E; ++I)
|
||||
RelocTargets[I] = File->getSymbol(Relocs[I].SymbolTableIndex);
|
||||
}
|
||||
|
||||
static void add16(uint8_t *P, int16_t V) { write16le(P, read16le(P) + V); }
|
||||
static void add32(uint8_t *P, int32_t V) { write32le(P, read32le(P) + V); }
|
||||
static void add64(uint8_t *P, int64_t V) { write64le(P, read64le(P) + V); }
|
||||
@ -58,7 +75,8 @@ static bool checkSecRel(const SectionChunk *Sec, OutputSection *OS) {
|
||||
return true;
|
||||
if (Sec->isCodeView())
|
||||
return false;
|
||||
fatal("SECREL relocation cannot be applied to absolute symbols");
|
||||
error("SECREL relocation cannot be applied to absolute symbols");
|
||||
return false;
|
||||
}
|
||||
|
||||
static void applySecRel(const SectionChunk *Sec, uint8_t *Off,
|
||||
@ -98,7 +116,7 @@ void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, OutputSection *OS,
|
||||
case IMAGE_REL_AMD64_SECTION: applySecIdx(Off, OS); break;
|
||||
case IMAGE_REL_AMD64_SECREL: applySecRel(this, Off, OS, S); break;
|
||||
default:
|
||||
fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
|
||||
error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
|
||||
toString(File));
|
||||
}
|
||||
}
|
||||
@ -113,7 +131,7 @@ void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, OutputSection *OS,
|
||||
case IMAGE_REL_I386_SECTION: applySecIdx(Off, OS); break;
|
||||
case IMAGE_REL_I386_SECREL: applySecRel(this, Off, OS, S); break;
|
||||
default:
|
||||
fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
|
||||
error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
|
||||
toString(File));
|
||||
}
|
||||
}
|
||||
@ -123,16 +141,22 @@ static void applyMOV(uint8_t *Off, uint16_t V) {
|
||||
write16le(Off + 2, (read16le(Off + 2) & 0x8f00) | ((V & 0x700) << 4) | (V & 0xff));
|
||||
}
|
||||
|
||||
static uint16_t readMOV(uint8_t *Off) {
|
||||
static uint16_t readMOV(uint8_t *Off, bool MOVT) {
|
||||
uint16_t Op1 = read16le(Off);
|
||||
if ((Op1 & 0xfbf0) != (MOVT ? 0xf2c0 : 0xf240))
|
||||
error("unexpected instruction in " + Twine(MOVT ? "MOVT" : "MOVW") +
|
||||
" instruction in MOV32T relocation");
|
||||
uint16_t Op2 = read16le(Off + 2);
|
||||
if ((Op2 & 0x8000) != 0)
|
||||
error("unexpected instruction in " + Twine(MOVT ? "MOVT" : "MOVW") +
|
||||
" instruction in MOV32T relocation");
|
||||
return (Op2 & 0x00ff) | ((Op2 >> 4) & 0x0700) | ((Op1 << 1) & 0x0800) |
|
||||
((Op1 & 0x000f) << 12);
|
||||
}
|
||||
|
||||
void applyMOV32T(uint8_t *Off, uint32_t V) {
|
||||
uint16_t ImmW = readMOV(Off); // read MOVW operand
|
||||
uint16_t ImmT = readMOV(Off + 4); // read MOVT operand
|
||||
uint16_t ImmW = readMOV(Off, false); // read MOVW operand
|
||||
uint16_t ImmT = readMOV(Off + 4, true); // read MOVT operand
|
||||
uint32_t Imm = ImmW | (ImmT << 16);
|
||||
V += Imm; // add the immediate offset
|
||||
applyMOV(Off, V); // set MOVW operand
|
||||
@ -141,7 +165,7 @@ void applyMOV32T(uint8_t *Off, uint32_t V) {
|
||||
|
||||
static void applyBranch20T(uint8_t *Off, int32_t V) {
|
||||
if (!isInt<21>(V))
|
||||
fatal("relocation out of range");
|
||||
error("relocation out of range");
|
||||
uint32_t S = V < 0 ? 1 : 0;
|
||||
uint32_t J1 = (V >> 19) & 1;
|
||||
uint32_t J2 = (V >> 18) & 1;
|
||||
@ -151,7 +175,7 @@ static void applyBranch20T(uint8_t *Off, int32_t V) {
|
||||
|
||||
void applyBranch24T(uint8_t *Off, int32_t V) {
|
||||
if (!isInt<25>(V))
|
||||
fatal("relocation out of range");
|
||||
error("relocation out of range");
|
||||
uint32_t S = V < 0 ? 1 : 0;
|
||||
uint32_t J1 = ((~V >> 23) & 1) ^ S;
|
||||
uint32_t J2 = ((~V >> 22) & 1) ^ S;
|
||||
@ -176,7 +200,7 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS,
|
||||
case IMAGE_REL_ARM_SECTION: applySecIdx(Off, OS); break;
|
||||
case IMAGE_REL_ARM_SECREL: applySecRel(this, Off, OS, S); break;
|
||||
default:
|
||||
fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
|
||||
error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
|
||||
toString(File));
|
||||
}
|
||||
}
|
||||
@ -184,7 +208,7 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS,
|
||||
// Interpret the existing immediate value as a byte offset to the
|
||||
// target symbol, then update the instruction with the immediate as
|
||||
// the page offset from the current instruction to the target.
|
||||
static void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift) {
|
||||
void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift) {
|
||||
uint32_t Orig = read32le(Off);
|
||||
uint64_t Imm = ((Orig >> 29) & 0x3) | ((Orig >> 3) & 0x1FFFFC);
|
||||
S += Imm;
|
||||
@ -198,7 +222,7 @@ static void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift) {
|
||||
// Update the immediate field in a AARCH64 ldr, str, and add instruction.
|
||||
// Optionally limit the range of the written immediate by one or more bits
|
||||
// (RangeLimit).
|
||||
static void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit) {
|
||||
void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit) {
|
||||
uint32_t Orig = read32le(Off);
|
||||
Imm += (Orig >> 10) & 0xFFF;
|
||||
Orig &= ~(0xFFF << 10);
|
||||
@ -221,7 +245,7 @@ static void applyArm64Ldr(uint8_t *Off, uint64_t Imm) {
|
||||
if ((Orig & 0x4800000) == 0x4800000)
|
||||
Size += 4;
|
||||
if ((Imm & ((1 << Size) - 1)) != 0)
|
||||
fatal("misaligned ldr/str offset");
|
||||
error("misaligned ldr/str offset");
|
||||
applyArm64Imm(Off, Imm >> Size, Size);
|
||||
}
|
||||
|
||||
@ -250,21 +274,21 @@ static void applySecRelLdr(const SectionChunk *Sec, uint8_t *Off,
|
||||
applyArm64Ldr(Off, (S - OS->getRVA()) & 0xfff);
|
||||
}
|
||||
|
||||
static void applyArm64Branch26(uint8_t *Off, int64_t V) {
|
||||
void applyArm64Branch26(uint8_t *Off, int64_t V) {
|
||||
if (!isInt<28>(V))
|
||||
fatal("relocation out of range");
|
||||
error("relocation out of range");
|
||||
or32(Off, (V & 0x0FFFFFFC) >> 2);
|
||||
}
|
||||
|
||||
static void applyArm64Branch19(uint8_t *Off, int64_t V) {
|
||||
if (!isInt<21>(V))
|
||||
fatal("relocation out of range");
|
||||
error("relocation out of range");
|
||||
or32(Off, (V & 0x001FFFFC) << 3);
|
||||
}
|
||||
|
||||
static void applyArm64Branch14(uint8_t *Off, int64_t V) {
|
||||
if (!isInt<16>(V))
|
||||
fatal("relocation out of range");
|
||||
error("relocation out of range");
|
||||
or32(Off, (V & 0x0000FFFC) << 3);
|
||||
}
|
||||
|
||||
@ -287,11 +311,37 @@ void SectionChunk::applyRelARM64(uint8_t *Off, uint16_t Type, OutputSection *OS,
|
||||
case IMAGE_REL_ARM64_SECREL_LOW12L: applySecRelLdr(this, Off, OS, S); break;
|
||||
case IMAGE_REL_ARM64_SECTION: applySecIdx(Off, OS); break;
|
||||
default:
|
||||
fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
|
||||
error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
|
||||
toString(File));
|
||||
}
|
||||
}
|
||||
|
||||
static void maybeReportRelocationToDiscarded(const SectionChunk *FromChunk,
|
||||
Defined *Sym,
|
||||
const coff_relocation &Rel) {
|
||||
// Don't report these errors when the relocation comes from a debug info
|
||||
// section or in mingw mode. MinGW mode object files (built by GCC) can
|
||||
// have leftover sections with relocations against discarded comdat
|
||||
// sections. Such sections are left as is, with relocations untouched.
|
||||
if (FromChunk->isCodeView() || FromChunk->isDWARF() || Config->MinGW)
|
||||
return;
|
||||
|
||||
// Get the name of the symbol. If it's null, it was discarded early, so we
|
||||
// have to go back to the object file.
|
||||
ObjFile *File = FromChunk->File;
|
||||
StringRef Name;
|
||||
if (Sym) {
|
||||
Name = Sym->getName();
|
||||
} else {
|
||||
COFFSymbolRef COFFSym =
|
||||
check(File->getCOFFObj()->getSymbol(Rel.SymbolTableIndex));
|
||||
File->getCOFFObj()->getSymbolName(COFFSym, Name);
|
||||
}
|
||||
|
||||
error("relocation against symbol in discarded section: " + Name +
|
||||
getSymbolLocations(File, Rel.SymbolTableIndex));
|
||||
}
|
||||
|
||||
void SectionChunk::writeTo(uint8_t *Buf) const {
|
||||
if (!hasData())
|
||||
return;
|
||||
@ -302,46 +352,40 @@ void SectionChunk::writeTo(uint8_t *Buf) const {
|
||||
|
||||
// Apply relocations.
|
||||
size_t InputSize = getSize();
|
||||
for (const coff_relocation &Rel : Relocs) {
|
||||
for (size_t I = 0, E = Relocs.size(); I < E; I++) {
|
||||
const coff_relocation &Rel = Relocs[I];
|
||||
|
||||
// Check for an invalid relocation offset. This check isn't perfect, because
|
||||
// we don't have the relocation size, which is only known after checking the
|
||||
// machine and relocation type. As a result, a relocation may overwrite the
|
||||
// beginning of the following input section.
|
||||
if (Rel.VirtualAddress >= InputSize)
|
||||
fatal("relocation points beyond the end of its parent section");
|
||||
if (Rel.VirtualAddress >= InputSize) {
|
||||
error("relocation points beyond the end of its parent section");
|
||||
continue;
|
||||
}
|
||||
|
||||
uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress;
|
||||
|
||||
// Use the potentially remapped Symbol instead of the one that the
|
||||
// relocation points to.
|
||||
auto *Sym = dyn_cast_or_null<Defined>(RelocTargets[I]);
|
||||
|
||||
// Get the output section of the symbol for this relocation. The output
|
||||
// section is needed to compute SECREL and SECTION relocations used in debug
|
||||
// info.
|
||||
auto *Sym =
|
||||
dyn_cast_or_null<Defined>(File->getSymbol(Rel.SymbolTableIndex));
|
||||
if (!Sym) {
|
||||
if (isCodeView() || isDWARF())
|
||||
continue;
|
||||
// Symbols in early discarded sections are represented using null pointers,
|
||||
// so we need to retrieve the name from the object file.
|
||||
COFFSymbolRef Sym =
|
||||
check(File->getCOFFObj()->getSymbol(Rel.SymbolTableIndex));
|
||||
StringRef Name;
|
||||
File->getCOFFObj()->getSymbolName(Sym, Name);
|
||||
fatal("relocation against symbol in discarded section: " + Name);
|
||||
}
|
||||
Chunk *C = Sym->getChunk();
|
||||
Chunk *C = Sym ? Sym->getChunk() : nullptr;
|
||||
OutputSection *OS = C ? C->getOutputSection() : nullptr;
|
||||
|
||||
// Only absolute and __ImageBase symbols lack an output section. For any
|
||||
// other symbol, this indicates that the chunk was discarded. Normally
|
||||
// relocations against discarded sections are an error. However, debug info
|
||||
// sections are not GC roots and can end up with these kinds of relocations.
|
||||
// Skip these relocations.
|
||||
if (!OS && !isa<DefinedAbsolute>(Sym) && !isa<DefinedSynthetic>(Sym)) {
|
||||
if (isCodeView() || isDWARF())
|
||||
continue;
|
||||
fatal("relocation against symbol in discarded section: " +
|
||||
Sym->getName());
|
||||
// Skip the relocation if it refers to a discarded section, and diagnose it
|
||||
// as an error if appropriate. If a symbol was discarded early, it may be
|
||||
// null. If it was discarded late, the output section will be null, unless
|
||||
// it was an absolute or synthetic symbol.
|
||||
if (!Sym ||
|
||||
(!OS && !isa<DefinedAbsolute>(Sym) && !isa<DefinedSynthetic>(Sym))) {
|
||||
maybeReportRelocationToDiscarded(this, Sym, Rel);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t S = Sym->getRVA();
|
||||
|
||||
// Compute the RVA of the relocation for relative relocations.
|
||||
@ -399,17 +443,125 @@ static uint8_t getBaserelType(const coff_relocation &Rel) {
|
||||
// fixed by the loader if load-time relocation is needed.
|
||||
// Only called when base relocation is enabled.
|
||||
void SectionChunk::getBaserels(std::vector<Baserel> *Res) {
|
||||
for (const coff_relocation &Rel : Relocs) {
|
||||
for (size_t I = 0, E = Relocs.size(); I < E; I++) {
|
||||
const coff_relocation &Rel = Relocs[I];
|
||||
uint8_t Ty = getBaserelType(Rel);
|
||||
if (Ty == IMAGE_REL_BASED_ABSOLUTE)
|
||||
continue;
|
||||
Symbol *Target = File->getSymbol(Rel.SymbolTableIndex);
|
||||
// Use the potentially remapped Symbol instead of the one that the
|
||||
// relocation points to.
|
||||
Symbol *Target = RelocTargets[I];
|
||||
if (!Target || isa<DefinedAbsolute>(Target))
|
||||
continue;
|
||||
Res->emplace_back(RVA + Rel.VirtualAddress, Ty);
|
||||
}
|
||||
}
|
||||
|
||||
// MinGW specific.
|
||||
// Check whether a static relocation of type Type can be deferred and
|
||||
// handled at runtime as a pseudo relocation (for references to a module
|
||||
// local variable, which turned out to actually need to be imported from
|
||||
// another DLL). This returns the size the relocation is supposed to update,
|
||||
// in bits, or 0 if the relocation cannot be handled as a runtime pseudo
|
||||
// relocation.
|
||||
static int getRuntimePseudoRelocSize(uint16_t Type) {
|
||||
// Relocations that either contain an absolute address, or a plain
|
||||
// relative offset, since the runtime pseudo reloc implementation
|
||||
// adds 8/16/32/64 bit values to a memory address.
|
||||
//
|
||||
// Given a pseudo relocation entry,
|
||||
//
|
||||
// typedef struct {
|
||||
// DWORD sym;
|
||||
// DWORD target;
|
||||
// DWORD flags;
|
||||
// } runtime_pseudo_reloc_item_v2;
|
||||
//
|
||||
// the runtime relocation performs this adjustment:
|
||||
// *(base + .target) += *(base + .sym) - (base + .sym)
|
||||
//
|
||||
// This works for both absolute addresses (IMAGE_REL_*_ADDR32/64,
|
||||
// IMAGE_REL_I386_DIR32, where the memory location initially contains
|
||||
// the address of the IAT slot, and for relative addresses (IMAGE_REL*_REL32),
|
||||
// where the memory location originally contains the relative offset to the
|
||||
// IAT slot.
|
||||
//
|
||||
// This requires the target address to be writable, either directly out of
|
||||
// the image, or temporarily changed at runtime with VirtualProtect.
|
||||
// Since this only operates on direct address values, it doesn't work for
|
||||
// ARM/ARM64 relocations, other than the plain ADDR32/ADDR64 relocations.
|
||||
switch (Config->Machine) {
|
||||
case AMD64:
|
||||
switch (Type) {
|
||||
case IMAGE_REL_AMD64_ADDR64:
|
||||
return 64;
|
||||
case IMAGE_REL_AMD64_ADDR32:
|
||||
case IMAGE_REL_AMD64_REL32:
|
||||
case IMAGE_REL_AMD64_REL32_1:
|
||||
case IMAGE_REL_AMD64_REL32_2:
|
||||
case IMAGE_REL_AMD64_REL32_3:
|
||||
case IMAGE_REL_AMD64_REL32_4:
|
||||
case IMAGE_REL_AMD64_REL32_5:
|
||||
return 32;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
case I386:
|
||||
switch (Type) {
|
||||
case IMAGE_REL_I386_DIR32:
|
||||
case IMAGE_REL_I386_REL32:
|
||||
return 32;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
case ARMNT:
|
||||
switch (Type) {
|
||||
case IMAGE_REL_ARM_ADDR32:
|
||||
return 32;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
case ARM64:
|
||||
switch (Type) {
|
||||
case IMAGE_REL_ARM64_ADDR64:
|
||||
return 64;
|
||||
case IMAGE_REL_ARM64_ADDR32:
|
||||
return 32;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("unknown machine type");
|
||||
}
|
||||
}
|
||||
|
||||
// MinGW specific.
|
||||
// Append information to the provided vector about all relocations that
|
||||
// need to be handled at runtime as runtime pseudo relocations (references
|
||||
// to a module local variable, which turned out to actually need to be
|
||||
// imported from another DLL).
|
||||
void SectionChunk::getRuntimePseudoRelocs(
|
||||
std::vector<RuntimePseudoReloc> &Res) {
|
||||
for (const coff_relocation &Rel : Relocs) {
|
||||
auto *Target =
|
||||
dyn_cast_or_null<Defined>(File->getSymbol(Rel.SymbolTableIndex));
|
||||
if (!Target || !Target->IsRuntimePseudoReloc)
|
||||
continue;
|
||||
int SizeInBits = getRuntimePseudoRelocSize(Rel.Type);
|
||||
if (SizeInBits == 0) {
|
||||
error("unable to automatically import from " + Target->getName() +
|
||||
" with relocation type " +
|
||||
File->getCOFFObj()->getRelocationTypeName(Rel.Type) + " in " +
|
||||
toString(File));
|
||||
continue;
|
||||
}
|
||||
// SizeInBits is used to initialize the Flags field; currently no
|
||||
// other flags are defined.
|
||||
Res.emplace_back(
|
||||
RuntimePseudoReloc(Target, this, Rel.VirtualAddress, SizeInBits));
|
||||
}
|
||||
}
|
||||
|
||||
bool SectionChunk::hasData() const {
|
||||
return !(Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA);
|
||||
}
|
||||
@ -447,6 +599,13 @@ void SectionChunk::replace(SectionChunk *Other) {
|
||||
Other->Live = false;
|
||||
}
|
||||
|
||||
uint32_t SectionChunk::getSectionNumber() const {
|
||||
DataRefImpl R;
|
||||
R.p = reinterpret_cast<uintptr_t>(Header);
|
||||
SectionRef S(R, File->getCOFFObj());
|
||||
return S.getIndex() + 1;
|
||||
}
|
||||
|
||||
CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) {
|
||||
// Common symbols are aligned on natural boundaries up to 32 bytes.
|
||||
// This is what MSVC link.exe does.
|
||||
@ -460,6 +619,7 @@ uint32_t CommonChunk::getOutputCharacteristics() const {
|
||||
|
||||
void StringChunk::writeTo(uint8_t *Buf) const {
  // Copy the string bytes to this chunk's slot in the output buffer and
  // terminate them explicitly with a NUL byte.
  uint8_t *Dest = Buf + OutputSectionOff;
  const size_t Len = Str.size();
  memcpy(Dest, Str.data(), Len);
  Dest[Len] = '\0';
}
|
||||
|
||||
ImportThunkChunkX64::ImportThunkChunkX64(Defined *S) : ImpSymbol(S) {
|
||||
@ -502,13 +662,50 @@ void ImportThunkChunkARM64::writeTo(uint8_t *Buf) const {
|
||||
applyArm64Ldr(Buf + OutputSectionOff + 4, Off);
|
||||
}
|
||||
|
||||
// Machine code template for a Thumb2, position independent, non-interworking
// range extension thunk. The movw/movt immediates are left as zero here and
// are patched when the thunk is written out.
const uint8_t ArmThunk[] = {
    // P:  movw ip, :lower16:S - (P + (L1-P) + 4)
    0x40, 0xf2, 0x00, 0x0c,
    //     movt ip, :upper16:S - (P + (L1-P) + 4)
    0xc0, 0xf2, 0x00, 0x0c,
    // L1: add  pc, ip
    0xe7, 0x44,
};
|
||||
|
||||
size_t RangeExtensionThunkARM::getSize() const {
|
||||
assert(Config->Machine == ARMNT);
|
||||
return sizeof(ArmThunk);
|
||||
}
|
||||
|
||||
void RangeExtensionThunkARM::writeTo(uint8_t *Buf) const {
|
||||
assert(Config->Machine == ARMNT);
|
||||
uint64_t Offset = Target->getRVA() - RVA - 12;
|
||||
memcpy(Buf + OutputSectionOff, ArmThunk, sizeof(ArmThunk));
|
||||
applyMOV32T(Buf + OutputSectionOff, uint32_t(Offset));
|
||||
}
|
||||
|
||||
// Machine code template for a position independent ARM64 adrp+add range
// extension thunk. Its maximum range of +/- 4 GB is enough for any PE-COFF
// image. The adrp/add immediates are patched when the thunk is written out.
const uint8_t Arm64Thunk[] = {
    // adrp x16, Dest
    0x10, 0x00, 0x00, 0x90,
    // add  x16, x16, :lo12:Dest
    0x10, 0x02, 0x00, 0x91,
    // br   x16
    0x00, 0x02, 0x1f, 0xd6,
};
|
||||
|
||||
size_t RangeExtensionThunkARM64::getSize() const {
|
||||
assert(Config->Machine == ARM64);
|
||||
return sizeof(Arm64Thunk);
|
||||
}
|
||||
|
||||
// Emits the thunk: copies the code template, then patches the adrp (first
// instruction) and the add (second instruction) with the target address.
void RangeExtensionThunkARM64::writeTo(uint8_t *Buf) const {
  assert(Config->Machine == ARM64);

  uint8_t *Out = Buf + OutputSectionOff;
  memcpy(Out, Arm64Thunk, sizeof(Arm64Thunk));

  const uint64_t Dest = Target->getRVA();
  // adrp takes the target and the thunk's own RVA; add takes the low 12 bits.
  applyArm64Addr(Out + 0, Dest, RVA, 12);
  applyArm64Imm(Out + 4, Dest & 0xfff, 0);
}
|
||||
|
||||
// The chunk's own address is recorded as a base relocation; the entry is
// built from this chunk's RVA alone.
void LocalImportChunk::getBaserels(std::vector<Baserel> *Res) {
  const auto SlotRVA = getRVA();
  Res->emplace_back(SlotRVA);
}
|
||||
|
||||
size_t LocalImportChunk::getSize() const {
|
||||
return Config->is64() ? 8 : 4;
|
||||
}
|
||||
size_t LocalImportChunk::getSize() const { return Config->Wordsize; }
|
||||
|
||||
void LocalImportChunk::writeTo(uint8_t *Buf) const {
|
||||
if (Config->is64()) {
|
||||
@ -528,6 +725,34 @@ void RVATableChunk::writeTo(uint8_t *Buf) const {
|
||||
"RVA tables should be de-duplicated");
|
||||
}
|
||||
|
||||
// MinGW specific, for the "automatic import of variables from DLLs" feature.
|
||||
size_t PseudoRelocTableChunk::getSize() const {
|
||||
if (Relocs.empty())
|
||||
return 0;
|
||||
return 12 + 12 * Relocs.size();
|
||||
}
|
||||
|
||||
// MinGW specific.
|
||||
void PseudoRelocTableChunk::writeTo(uint8_t *Buf) const {
|
||||
if (Relocs.empty())
|
||||
return;
|
||||
|
||||
ulittle32_t *Table = reinterpret_cast<ulittle32_t *>(Buf + OutputSectionOff);
|
||||
// This is the list header, to signal the runtime pseudo relocation v2
|
||||
// format.
|
||||
Table[0] = 0;
|
||||
Table[1] = 0;
|
||||
Table[2] = 1;
|
||||
|
||||
size_t Idx = 3;
|
||||
for (const RuntimePseudoReloc &RPR : Relocs) {
|
||||
Table[Idx + 0] = RPR.Sym->getRVA();
|
||||
Table[Idx + 1] = RPR.Target->getRVA() + RPR.TargetOffset;
|
||||
Table[Idx + 2] = RPR.Flags;
|
||||
Idx += 3;
|
||||
}
|
||||
}
|
||||
|
||||
// Windows-specific. This class represents a block in .reloc section.
|
||||
// The format is described here.
|
||||
//
|
||||
@ -613,13 +838,16 @@ void MergeChunk::addSection(SectionChunk *C) {
|
||||
}
|
||||
|
||||
void MergeChunk::finalizeContents() {
|
||||
for (SectionChunk *C : Sections)
|
||||
if (C->isLive())
|
||||
Builder.add(toStringRef(C->getContents()));
|
||||
Builder.finalize();
|
||||
if (!Finalized) {
|
||||
for (SectionChunk *C : Sections)
|
||||
if (C->Live)
|
||||
Builder.add(toStringRef(C->getContents()));
|
||||
Builder.finalize();
|
||||
Finalized = true;
|
||||
}
|
||||
|
||||
for (SectionChunk *C : Sections) {
|
||||
if (!C->isLive())
|
||||
if (!C->Live)
|
||||
continue;
|
||||
size_t Off = Builder.getOffset(toStringRef(C->getContents()));
|
||||
C->setOutputSection(Out);
|
||||
@ -640,5 +868,16 @@ void MergeChunk::writeTo(uint8_t *Buf) const {
|
||||
Builder.write(Buf + OutputSectionOff);
|
||||
}
|
||||
|
||||
// MinGW specific.
|
||||
// The chunk holds a single value whose size is the configured word size.
size_t AbsolutePointerChunk::getSize() const {
  return Config->Wordsize;
}
|
||||
|
||||
void AbsolutePointerChunk::writeTo(uint8_t *Buf) const {
|
||||
if (Config->is64()) {
|
||||
write64le(Buf + OutputSectionOff, Value);
|
||||
} else {
|
||||
write32le(Buf + OutputSectionOff, Value);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace coff
|
||||
} // namespace lld
|
||||
|
||||
130
deps/lld/COFF/Chunks.h
vendored
130
deps/lld/COFF/Chunks.h
vendored
@ -36,6 +36,7 @@ class DefinedImportData;
|
||||
class DefinedRegular;
|
||||
class ObjFile;
|
||||
class OutputSection;
|
||||
class RuntimePseudoReloc;
|
||||
class Symbol;
|
||||
|
||||
// Mask for permissions (discardable, writable, readable, executable, etc).
|
||||
@ -63,6 +64,13 @@ public:
|
||||
// before calling this function.
|
||||
virtual void writeTo(uint8_t *Buf) const {}
|
||||
|
||||
// Called by the writer once before assigning addresses and writing
|
||||
// the output.
|
||||
virtual void readRelocTargets() {}
|
||||
|
||||
// Called if restarting thunk addition.
|
||||
virtual void resetRelocTargets() {}
|
||||
|
||||
// Called by the writer after an RVA is assigned, but before calling
|
||||
// getSize().
|
||||
virtual void finalizeContents() {}
|
||||
@ -114,6 +122,10 @@ protected:
|
||||
public:
|
||||
// The offset from beginning of the output section. The writer sets a value.
|
||||
uint64_t OutputSectionOff = 0;
|
||||
|
||||
// Whether this section needs to be kept distinct from other sections during
|
||||
// ICF. This is set by the driver using address-significance tables.
|
||||
bool KeepUnique = false;
|
||||
};
|
||||
|
||||
// A chunk corresponding to a section of an input file.
|
||||
@ -140,6 +152,8 @@ public:
|
||||
|
||||
SectionChunk(ObjFile *File, const coff_section *Header);
|
||||
static bool classof(const Chunk *C) { return C->kind() == SectionKind; }
|
||||
void readRelocTargets() override;
|
||||
void resetRelocTargets() override;
|
||||
size_t getSize() const override { return Header->SizeOfRawData; }
|
||||
ArrayRef<uint8_t> getContents() const;
|
||||
void writeTo(uint8_t *Buf) const override;
|
||||
@ -157,6 +171,8 @@ public:
|
||||
void applyRelARM64(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S,
|
||||
uint64_t P) const;
|
||||
|
||||
void getRuntimePseudoRelocs(std::vector<RuntimePseudoReloc> &Res);
|
||||
|
||||
// Called if the garbage collector decides to not include this chunk
|
||||
// in a final output. It's supposed to print out a log message to stdout.
|
||||
void printDiscardedMessage() const;
|
||||
@ -167,16 +183,6 @@ public:
|
||||
|
||||
StringRef getDebugName() override;
|
||||
|
||||
// Returns true if the chunk was not dropped by GC.
|
||||
bool isLive() { return Live; }
|
||||
|
||||
// Used by the garbage collector.
|
||||
void markLive() {
|
||||
assert(Config->DoGC && "should only mark things live from GC");
|
||||
assert(!isLive() && "Cannot mark an already live section!");
|
||||
Live = true;
|
||||
}
|
||||
|
||||
// True if this is a codeview debug info chunk. These will not be laid out in
|
||||
// the image. Instead they will end up in the PDB, if one is requested.
|
||||
bool isCodeView() const {
|
||||
@ -197,10 +203,13 @@ public:
|
||||
// Allow iteration over the associated child chunks for this section.
|
||||
ArrayRef<SectionChunk *> children() const { return AssocChildren; }
|
||||
|
||||
// The section ID this chunk belongs to in its Obj.
|
||||
uint32_t getSectionNumber() const;
|
||||
|
||||
// A pointer pointing to a replacement for this chunk.
|
||||
// Initially it points to "this" object. If this chunk is merged
|
||||
// with other chunk by ICF, it points to another chunk,
|
||||
// and this chunk is considrered as dead.
|
||||
// and this chunk is considered as dead.
|
||||
SectionChunk *Repl;
|
||||
|
||||
// The CRC of the contents as described in the COFF spec 4.5.5.
|
||||
@ -217,13 +226,17 @@ public:
|
||||
|
||||
ArrayRef<coff_relocation> Relocs;
|
||||
|
||||
// Used by the garbage collector.
|
||||
bool Live;
|
||||
|
||||
// When inserting a thunk, we need to adjust a relocation to point to
|
||||
// the thunk instead of the actual original target Symbol.
|
||||
std::vector<Symbol *> RelocTargets;
|
||||
|
||||
private:
|
||||
StringRef SectionName;
|
||||
std::vector<SectionChunk *> AssocChildren;
|
||||
|
||||
// Used by the garbage collector.
|
||||
bool Live;
|
||||
|
||||
// Used for ICF (Identical COMDAT Folding)
|
||||
void replace(SectionChunk *Other);
|
||||
uint32_t Class[2] = {0, 0};
|
||||
@ -254,6 +267,7 @@ public:
|
||||
|
||||
private:
|
||||
llvm::StringTableBuilder Builder;
|
||||
bool Finalized = false;
|
||||
};
|
||||
|
||||
// A chunk for common symbols. Common chunks don't have actual data.
|
||||
@ -297,7 +311,7 @@ static const uint8_t ImportThunkARM64[] = {
|
||||
};
|
||||
|
||||
// Windows-specific.
|
||||
// A chunk for DLL import jump table entry. In a final output, it's
|
||||
// A chunk for DLL import jump table entry. In a final output, its
|
||||
// contents will be a JMP instruction to some __imp_ symbol.
|
||||
class ImportThunkChunkX64 : public Chunk {
|
||||
public:
|
||||
@ -341,11 +355,31 @@ private:
|
||||
Defined *ImpSymbol;
|
||||
};
|
||||
|
||||
class RangeExtensionThunkARM : public Chunk {
|
||||
public:
|
||||
explicit RangeExtensionThunkARM(Defined *T) : Target(T) {}
|
||||
size_t getSize() const override;
|
||||
void writeTo(uint8_t *Buf) const override;
|
||||
|
||||
Defined *Target;
|
||||
};
|
||||
|
||||
class RangeExtensionThunkARM64 : public Chunk {
|
||||
public:
|
||||
explicit RangeExtensionThunkARM64(Defined *T) : Target(T) {}
|
||||
size_t getSize() const override;
|
||||
void writeTo(uint8_t *Buf) const override;
|
||||
|
||||
Defined *Target;
|
||||
};
|
||||
|
||||
// Windows-specific.
|
||||
// See comments for DefinedLocalImport class.
|
||||
class LocalImportChunk : public Chunk {
|
||||
public:
|
||||
explicit LocalImportChunk(Defined *S) : Sym(S) {}
|
||||
explicit LocalImportChunk(Defined *S) : Sym(S) {
|
||||
Alignment = Config->Wordsize;
|
||||
}
|
||||
size_t getSize() const override;
|
||||
void getBaserels(std::vector<Baserel> *Res) override;
|
||||
void writeTo(uint8_t *Buf) const override;
|
||||
@ -414,9 +448,73 @@ public:
|
||||
uint8_t Type;
|
||||
};
|
||||
|
||||
// This is a placeholder Chunk, to allow attaching a DefinedSynthetic to a
|
||||
// specific place in a section, without any data. This is used for the MinGW
|
||||
// specific symbol __RUNTIME_PSEUDO_RELOC_LIST_END__, even though the concept
|
||||
// of an empty chunk isn't MinGW specific.
|
||||
class EmptyChunk : public Chunk {
|
||||
public:
|
||||
EmptyChunk() {}
|
||||
size_t getSize() const override { return 0; }
|
||||
void writeTo(uint8_t *Buf) const override {}
|
||||
};
|
||||
|
||||
// MinGW specific, for the "automatic import of variables from DLLs" feature.
|
||||
// This provides the table of runtime pseudo relocations, for variable
|
||||
// references that turned out to need to be imported from a DLL even though
|
||||
// the reference didn't use the dllimport attribute. The MinGW runtime will
|
||||
// process this table after loading, before handing control over to user
|
||||
// code.
|
||||
class PseudoRelocTableChunk : public Chunk {
|
||||
public:
|
||||
PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> &Relocs)
|
||||
: Relocs(std::move(Relocs)) {
|
||||
Alignment = 4;
|
||||
}
|
||||
size_t getSize() const override;
|
||||
void writeTo(uint8_t *Buf) const override;
|
||||
|
||||
private:
|
||||
std::vector<RuntimePseudoReloc> Relocs;
|
||||
};
|
||||
|
||||
// MinGW specific; information about one individual location in the image
|
||||
// that needs to be fixed up at runtime after loading. This represents
|
||||
// one individual element in the PseudoRelocTableChunk table.
|
||||
class RuntimePseudoReloc {
|
||||
public:
|
||||
RuntimePseudoReloc(Defined *Sym, SectionChunk *Target, uint32_t TargetOffset,
|
||||
int Flags)
|
||||
: Sym(Sym), Target(Target), TargetOffset(TargetOffset), Flags(Flags) {}
|
||||
|
||||
Defined *Sym;
|
||||
SectionChunk *Target;
|
||||
uint32_t TargetOffset;
|
||||
// The Flags field contains the size of the relocation, in bits. No other
|
||||
// flags are currently defined.
|
||||
int Flags;
|
||||
};
|
||||
|
||||
// MinGW specific. A Chunk that contains one pointer-sized absolute value.
|
||||
class AbsolutePointerChunk : public Chunk {
|
||||
public:
|
||||
AbsolutePointerChunk(uint64_t Value) : Value(Value) {
|
||||
Alignment = getSize();
|
||||
}
|
||||
size_t getSize() const override;
|
||||
void writeTo(uint8_t *Buf) const override;
|
||||
|
||||
private:
|
||||
uint64_t Value;
|
||||
};
|
||||
|
||||
void applyMOV32T(uint8_t *Off, uint32_t V);
|
||||
void applyBranch24T(uint8_t *Off, int32_t V);
|
||||
|
||||
void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift);
|
||||
void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit);
|
||||
void applyArm64Branch26(uint8_t *Off, int64_t V);
|
||||
|
||||
} // namespace coff
|
||||
} // namespace lld
|
||||
|
||||
|
||||
5
deps/lld/COFF/Config.h
vendored
5
deps/lld/COFF/Config.h
vendored
@ -84,6 +84,7 @@ struct Configuration {
|
||||
bool is64() { return Machine == AMD64 || Machine == ARM64; }
|
||||
|
||||
llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN;
|
||||
size_t Wordsize;
|
||||
bool Verbose = false;
|
||||
WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN;
|
||||
Symbol *Entry = nullptr;
|
||||
@ -94,7 +95,8 @@ struct Configuration {
|
||||
bool DoICF = true;
|
||||
bool TailMerge;
|
||||
bool Relocatable = true;
|
||||
bool Force = false;
|
||||
bool ForceMultiple = false;
|
||||
bool ForceUnresolved = false;
|
||||
bool Debug = false;
|
||||
bool DebugDwarf = false;
|
||||
bool DebugGHashes = false;
|
||||
@ -195,6 +197,7 @@ struct Configuration {
|
||||
bool MinGW = false;
|
||||
bool WarnMissingOrderSymbol = true;
|
||||
bool WarnLocallyDefinedImported = true;
|
||||
bool WarnDebugInfoUnusable = true;
|
||||
bool Incremental = true;
|
||||
bool IntegrityCheck = false;
|
||||
bool KillAt = false;
|
||||
|
||||
118
deps/lld/COFF/DLL.cpp
vendored
118
deps/lld/COFF/DLL.cpp
vendored
@ -35,8 +35,6 @@ namespace {
|
||||
|
||||
// Import table
|
||||
|
||||
static int ptrSize() { return Config->is64() ? 8 : 4; }
|
||||
|
||||
// A chunk for the import descriptor table.
|
||||
class HintNameChunk : public Chunk {
|
||||
public:
|
||||
@ -49,6 +47,7 @@ public:
|
||||
}
|
||||
|
||||
void writeTo(uint8_t *Buf) const override {
|
||||
memset(Buf + OutputSectionOff, 0, getSize());
|
||||
write16le(Buf + OutputSectionOff, Hint);
|
||||
memcpy(Buf + OutputSectionOff + 2, Name.data(), Name.size());
|
||||
}
|
||||
@ -61,11 +60,14 @@ private:
|
||||
// A chunk for the import descriptor table.
|
||||
class LookupChunk : public Chunk {
|
||||
public:
|
||||
explicit LookupChunk(Chunk *C) : HintName(C) { Alignment = ptrSize(); }
|
||||
size_t getSize() const override { return ptrSize(); }
|
||||
explicit LookupChunk(Chunk *C) : HintName(C) { Alignment = Config->Wordsize; }
|
||||
size_t getSize() const override { return Config->Wordsize; }
|
||||
|
||||
void writeTo(uint8_t *Buf) const override {
|
||||
write32le(Buf + OutputSectionOff, HintName->getRVA());
|
||||
if (Config->is64())
|
||||
write64le(Buf + OutputSectionOff, HintName->getRVA());
|
||||
else
|
||||
write32le(Buf + OutputSectionOff, HintName->getRVA());
|
||||
}
|
||||
|
||||
Chunk *HintName;
|
||||
@ -76,8 +78,10 @@ public:
|
||||
// See Microsoft PE/COFF spec 7.1. Import Header for details.
|
||||
class OrdinalOnlyChunk : public Chunk {
|
||||
public:
|
||||
explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) { Alignment = ptrSize(); }
|
||||
size_t getSize() const override { return ptrSize(); }
|
||||
explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) {
|
||||
Alignment = Config->Wordsize;
|
||||
}
|
||||
size_t getSize() const override { return Config->Wordsize; }
|
||||
|
||||
void writeTo(uint8_t *Buf) const override {
|
||||
// An import-by-ordinal slot has MSB 1 to indicate that
|
||||
@ -99,6 +103,8 @@ public:
|
||||
size_t getSize() const override { return sizeof(ImportDirectoryTableEntry); }
|
||||
|
||||
void writeTo(uint8_t *Buf) const override {
|
||||
memset(Buf + OutputSectionOff, 0, getSize());
|
||||
|
||||
auto *E = (coff_import_directory_table_entry *)(Buf + OutputSectionOff);
|
||||
E->ImportLookupTableRVA = LookupTab->getRVA();
|
||||
E->NameRVA = DLLName->getRVA();
|
||||
@ -118,6 +124,10 @@ public:
|
||||
bool hasData() const override { return false; }
|
||||
size_t getSize() const override { return Size; }
|
||||
|
||||
void writeTo(uint8_t *Buf) const override {
|
||||
memset(Buf + OutputSectionOff, 0, Size);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t Size;
|
||||
};
|
||||
@ -160,6 +170,8 @@ public:
|
||||
}
|
||||
|
||||
void writeTo(uint8_t *Buf) const override {
|
||||
memset(Buf + OutputSectionOff, 0, getSize());
|
||||
|
||||
auto *E = (delay_import_directory_table_entry *)(Buf + OutputSectionOff);
|
||||
E->Attributes = 1;
|
||||
E->Name = DLLName->getRVA();
|
||||
@ -230,6 +242,36 @@ static const uint8_t ThunkARM[] = {
|
||||
0x60, 0x47, // bx ip
|
||||
};
|
||||
|
||||
// Machine code template for the ARM64 delay-load thunk. The zero immediates
// in the adrp/add/bl instructions are placeholders that are patched when the
// thunk is written out. The code saves the argument registers (x0-x7, q0-q7),
// calls __delayLoadHelper2 and then branches to the address it returned.
static const uint8_t ThunkARM64[] = {
    // Address of the import slot goes into x17.
    0x11, 0x00, 0x00, 0x90, // adrp    x17, #0      __imp_<FUNCNAME>
    0x31, 0x02, 0x00, 0x91, // add     x17, x17, #0 :lo12:__imp_<FUNCNAME>

    // Set up a frame and save the argument registers.
    0xfd, 0x7b, 0xb3, 0xa9, // stp     x29, x30, [sp, #-208]!
    0xfd, 0x03, 0x00, 0x91, // mov     x29, sp
    0xe0, 0x07, 0x01, 0xa9, // stp     x0, x1, [sp, #16]
    0xe2, 0x0f, 0x02, 0xa9, // stp     x2, x3, [sp, #32]
    0xe4, 0x17, 0x03, 0xa9, // stp     x4, x5, [sp, #48]
    0xe6, 0x1f, 0x04, 0xa9, // stp     x6, x7, [sp, #64]
    0xe0, 0x87, 0x02, 0xad, // stp     q0, q1, [sp, #80]
    0xe2, 0x8f, 0x03, 0xad, // stp     q2, q3, [sp, #112]
    0xe4, 0x97, 0x04, 0xad, // stp     q4, q5, [sp, #144]
    0xe6, 0x9f, 0x05, 0xad, // stp     q6, q7, [sp, #176]

    // Call the helper with (descriptor, import slot).
    0xe1, 0x03, 0x11, 0xaa, // mov     x1, x17
    0x00, 0x00, 0x00, 0x90, // adrp    x0, #0     DELAY_IMPORT_DESCRIPTOR
    0x00, 0x00, 0x00, 0x91, // add     x0, x0, #0 :lo12:DELAY_IMPORT_DESCRIPTOR
    0x00, 0x00, 0x00, 0x94, // bl      #0 __delayLoadHelper2
    0xf0, 0x03, 0x00, 0xaa, // mov     x16, x0

    // Restore the argument registers and tear down the frame.
    0xe6, 0x9f, 0x45, 0xad, // ldp     q6, q7, [sp, #176]
    0xe4, 0x97, 0x44, 0xad, // ldp     q4, q5, [sp, #144]
    0xe2, 0x8f, 0x43, 0xad, // ldp     q2, q3, [sp, #112]
    0xe0, 0x87, 0x42, 0xad, // ldp     q0, q1, [sp, #80]
    0xe6, 0x1f, 0x44, 0xa9, // ldp     x6, x7, [sp, #64]
    0xe4, 0x17, 0x43, 0xa9, // ldp     x4, x5, [sp, #48]
    0xe2, 0x0f, 0x42, 0xa9, // ldp     x2, x3, [sp, #32]
    0xe0, 0x07, 0x41, 0xa9, // ldp     x0, x1, [sp, #16]
    0xfd, 0x7b, 0xcd, 0xa8, // ldp     x29, x30, [sp], #208

    // Jump to the address returned by the helper.
    0x00, 0x02, 0x1f, 0xd6, // br      x16
};
|
||||
|
||||
// A chunk for the delay import thunk.
|
||||
class ThunkChunkX64 : public Chunk {
|
||||
public:
|
||||
@ -298,11 +340,35 @@ public:
|
||||
Defined *Helper = nullptr;
|
||||
};
|
||||
|
||||
class ThunkChunkARM64 : public Chunk {
|
||||
public:
|
||||
ThunkChunkARM64(Defined *I, Chunk *D, Defined *H)
|
||||
: Imp(I), Desc(D), Helper(H) {}
|
||||
|
||||
size_t getSize() const override { return sizeof(ThunkARM64); }
|
||||
|
||||
void writeTo(uint8_t *Buf) const override {
|
||||
memcpy(Buf + OutputSectionOff, ThunkARM64, sizeof(ThunkARM64));
|
||||
applyArm64Addr(Buf + OutputSectionOff + 0, Imp->getRVA(), RVA + 0, 12);
|
||||
applyArm64Imm(Buf + OutputSectionOff + 4, Imp->getRVA() & 0xfff, 0);
|
||||
applyArm64Addr(Buf + OutputSectionOff + 52, Desc->getRVA(), RVA + 52, 12);
|
||||
applyArm64Imm(Buf + OutputSectionOff + 56, Desc->getRVA() & 0xfff, 0);
|
||||
applyArm64Branch26(Buf + OutputSectionOff + 60,
|
||||
Helper->getRVA() - RVA - 60);
|
||||
}
|
||||
|
||||
Defined *Imp = nullptr;
|
||||
Chunk *Desc = nullptr;
|
||||
Defined *Helper = nullptr;
|
||||
};
|
||||
|
||||
// A chunk for the import descriptor table.
|
||||
class DelayAddressChunk : public Chunk {
|
||||
public:
|
||||
explicit DelayAddressChunk(Chunk *C) : Thunk(C) { Alignment = ptrSize(); }
|
||||
size_t getSize() const override { return ptrSize(); }
|
||||
explicit DelayAddressChunk(Chunk *C) : Thunk(C) {
|
||||
Alignment = Config->Wordsize;
|
||||
}
|
||||
size_t getSize() const override { return Config->Wordsize; }
|
||||
|
||||
void writeTo(uint8_t *Buf) const override {
|
||||
if (Config->is64()) {
|
||||
@ -338,6 +404,8 @@ public:
|
||||
}
|
||||
|
||||
void writeTo(uint8_t *Buf) const override {
|
||||
memset(Buf + OutputSectionOff, 0, getSize());
|
||||
|
||||
auto *E = (export_directory_table_entry *)(Buf + OutputSectionOff);
|
||||
E->NameRVA = DLLName->getRVA();
|
||||
E->OrdinalBase = 0;
|
||||
@ -362,6 +430,8 @@ public:
|
||||
size_t getSize() const override { return Size * 4; }
|
||||
|
||||
void writeTo(uint8_t *Buf) const override {
|
||||
memset(Buf + OutputSectionOff, 0, getSize());
|
||||
|
||||
for (const Export &E : Config->Exports) {
|
||||
uint8_t *P = Buf + OutputSectionOff + E.Ordinal * 4;
|
||||
uint32_t Bit = 0;
|
||||
@ -418,30 +488,6 @@ private:
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
uint64_t IdataContents::getDirSize() {
|
||||
return Dirs.size() * sizeof(ImportDirectoryTableEntry);
|
||||
}
|
||||
|
||||
uint64_t IdataContents::getIATSize() {
|
||||
return Addresses.size() * ptrSize();
|
||||
}
|
||||
|
||||
// Returns a list of .idata contents.
|
||||
// See Microsoft PE/COFF spec 5.4 for details.
|
||||
std::vector<Chunk *> IdataContents::getChunks() {
|
||||
create();
|
||||
|
||||
// The loader assumes a specific order of data.
|
||||
// Add each type in the correct order.
|
||||
std::vector<Chunk *> V;
|
||||
V.insert(V.end(), Dirs.begin(), Dirs.end());
|
||||
V.insert(V.end(), Lookups.begin(), Lookups.end());
|
||||
V.insert(V.end(), Addresses.begin(), Addresses.end());
|
||||
V.insert(V.end(), Hints.begin(), Hints.end());
|
||||
V.insert(V.end(), DLLNames.begin(), DLLNames.end());
|
||||
return V;
|
||||
}
|
||||
|
||||
void IdataContents::create() {
|
||||
std::vector<std::vector<DefinedImportData *>> V = binImports(Imports);
|
||||
|
||||
@ -465,8 +511,8 @@ void IdataContents::create() {
|
||||
Hints.push_back(C);
|
||||
}
|
||||
// Terminate with null values.
|
||||
Lookups.push_back(make<NullChunk>(ptrSize()));
|
||||
Addresses.push_back(make<NullChunk>(ptrSize()));
|
||||
Lookups.push_back(make<NullChunk>(Config->Wordsize));
|
||||
Addresses.push_back(make<NullChunk>(Config->Wordsize));
|
||||
|
||||
for (int I = 0, E = Syms.size(); I < E; ++I)
|
||||
Syms[I]->setLocation(Addresses[Base + I]);
|
||||
@ -555,6 +601,8 @@ Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *S, Chunk *Dir) {
|
||||
return make<ThunkChunkX86>(S, Dir, Helper);
|
||||
case ARMNT:
|
||||
return make<ThunkChunkARM>(S, Dir, Helper);
|
||||
case ARM64:
|
||||
return make<ThunkChunkARM64>(S, Dir, Helper);
|
||||
default:
|
||||
llvm_unreachable("unsupported machine type");
|
||||
}
|
||||
|
||||
9
deps/lld/COFF/DLL.h
vendored
9
deps/lld/COFF/DLL.h
vendored
@ -19,19 +19,12 @@ namespace coff {
|
||||
// Windows-specific.
|
||||
// IdataContents creates all chunks for the DLL import table.
|
||||
// You are supposed to call add() to add symbols and then
|
||||
// call getChunks() to get a list of chunks.
|
||||
// call create() to populate the chunk vectors.
|
||||
class IdataContents {
|
||||
public:
|
||||
void add(DefinedImportData *Sym) { Imports.push_back(Sym); }
|
||||
bool empty() { return Imports.empty(); }
|
||||
std::vector<Chunk *> getChunks();
|
||||
|
||||
uint64_t getDirRVA() { return Dirs[0]->getRVA(); }
|
||||
uint64_t getDirSize();
|
||||
uint64_t getIATRVA() { return Addresses[0]->getRVA(); }
|
||||
uint64_t getIATSize();
|
||||
|
||||
private:
|
||||
void create();
|
||||
|
||||
std::vector<DefinedImportData *> Imports;
|
||||
|
||||
459
deps/lld/COFF/Driver.cpp
vendored
459
deps/lld/COFF/Driver.cpp
vendored
@ -32,6 +32,7 @@
|
||||
#include "llvm/Option/ArgList.h"
|
||||
#include "llvm/Option/Option.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/LEB128.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
#include "llvm/Support/Process.h"
|
||||
#include "llvm/Support/TarWriter.h"
|
||||
@ -56,7 +57,7 @@ Configuration *Config;
|
||||
LinkerDriver *Driver;
|
||||
|
||||
bool link(ArrayRef<const char *> Args, bool CanExitEarly, raw_ostream &Diag) {
|
||||
errorHandler().LogName = sys::path::filename(Args[0]);
|
||||
errorHandler().LogName = args::getFilenameWithoutExe(Args[0]);
|
||||
errorHandler().ErrorOS = &Diag;
|
||||
errorHandler().ColorDiagnostics = Diag.has_colors();
|
||||
errorHandler().ErrorLimitExceededMsg =
|
||||
@ -370,13 +371,30 @@ Optional<StringRef> LinkerDriver::findFile(StringRef Filename) {
|
||||
return Path;
|
||||
}
|
||||
|
||||
// MinGW specific. If an embedded directive specified to link to
|
||||
// foo.lib, but it isn't found, try libfoo.a instead.
|
||||
StringRef LinkerDriver::doFindLibMinGW(StringRef Filename) {
|
||||
if (Filename.contains('/') || Filename.contains('\\'))
|
||||
return Filename;
|
||||
|
||||
SmallString<128> S = Filename;
|
||||
sys::path::replace_extension(S, ".a");
|
||||
StringRef LibName = Saver.save("lib" + S.str());
|
||||
return doFindFile(LibName);
|
||||
}
|
||||
|
||||
// Find library file from search path.
|
||||
StringRef LinkerDriver::doFindLib(StringRef Filename) {
|
||||
// Add ".lib" to Filename if that has no file extension.
|
||||
bool HasExt = Filename.contains('.');
|
||||
if (!HasExt)
|
||||
Filename = Saver.save(Filename + ".lib");
|
||||
return doFindFile(Filename);
|
||||
StringRef Ret = doFindFile(Filename);
|
||||
// For MinGW, if the find above didn't turn up anything, try
|
||||
// looking for a MinGW formatted library name.
|
||||
if (Config->MinGW && Ret == Filename)
|
||||
return doFindLibMinGW(Filename);
|
||||
return Ret;
|
||||
}
|
||||
|
||||
// Resolves a library path. /nodefaultlib options are taken into
|
||||
@ -429,29 +447,48 @@ StringRef LinkerDriver::findDefaultEntry() {
|
||||
assert(Config->Subsystem != IMAGE_SUBSYSTEM_UNKNOWN &&
|
||||
"must handle /subsystem before calling this");
|
||||
|
||||
// As a special case, if /nodefaultlib is given, we directly look for an
|
||||
// entry point. This is because, if no default library is linked, users
|
||||
// need to define an entry point instead of a "main".
|
||||
bool FindMain = !Config->NoDefaultLibAll;
|
||||
if (Config->MinGW)
|
||||
return mangle(Config->Subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI
|
||||
? "WinMainCRTStartup"
|
||||
: "mainCRTStartup");
|
||||
|
||||
if (Config->Subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) {
|
||||
if (findUnderscoreMangle(FindMain ? "WinMain" : "WinMainCRTStartup"))
|
||||
return mangle("WinMainCRTStartup");
|
||||
if (findUnderscoreMangle(FindMain ? "wWinMain" : "wWinMainCRTStartup"))
|
||||
return mangle("wWinMainCRTStartup");
|
||||
if (findUnderscoreMangle("wWinMain")) {
|
||||
if (!findUnderscoreMangle("WinMain"))
|
||||
return mangle("wWinMainCRTStartup");
|
||||
warn("found both wWinMain and WinMain; using latter");
|
||||
}
|
||||
return mangle("WinMainCRTStartup");
|
||||
}
|
||||
if (findUnderscoreMangle(FindMain ? "main" : "mainCRTStartup"))
|
||||
return mangle("mainCRTStartup");
|
||||
if (findUnderscoreMangle(FindMain ? "wmain" : "wmainCRTStartup"))
|
||||
return mangle("wmainCRTStartup");
|
||||
return "";
|
||||
if (findUnderscoreMangle("wmain")) {
|
||||
if (!findUnderscoreMangle("main"))
|
||||
return mangle("wmainCRTStartup");
|
||||
warn("found both wmain and main; using latter");
|
||||
}
|
||||
return mangle("mainCRTStartup");
|
||||
}
|
||||
|
||||
WindowsSubsystem LinkerDriver::inferSubsystem() {
|
||||
if (Config->DLL)
|
||||
return IMAGE_SUBSYSTEM_WINDOWS_GUI;
|
||||
if (findUnderscoreMangle("main") || findUnderscoreMangle("wmain"))
|
||||
if (Config->MinGW)
|
||||
return IMAGE_SUBSYSTEM_WINDOWS_CUI;
|
||||
if (findUnderscoreMangle("WinMain") || findUnderscoreMangle("wWinMain"))
|
||||
// Note that link.exe infers the subsystem from the presence of these
|
||||
// functions even if /entry: or /nodefaultlib are passed which causes them
|
||||
// to not be called.
|
||||
bool HaveMain = findUnderscoreMangle("main");
|
||||
bool HaveWMain = findUnderscoreMangle("wmain");
|
||||
bool HaveWinMain = findUnderscoreMangle("WinMain");
|
||||
bool HaveWWinMain = findUnderscoreMangle("wWinMain");
|
||||
if (HaveMain || HaveWMain) {
|
||||
if (HaveWinMain || HaveWWinMain) {
|
||||
warn(std::string("found ") + (HaveMain ? "main" : "wmain") + " and " +
|
||||
(HaveWinMain ? "WinMain" : "wWinMain") +
|
||||
"; defaulting to /subsystem:console");
|
||||
}
|
||||
return IMAGE_SUBSYSTEM_WINDOWS_CUI;
|
||||
}
|
||||
if (HaveWinMain || HaveWWinMain)
|
||||
return IMAGE_SUBSYSTEM_WINDOWS_GUI;
|
||||
return IMAGE_SUBSYSTEM_UNKNOWN;
|
||||
}
|
||||
@ -497,26 +534,65 @@ static std::string createResponseFile(const opt::InputArgList &Args,
|
||||
return Data.str();
|
||||
}
|
||||
|
||||
static unsigned getDefaultDebugType(const opt::InputArgList &Args) {
|
||||
unsigned DebugTypes = static_cast<unsigned>(DebugType::CV);
|
||||
// The debug-info flavour selected on the command line.
enum class DebugKind {
  Unknown,
  None,
  Full,
  FastLink,
  GHash,
  Dwarf,
  Symtab
};
|
||||
|
||||
// Maps the last /debug or /debug:<kind> argument onto a DebugKind.
//
// Returns None when no such argument is present and Full when the argument
// carries no value. "fastlink" is downgraded to Full with a warning; an
// unrecognized value reports an error and yields None.
static DebugKind parseDebugKind(const opt::InputArgList &Args) {
  auto *DebugArg = Args.getLastArg(OPT_debug, OPT_debug_opt);
  if (!DebugArg)
    return DebugKind::None;
  if (DebugArg->getNumValues() == 0)
    return DebugKind::Full;

  // Values are matched case-insensitively.
  StringRef Value = DebugArg->getValue();
  DebugKind Kind = StringSwitch<DebugKind>(Value)
                       .CaseLower("none", DebugKind::None)
                       .CaseLower("full", DebugKind::Full)
                       .CaseLower("fastlink", DebugKind::FastLink)
                       // LLD extensions
                       .CaseLower("ghash", DebugKind::GHash)
                       .CaseLower("dwarf", DebugKind::Dwarf)
                       .CaseLower("symtab", DebugKind::Symtab)
                       .Default(DebugKind::Unknown);

  switch (Kind) {
  case DebugKind::FastLink:
    warn("/debug:fastlink unsupported; using /debug:full");
    return DebugKind::Full;
  case DebugKind::Unknown:
    error("/debug: unknown option: " + Twine(Value));
    return DebugKind::None;
  default:
    return Kind;
  }
}
|
||||
|
||||
// Computes the bit mask of DebugType values to emit.
//
// When /debugtype is given, its comma-separated value is parsed
// case-insensitively ("cv", "pdata", "fixup"); unknown entries are warned
// about and skipped. Otherwise the default is CV, plus PData when /driver is
// present and Fixup when /profile is present.
static unsigned parseDebugTypes(const opt::InputArgList &Args) {
  if (auto *A = Args.getLastArg(OPT_debugtype)) {
    unsigned DebugTypes = static_cast<unsigned>(DebugType::None);

    // Split the option's *value*; getSpelling() is the option name itself.
    // MaxSplit must be passed explicitly: a lone `false` in third position
    // binds to MaxSplit (as 0) and disables splitting altogether.
    SmallVector<StringRef, 3> Types;
    StringRef(A->getValue())
        .split(Types, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);

    for (StringRef Type : Types) {
      unsigned V = StringSwitch<unsigned>(Type.lower())
                       .Case("cv", static_cast<unsigned>(DebugType::CV))
                       .Case("pdata", static_cast<unsigned>(DebugType::PData))
                       .Case("fixup", static_cast<unsigned>(DebugType::Fixup))
                       .Default(0);
      if (V == 0) {
        // Report the offending entry rather than the whole list.
        warn("/debugtype: unknown option: " + Twine(Type));
        continue;
      }
      DebugTypes |= V;
    }
    return DebugTypes;
  }

  // Default debug types
  unsigned DebugTypes = static_cast<unsigned>(DebugType::CV);
  if (Args.hasArg(OPT_driver))
    DebugTypes |= static_cast<unsigned>(DebugType::PData);
  if (Args.hasArg(OPT_profile))
    DebugTypes |= static_cast<unsigned>(DebugType::Fixup);
  return DebugTypes;
}
|
||||
|
||||
// Parses a comma-separated /debugtype value into a bit mask of DebugType
// values. Entries are matched case-insensitively; unrecognized entries
// contribute nothing to the mask.
static unsigned parseDebugType(StringRef Arg) {
  // MaxSplit must be spelled out: a lone `false` in third position binds to
  // MaxSplit (as 0), which prevents the string from being split at all.
  SmallVector<StringRef, 3> Types;
  Arg.split(Types, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);

  unsigned DebugTypes = static_cast<unsigned>(DebugType::None);
  for (StringRef Type : Types)
    DebugTypes |= StringSwitch<unsigned>(Type.lower())
                      .Case("cv", static_cast<unsigned>(DebugType::CV))
                      .Case("pdata", static_cast<unsigned>(DebugType::PData))
                      .Case("fixup", static_cast<unsigned>(DebugType::Fixup))
                      .Default(0);
  return DebugTypes;
}
|
||||
|
||||
@ -676,131 +752,6 @@ static void parseModuleDefs(StringRef Path) {
|
||||
}
|
||||
}
|
||||
|
||||
// A helper function for filterBitcodeFiles.
|
||||
static bool needsRebuilding(MemoryBufferRef MB) {
|
||||
// The MSVC linker doesn't support thin archives, so if it's a thin
|
||||
// archive, we always need to rebuild it.
|
||||
std::unique_ptr<Archive> File =
|
||||
CHECK(Archive::create(MB), "Failed to read " + MB.getBufferIdentifier());
|
||||
if (File->isThin())
|
||||
return true;
|
||||
|
||||
// Returns true if the archive contains at least one bitcode file.
|
||||
for (MemoryBufferRef Member : getArchiveMembers(File.get()))
|
||||
if (identify_magic(Member.getBuffer()) == file_magic::bitcode)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Opens a given path as an archive file and removes bitcode files
|
||||
// from them if exists. This function is to appease the MSVC linker as
|
||||
// their linker doesn't like archive files containing non-native
|
||||
// object files.
|
||||
//
|
||||
// If a given archive doesn't contain bitcode files, the archive path
|
||||
// is returned as-is. Otherwise, a new temporary file is created and
|
||||
// its path is returned.
|
||||
static Optional<std::string>
|
||||
filterBitcodeFiles(StringRef Path, std::vector<std::string> &TemporaryFiles) {
|
||||
std::unique_ptr<MemoryBuffer> MB = CHECK(
|
||||
MemoryBuffer::getFile(Path, -1, false, true), "could not open " + Path);
|
||||
MemoryBufferRef MBRef = MB->getMemBufferRef();
|
||||
file_magic Magic = identify_magic(MBRef.getBuffer());
|
||||
|
||||
if (Magic == file_magic::bitcode)
|
||||
return None;
|
||||
if (Magic != file_magic::archive)
|
||||
return Path.str();
|
||||
if (!needsRebuilding(MBRef))
|
||||
return Path.str();
|
||||
|
||||
std::unique_ptr<Archive> File =
|
||||
CHECK(Archive::create(MBRef),
|
||||
MBRef.getBufferIdentifier() + ": failed to parse archive");
|
||||
|
||||
std::vector<NewArchiveMember> New;
|
||||
for (MemoryBufferRef Member : getArchiveMembers(File.get()))
|
||||
if (identify_magic(Member.getBuffer()) != file_magic::bitcode)
|
||||
New.emplace_back(Member);
|
||||
|
||||
if (New.empty())
|
||||
return None;
|
||||
|
||||
log("Creating a temporary archive for " + Path + " to remove bitcode files");
|
||||
|
||||
SmallString<128> S;
|
||||
if (std::error_code EC = sys::fs::createTemporaryFile(
|
||||
"lld-" + sys::path::stem(Path), ".lib", S))
|
||||
fatal("cannot create a temporary file: " + EC.message());
|
||||
std::string Temp = S.str();
|
||||
TemporaryFiles.push_back(Temp);
|
||||
|
||||
Error E =
|
||||
llvm::writeArchive(Temp, New, /*WriteSymtab=*/true, Archive::Kind::K_GNU,
|
||||
/*Deterministics=*/true,
|
||||
/*Thin=*/false);
|
||||
handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
|
||||
error("failed to create a new archive " + S.str() + ": " + EI.message());
|
||||
});
|
||||
return Temp;
|
||||
}
|
||||
|
||||
// Create response file contents and invoke the MSVC linker.
|
||||
void LinkerDriver::invokeMSVC(opt::InputArgList &Args) {
|
||||
std::string Rsp = "/nologo\n";
|
||||
std::vector<std::string> Temps;
|
||||
|
||||
// Write out archive members that we used in symbol resolution and pass these
|
||||
// to MSVC before any archives, so that MSVC uses the same objects to satisfy
|
||||
// references.
|
||||
for (ObjFile *Obj : ObjFile::Instances) {
|
||||
if (Obj->ParentName.empty())
|
||||
continue;
|
||||
SmallString<128> S;
|
||||
int Fd;
|
||||
if (auto EC = sys::fs::createTemporaryFile(
|
||||
"lld-" + sys::path::filename(Obj->ParentName), ".obj", Fd, S))
|
||||
fatal("cannot create a temporary file: " + EC.message());
|
||||
raw_fd_ostream OS(Fd, /*shouldClose*/ true);
|
||||
OS << Obj->MB.getBuffer();
|
||||
Temps.push_back(S.str());
|
||||
Rsp += quote(S) + "\n";
|
||||
}
|
||||
|
||||
for (auto *Arg : Args) {
|
||||
switch (Arg->getOption().getID()) {
|
||||
case OPT_linkrepro:
|
||||
case OPT_lldmap:
|
||||
case OPT_lldmap_file:
|
||||
case OPT_lldsavetemps:
|
||||
case OPT_msvclto:
|
||||
// LLD-specific options are stripped.
|
||||
break;
|
||||
case OPT_opt:
|
||||
if (!StringRef(Arg->getValue()).startswith("lld"))
|
||||
Rsp += toString(*Arg) + " ";
|
||||
break;
|
||||
case OPT_INPUT: {
|
||||
if (Optional<StringRef> Path = doFindFile(Arg->getValue())) {
|
||||
if (Optional<std::string> S = filterBitcodeFiles(*Path, Temps))
|
||||
Rsp += quote(*S) + "\n";
|
||||
continue;
|
||||
}
|
||||
Rsp += quote(Arg->getValue()) + "\n";
|
||||
break;
|
||||
}
|
||||
default:
|
||||
Rsp += toString(*Arg) + "\n";
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<StringRef> ObjFiles = Symtab->compileBitcodeFiles();
|
||||
runMSVCLinker(Rsp, ObjFiles);
|
||||
|
||||
for (StringRef Path : Temps)
|
||||
sys::fs::remove(Path);
|
||||
}
|
||||
|
||||
// Appends Task to the end of the driver's pending task queue.
void LinkerDriver::enqueueTask(std::function<void()> Task) {
  TaskQueue.emplace_back(std::move(Task));
}
|
||||
@ -856,6 +807,97 @@ static void parseOrderFile(StringRef Arg) {
|
||||
}
|
||||
}
|
||||
|
||||
// Sets KeepUnique on the chunk behind S. Does nothing when S is null, is not
// a Defined symbol, or has no chunk.
static void markAddrsig(Symbol *S) {
  auto *Def = dyn_cast_or_null<Defined>(S);
  if (!Def)
    return;
  Chunk *Target = Def->getChunk();
  if (!Target)
    return;
  Target->KeepUnique = true;
}
|
||||
|
||||
static void findKeepUniqueSections() {
|
||||
// Exported symbols could be address-significant in other executables or DSOs,
|
||||
// so we conservatively mark them as address-significant.
|
||||
for (Export &R : Config->Exports)
|
||||
markAddrsig(R.Sym);
|
||||
|
||||
// Visit the address-significance table in each object file and mark each
|
||||
// referenced symbol as address-significant.
|
||||
for (ObjFile *Obj : ObjFile::Instances) {
|
||||
ArrayRef<Symbol *> Syms = Obj->getSymbols();
|
||||
if (Obj->AddrsigSec) {
|
||||
ArrayRef<uint8_t> Contents;
|
||||
Obj->getCOFFObj()->getSectionContents(Obj->AddrsigSec, Contents);
|
||||
const uint8_t *Cur = Contents.begin();
|
||||
while (Cur != Contents.end()) {
|
||||
unsigned Size;
|
||||
const char *Err;
|
||||
uint64_t SymIndex = decodeULEB128(Cur, &Size, Contents.end(), &Err);
|
||||
if (Err)
|
||||
fatal(toString(Obj) + ": could not decode addrsig section: " + Err);
|
||||
if (SymIndex >= Syms.size())
|
||||
fatal(toString(Obj) + ": invalid symbol index in addrsig section");
|
||||
markAddrsig(Syms[SymIndex]);
|
||||
Cur += Size;
|
||||
}
|
||||
} else {
|
||||
// If an object file does not have an address-significance table,
|
||||
// conservatively mark all of its symbols as address-significant.
|
||||
for (Symbol *S : Syms)
|
||||
markAddrsig(S);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// link.exe replaces each %foo% in AltPath with the contents of environment
|
||||
// variable foo, and adds the two magic env vars _PDB (expands to the basename
|
||||
// of pdb's output path) and _EXT (expands to the extension of the output
|
||||
// binary).
|
||||
// lld only supports %_PDB% and %_EXT% and warns on references to all other env
|
||||
// vars.
|
||||
static void parsePDBAltPath(StringRef AltPath) {
|
||||
SmallString<128> Buf;
|
||||
StringRef PDBBasename =
|
||||
sys::path::filename(Config->PDBPath, sys::path::Style::windows);
|
||||
StringRef BinaryExtension =
|
||||
sys::path::extension(Config->OutputFile, sys::path::Style::windows);
|
||||
if (!BinaryExtension.empty())
|
||||
BinaryExtension = BinaryExtension.substr(1); // %_EXT% does not include '.'.
|
||||
|
||||
// Invariant:
|
||||
// +--------- Cursor ('a...' might be the empty string).
|
||||
// | +----- FirstMark
|
||||
// | | +- SecondMark
|
||||
// v v v
|
||||
// a...%...%...
|
||||
size_t Cursor = 0;
|
||||
while (Cursor < AltPath.size()) {
|
||||
size_t FirstMark, SecondMark;
|
||||
if ((FirstMark = AltPath.find('%', Cursor)) == StringRef::npos ||
|
||||
(SecondMark = AltPath.find('%', FirstMark + 1)) == StringRef::npos) {
|
||||
// Didn't find another full fragment, treat rest of string as literal.
|
||||
Buf.append(AltPath.substr(Cursor));
|
||||
break;
|
||||
}
|
||||
|
||||
// Found a full fragment. Append text in front of first %, and interpret
|
||||
// text between first and second % as variable name.
|
||||
Buf.append(AltPath.substr(Cursor, FirstMark - Cursor));
|
||||
StringRef Var = AltPath.substr(FirstMark, SecondMark - FirstMark + 1);
|
||||
if (Var.equals_lower("%_pdb%"))
|
||||
Buf.append(PDBBasename);
|
||||
else if (Var.equals_lower("%_ext%"))
|
||||
Buf.append(BinaryExtension);
|
||||
else {
|
||||
warn("only %_PDB% and %_EXT% supported in /pdbaltpath:, keeping " +
|
||||
Var + " as literal");
|
||||
Buf.append(Var);
|
||||
}
|
||||
|
||||
Cursor = SecondMark + 1;
|
||||
}
|
||||
|
||||
Config->PDBAltPath = Buf;
|
||||
}
|
||||
|
||||
void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
// If the first command line argument is "/lib", link.exe acts like lib.exe.
|
||||
// We call our own implementation of lib.exe that understands bitcode files.
|
||||
@ -944,11 +986,17 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
|
||||
// Handle /ignore
|
||||
for (auto *Arg : Args.filtered(OPT_ignore)) {
|
||||
if (StringRef(Arg->getValue()) == "4037")
|
||||
Config->WarnMissingOrderSymbol = false;
|
||||
else if (StringRef(Arg->getValue()) == "4217")
|
||||
Config->WarnLocallyDefinedImported = false;
|
||||
// Other warning numbers are ignored.
|
||||
SmallVector<StringRef, 8> Vec;
|
||||
StringRef(Arg->getValue()).split(Vec, ',');
|
||||
for (StringRef S : Vec) {
|
||||
if (S == "4037")
|
||||
Config->WarnMissingOrderSymbol = false;
|
||||
else if (S == "4099")
|
||||
Config->WarnDebugInfoUnusable = false;
|
||||
else if (S == "4217")
|
||||
Config->WarnLocallyDefinedImported = false;
|
||||
// Other warning numbers are ignored.
|
||||
}
|
||||
}
|
||||
|
||||
// Handle /out
|
||||
@ -962,20 +1010,26 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
|
||||
// Handle /force or /force:unresolved
|
||||
if (Args.hasArg(OPT_force, OPT_force_unresolved))
|
||||
Config->Force = true;
|
||||
Config->ForceUnresolved = true;
|
||||
|
||||
// Handle /force or /force:multiple
|
||||
if (Args.hasArg(OPT_force, OPT_force_multiple))
|
||||
Config->ForceMultiple = true;
|
||||
|
||||
// Handle /debug
|
||||
if (Args.hasArg(OPT_debug, OPT_debug_dwarf, OPT_debug_ghash)) {
|
||||
DebugKind Debug = parseDebugKind(Args);
|
||||
if (Debug == DebugKind::Full || Debug == DebugKind::Dwarf ||
|
||||
Debug == DebugKind::GHash) {
|
||||
Config->Debug = true;
|
||||
Config->Incremental = true;
|
||||
if (auto *Arg = Args.getLastArg(OPT_debugtype))
|
||||
Config->DebugTypes = parseDebugType(Arg->getValue());
|
||||
else
|
||||
Config->DebugTypes = getDefaultDebugType(Args);
|
||||
}
|
||||
|
||||
// Handle /debugtype
|
||||
Config->DebugTypes = parseDebugTypes(Args);
|
||||
|
||||
// Handle /pdb
|
||||
bool ShouldCreatePDB = Args.hasArg(OPT_debug, OPT_debug_ghash);
|
||||
bool ShouldCreatePDB =
|
||||
(Debug == DebugKind::Full || Debug == DebugKind::GHash);
|
||||
if (ShouldCreatePDB) {
|
||||
if (auto *Arg = Args.getLastArg(OPT_pdb))
|
||||
Config->PDBPath = Arg->getValue();
|
||||
@ -1096,7 +1150,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
Config->Implib = Arg->getValue();
|
||||
|
||||
// Handle /opt.
|
||||
bool DoGC = !Args.hasArg(OPT_debug) || Args.hasArg(OPT_profile);
|
||||
bool DoGC = Debug == DebugKind::None || Args.hasArg(OPT_profile);
|
||||
unsigned ICFLevel =
|
||||
Args.hasArg(OPT_profile) ? 0 : 1; // 0: off, 1: limited, 2: on
|
||||
unsigned TailMerge = 1;
|
||||
@ -1181,6 +1235,12 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
parseMerge(".xdata=.rdata");
|
||||
parseMerge(".bss=.data");
|
||||
|
||||
if (Config->MinGW) {
|
||||
parseMerge(".ctors=.rdata");
|
||||
parseMerge(".dtors=.rdata");
|
||||
parseMerge(".CRT=.rdata");
|
||||
}
|
||||
|
||||
// Handle /section
|
||||
for (auto *Arg : Args.filtered(OPT_section))
|
||||
parseSection(Arg->getValue());
|
||||
@ -1234,9 +1294,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
Config->NxCompat = Args.hasFlag(OPT_nxcompat, OPT_nxcompat_no, true);
|
||||
Config->TerminalServerAware =
|
||||
!Config->DLL && Args.hasFlag(OPT_tsaware, OPT_tsaware_no, true);
|
||||
Config->DebugDwarf = Args.hasArg(OPT_debug_dwarf);
|
||||
Config->DebugGHashes = Args.hasArg(OPT_debug_ghash);
|
||||
Config->DebugSymtab = Args.hasArg(OPT_debug_symtab);
|
||||
Config->DebugDwarf = Debug == DebugKind::Dwarf;
|
||||
Config->DebugGHashes = Debug == DebugKind::GHash;
|
||||
Config->DebugSymtab = Debug == DebugKind::Symtab;
|
||||
|
||||
Config->MapFile = getMapFile(Args);
|
||||
|
||||
@ -1266,10 +1326,14 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
return;
|
||||
|
||||
std::set<sys::fs::UniqueID> WholeArchives;
|
||||
for (auto *Arg : Args.filtered(OPT_wholearchive_file))
|
||||
if (Optional<StringRef> Path = doFindFile(Arg->getValue()))
|
||||
AutoExporter Exporter;
|
||||
for (auto *Arg : Args.filtered(OPT_wholearchive_file)) {
|
||||
if (Optional<StringRef> Path = doFindFile(Arg->getValue())) {
|
||||
if (Optional<sys::fs::UniqueID> ID = getUniqueID(*Path))
|
||||
WholeArchives.insert(*ID);
|
||||
Exporter.addWholeArchive(*Path);
|
||||
}
|
||||
}
|
||||
|
||||
// A predicate returning true if a given path is an argument for
|
||||
// /wholearchive:, or /wholearchive is enabled globally.
|
||||
@ -1300,12 +1364,16 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
// Read all input files given via the command line.
|
||||
run();
|
||||
|
||||
if (errorCount())
|
||||
return;
|
||||
|
||||
// We should have inferred a machine type by now from the input files, but if
|
||||
// not we assume x64.
|
||||
if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
|
||||
warn("/machine is not specified. x64 is assumed");
|
||||
Config->Machine = AMD64;
|
||||
}
|
||||
Config->Wordsize = Config->is64() ? 8 : 4;
|
||||
|
||||
// Input files can be Windows resource files (.res files). We use
|
||||
// WindowsResource to convert resource files to a regular COFF file,
|
||||
@ -1418,6 +1486,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
// tools won't work correctly if these assumptions are not held.
|
||||
sys::fs::make_absolute(Config->PDBAltPath);
|
||||
sys::path::remove_dots(Config->PDBAltPath);
|
||||
} else {
|
||||
// Don't do this earlier, so that Config->OutputFile is ready.
|
||||
parsePDBAltPath(Config->PDBAltPath);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1441,6 +1512,13 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
// Needed for MSVC 2017 15.5 CRT.
|
||||
Symtab->addAbsolute(mangle("__enclave_config"), 0);
|
||||
|
||||
if (Config->MinGW) {
|
||||
Symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
|
||||
Symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
|
||||
Symtab->addAbsolute(mangle("__CTOR_LIST__"), 0);
|
||||
Symtab->addAbsolute(mangle("__DTOR_LIST__"), 0);
|
||||
}
|
||||
|
||||
// This code may add new undefined symbols to the link, which may enqueue more
|
||||
// symbol resolution tasks, so we need to continue executing tasks until we
|
||||
// converge.
|
||||
@ -1480,18 +1558,29 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
if (errorCount())
|
||||
return;
|
||||
|
||||
// If /msvclto is given, we use the MSVC linker to link LTO output files.
|
||||
// This is useful because MSVC link.exe can generate complete PDBs.
|
||||
if (Args.hasArg(OPT_msvclto)) {
|
||||
invokeMSVC(Args);
|
||||
return;
|
||||
}
|
||||
|
||||
// Do LTO by compiling bitcode input files to a set of native COFF files then
|
||||
// link those files.
|
||||
Symtab->addCombinedLTOObjects();
|
||||
run();
|
||||
|
||||
if (Config->MinGW) {
|
||||
// Load any further object files that might be needed for doing automatic
|
||||
// imports.
|
||||
//
|
||||
// For cases with no automatically imported symbols, this iterates once
|
||||
// over the symbol table and doesn't do anything.
|
||||
//
|
||||
// For the normal case with a few automatically imported symbols, this
|
||||
// should only need to be run once, since each new object file imported
|
||||
// is an import library and wouldn't add any new undefined references,
|
||||
// but there's nothing stopping the __imp_ symbols from coming from a
|
||||
// normal object file as well (although that won't be used for the
|
||||
// actual autoimport later on). If this pass adds new undefined references,
|
||||
// we won't iterate further to resolve them.
|
||||
Symtab->loadMinGWAutomaticImports();
|
||||
run();
|
||||
}
|
||||
|
||||
// Make sure we have resolved all symbols.
|
||||
Symtab->reportRemainingUndefines();
|
||||
if (errorCount())
|
||||
@ -1510,7 +1599,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
// are chosen to be exported.
|
||||
if (Config->DLL && ((Config->MinGW && Config->Exports.empty()) ||
|
||||
Args.hasArg(OPT_export_all_symbols))) {
|
||||
AutoExporter Exporter;
|
||||
Exporter.initSymbolExcludes();
|
||||
|
||||
Symtab->forEachSymbol([=](Symbol *S) {
|
||||
auto *Def = dyn_cast<Defined>(S);
|
||||
@ -1574,8 +1663,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
markLive(Symtab->getChunks());
|
||||
|
||||
// Identify identical COMDAT sections to merge them.
|
||||
if (Config->DoICF)
|
||||
if (Config->DoICF) {
|
||||
findKeepUniqueSections();
|
||||
doICF(Symtab->getChunks());
|
||||
}
|
||||
|
||||
// Write the result.
|
||||
writeResult();
|
||||
|
||||
3
deps/lld/COFF/Driver.h
vendored
3
deps/lld/COFF/Driver.h
vendored
@ -89,6 +89,7 @@ private:
|
||||
Optional<StringRef> findLib(StringRef Filename);
|
||||
StringRef doFindFile(StringRef Filename);
|
||||
StringRef doFindLib(StringRef Filename);
|
||||
StringRef doFindLibMinGW(StringRef Filename);
|
||||
|
||||
// Parses LIB environment which contains a list of search paths.
|
||||
void addLibSearchPaths();
|
||||
@ -114,8 +115,6 @@ private:
|
||||
StringRef findDefaultEntry();
|
||||
WindowsSubsystem inferSubsystem();
|
||||
|
||||
void invokeMSVC(llvm::opt::InputArgList &Args);
|
||||
|
||||
void addBuffer(std::unique_ptr<MemoryBuffer> MB, bool WholeArchive);
|
||||
void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName,
|
||||
StringRef ParentName);
|
||||
|
||||
24
deps/lld/COFF/DriverUtils.cpp
vendored
24
deps/lld/COFF/DriverUtils.cpp
vendored
@ -713,26 +713,6 @@ MemoryBufferRef convertResToCOFF(ArrayRef<MemoryBufferRef> MBs) {
|
||||
return MBRef;
|
||||
}
|
||||
|
||||
// Run MSVC link.exe for given in-memory object files.
|
||||
// Command line options are copied from those given to LLD.
|
||||
// This is for the /msvclto option.
|
||||
void runMSVCLinker(std::string Rsp, ArrayRef<StringRef> Objects) {
|
||||
// Write the in-memory object files to disk.
|
||||
std::vector<TemporaryFile> Temps;
|
||||
for (StringRef S : Objects) {
|
||||
Temps.emplace_back("lto", "obj", S);
|
||||
Rsp += quote(Temps.back().Path) + "\n";
|
||||
}
|
||||
|
||||
log("link.exe " + Rsp);
|
||||
|
||||
// Run MSVC link.exe.
|
||||
Temps.emplace_back("lto", "rsp", Rsp);
|
||||
Executor E("link.exe");
|
||||
E.add(Twine("@" + Temps.back().Path));
|
||||
E.run();
|
||||
}
|
||||
|
||||
// Create OptTable
|
||||
|
||||
// Create prefix string literals used in Options.td
|
||||
@ -883,7 +863,9 @@ std::vector<const char *> ArgParser::tokenize(StringRef S) {
|
||||
}
|
||||
|
||||
void printHelp(const char *Argv0) {
|
||||
COFFOptTable().PrintHelp(outs(), Argv0, "LLVM Linker", false);
|
||||
COFFOptTable().PrintHelp(outs(),
|
||||
(std::string(Argv0) + " [options] file...").c_str(),
|
||||
"LLVM Linker", false);
|
||||
}
|
||||
|
||||
} // namespace coff
|
||||
|
||||
31
deps/lld/COFF/ICF.cpp
vendored
31
deps/lld/COFF/ICF.cpp
vendored
@ -22,6 +22,7 @@
|
||||
#include "Chunks.h"
|
||||
#include "Symbols.h"
|
||||
#include "lld/Common/ErrorHandler.h"
|
||||
#include "lld/Common/Threads.h"
|
||||
#include "lld/Common/Timer.h"
|
||||
#include "llvm/ADT/Hashing.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
@ -80,7 +81,7 @@ private:
|
||||
bool ICF::isEligible(SectionChunk *C) {
|
||||
// Non-comdat chunks, dead chunks, and writable chunks are not eligible.
|
||||
bool Writable = C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
|
||||
if (!C->isCOMDAT() || !C->isLive() || Writable)
|
||||
if (!C->isCOMDAT() || !C->Live || Writable)
|
||||
return false;
|
||||
|
||||
// Code sections are eligible.
|
||||
@ -93,7 +94,11 @@ bool ICF::isEligible(SectionChunk *C) {
|
||||
return true;
|
||||
|
||||
// So are vtables.
|
||||
return C->Sym && C->Sym->getName().startswith("??_7");
|
||||
if (C->Sym && C->Sym->getName().startswith("??_7"))
|
||||
return true;
|
||||
|
||||
// Anything else not in an address-significance table is eligible.
|
||||
return !C->KeepUnique;
|
||||
}
|
||||
|
||||
// Split an equivalence class into smaller classes.
|
||||
@ -222,10 +227,10 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
|
||||
size_t Boundaries[NumShards + 1];
|
||||
Boundaries[0] = 0;
|
||||
Boundaries[NumShards] = Chunks.size();
|
||||
for_each_n(parallel::par, size_t(1), NumShards, [&](size_t I) {
|
||||
parallelForEachN(1, NumShards, [&](size_t I) {
|
||||
Boundaries[I] = findBoundary((I - 1) * Step, Chunks.size());
|
||||
});
|
||||
for_each_n(parallel::par, size_t(1), NumShards + 1, [&](size_t I) {
|
||||
parallelForEachN(1, NumShards + 1, [&](size_t I) {
|
||||
if (Boundaries[I - 1] < Boundaries[I]) {
|
||||
forEachClassRange(Boundaries[I - 1], Boundaries[I], Fn);
|
||||
}
|
||||
@ -257,11 +262,23 @@ void ICF::run(ArrayRef<Chunk *> Vec) {
|
||||
SC->Class[0] = NextId++;
|
||||
|
||||
// Initially, we use hash values to partition sections.
|
||||
for_each(parallel::par, Chunks.begin(), Chunks.end(), [&](SectionChunk *SC) {
|
||||
// Set MSB to 1 to avoid collisions with non-hash classes.
|
||||
SC->Class[0] = xxHash64(SC->getContents()) | (1 << 31);
|
||||
parallelForEach(Chunks, [&](SectionChunk *SC) {
|
||||
SC->Class[0] = xxHash64(SC->getContents());
|
||||
});
|
||||
|
||||
// Combine the hashes of the sections referenced by each section into its
|
||||
// hash.
|
||||
for (unsigned Cnt = 0; Cnt != 2; ++Cnt) {
|
||||
parallelForEach(Chunks, [&](SectionChunk *SC) {
|
||||
uint32_t Hash = SC->Class[Cnt % 2];
|
||||
for (Symbol *B : SC->symbols())
|
||||
if (auto *Sym = dyn_cast_or_null<DefinedRegular>(B))
|
||||
Hash += Sym->getChunk()->Class[Cnt % 2];
|
||||
// Set MSB to 1 to avoid collisions with non-hash classes.
|
||||
SC->Class[(Cnt + 1) % 2] = Hash | (1U << 31);
|
||||
});
|
||||
}
|
||||
|
||||
// From now on, sections in Chunks are ordered so that sections in
|
||||
// the same group are consecutive in the vector.
|
||||
std::stable_sort(Chunks.begin(), Chunks.end(),
|
||||
|
||||
43
deps/lld/COFF/InputFiles.cpp
vendored
43
deps/lld/COFF/InputFiles.cpp
vendored
@ -54,8 +54,16 @@ std::vector<BitcodeFile *> BitcodeFile::Instances;
|
||||
static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F,
|
||||
Symbol *Source, Symbol *Target) {
|
||||
if (auto *U = dyn_cast<Undefined>(Source)) {
|
||||
if (U->WeakAlias && U->WeakAlias != Target)
|
||||
if (U->WeakAlias && U->WeakAlias != Target) {
|
||||
// Weak aliases as produced by GCC are named in the form
|
||||
// .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
|
||||
// of another symbol emitted near the weak symbol.
|
||||
// Just use the definition from the first object file that defined
|
||||
// this weak symbol.
|
||||
if (Config->MinGW)
|
||||
return;
|
||||
Symtab->reportDuplicate(Source, F);
|
||||
}
|
||||
U->WeakAlias = Target;
|
||||
}
|
||||
}
|
||||
@ -147,9 +155,10 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
|
||||
const coff_aux_section_definition *Def,
|
||||
StringRef LeaderName) {
|
||||
const coff_section *Sec;
|
||||
StringRef Name;
|
||||
if (auto EC = COFFObj->getSection(SectionNumber, Sec))
|
||||
fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message());
|
||||
|
||||
StringRef Name;
|
||||
if (auto EC = COFFObj->getSectionName(Sec, Name))
|
||||
fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " +
|
||||
EC.message());
|
||||
@ -161,6 +170,11 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (Name == ".llvm_addrsig") {
|
||||
AddrsigSec = Sec;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Object files may have DWARF debug info or MS CodeView debug info
|
||||
// (or both).
|
||||
//
|
||||
@ -168,8 +182,8 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
|
||||
// of the linker; they are just a data section containing relocations.
|
||||
// We can just link them to complete debug info.
|
||||
//
|
||||
// CodeView needs a linker support. We need to interpret and debug
|
||||
// info, and then write it to a separate .pdb file.
|
||||
// CodeView needs linker support. We need to interpret debug info,
|
||||
// and then write it to a separate .pdb file.
|
||||
|
||||
// Ignore DWARF debug info unless /debug is given.
|
||||
if (!Config->Debug && Name.startswith(".debug_"))
|
||||
@ -267,10 +281,17 @@ Symbol *ObjFile::createRegular(COFFSymbolRef Sym) {
|
||||
COFFObj->getSymbolName(Sym, Name);
|
||||
if (SC)
|
||||
return Symtab->addRegular(this, Name, Sym.getGeneric(), SC);
|
||||
// For MinGW symbols named .weak.* that point to a discarded section,
|
||||
// don't create an Undefined symbol. If nothing ever refers to the symbol,
|
||||
// everything should be fine. If something actually refers to the symbol
|
||||
// (e.g. the undefined weak alias), linking will fail due to undefined
|
||||
// references at the end.
|
||||
if (Config->MinGW && Name.startswith(".weak."))
|
||||
return nullptr;
|
||||
return Symtab->addUndefined(Name, this, false);
|
||||
}
|
||||
if (SC)
|
||||
return make<DefinedRegular>(this, /*Name*/ "", false,
|
||||
return make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
|
||||
/*IsExternal*/ false, Sym.getGeneric(), SC);
|
||||
return nullptr;
|
||||
}
|
||||
@ -318,7 +339,7 @@ void ObjFile::initializeSymbols() {
|
||||
|
||||
for (uint32_t I : PendingIndexes) {
|
||||
COFFSymbolRef Sym = check(COFFObj->getSymbol(I));
|
||||
if (auto *Def = Sym.getSectionDefinition()) {
|
||||
if (const coff_aux_section_definition *Def = Sym.getSectionDefinition()) {
|
||||
if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
|
||||
readAssociativeDefinition(Sym, Def);
|
||||
else if (Config->MinGW)
|
||||
@ -401,7 +422,7 @@ Optional<Symbol *> ObjFile::createDefined(
|
||||
std::tie(Leader, Prevailing) =
|
||||
Symtab->addComdat(this, GetName(), Sym.getGeneric());
|
||||
} else {
|
||||
Leader = make<DefinedRegular>(this, /*Name*/ "", false,
|
||||
Leader = make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
|
||||
/*IsExternal*/ false, Sym.getGeneric());
|
||||
Prevailing = true;
|
||||
}
|
||||
@ -421,7 +442,7 @@ Optional<Symbol *> ObjFile::createDefined(
|
||||
// leader symbol by setting the section's ComdatDefs pointer if we encounter a
|
||||
// non-associative comdat.
|
||||
if (SparseChunks[SectionNumber] == PendingComdat) {
|
||||
if (auto *Def = Sym.getSectionDefinition()) {
|
||||
if (const coff_aux_section_definition *Def = Sym.getSectionDefinition()) {
|
||||
if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
|
||||
readAssociativeDefinition(Sym, Def);
|
||||
else
|
||||
@ -429,8 +450,10 @@ Optional<Symbol *> ObjFile::createDefined(
|
||||
}
|
||||
}
|
||||
|
||||
// readAssociativeDefinition() writes to SparseChunks, so need to check again.
|
||||
if (SparseChunks[SectionNumber] == PendingComdat)
|
||||
return None;
|
||||
|
||||
return createRegular(Sym);
|
||||
}
|
||||
|
||||
@ -481,6 +504,10 @@ void ImportFile::parse() {
|
||||
ExternalName = ExtName;
|
||||
|
||||
ImpSym = Symtab->addImportData(ImpName, this);
|
||||
// If this was a duplicate, we logged an error but may continue;
|
||||
// in this case, ImpSym is nullptr.
|
||||
if (!ImpSym)
|
||||
return;
|
||||
|
||||
if (Hdr->getType() == llvm::COFF::IMPORT_CONST)
|
||||
static_cast<void>(Symtab->addImportData(Name, this));
|
||||
|
||||
13
deps/lld/COFF/InputFiles.h
vendored
13
deps/lld/COFF/InputFiles.h
vendored
@ -15,6 +15,7 @@
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
|
||||
#include "llvm/LTO/LTO.h"
|
||||
#include "llvm/Object/Archive.h"
|
||||
#include "llvm/Object/COFF.h"
|
||||
@ -122,9 +123,12 @@ public:
|
||||
return Symbols[SymbolIndex];
|
||||
}
|
||||
|
||||
// Returns the underying COFF file.
|
||||
// Returns the underlying COFF file.
|
||||
COFFObjectFile *getCOFFObj() { return COFFObj.get(); }
|
||||
|
||||
// Whether the object was already merged into the final PDB or not
|
||||
bool wasProcessedForPDB() const { return !!ModuleDBI; }
|
||||
|
||||
static std::vector<ObjFile *> Instances;
|
||||
|
||||
// Flags in the absolute @feat.00 symbol if it is present. These usually
|
||||
@ -145,6 +149,13 @@ public:
|
||||
// if we are not producing a PDB.
|
||||
llvm::pdb::DbiModuleDescriptorBuilder *ModuleDBI = nullptr;
|
||||
|
||||
const coff_section *AddrsigSec = nullptr;
|
||||
|
||||
// When using Microsoft precompiled headers, this is the PCH's key.
|
||||
// The same key is used by both the precompiled object, and objects using the
|
||||
// precompiled object. Any difference indicates out-of-date objects.
|
||||
llvm::Optional<uint32_t> PCHSignature;
|
||||
|
||||
private:
|
||||
void initializeChunks();
|
||||
void initializeSymbols();
|
||||
|
||||
3
deps/lld/COFF/LTO.cpp
vendored
3
deps/lld/COFF/LTO.cpp
vendored
@ -60,6 +60,9 @@ static std::unique_ptr<lto::LTO> createLTO() {
|
||||
C.DisableVerify = true;
|
||||
C.DiagHandler = diagnosticHandler;
|
||||
C.OptLevel = Config->LTOO;
|
||||
C.CPU = GetCPUStr();
|
||||
C.MAttrs = GetMAttrs();
|
||||
|
||||
if (Config->SaveTemps)
|
||||
checkError(C.addSaveTemps(std::string(Config->OutputFile) + ".",
|
||||
/*UseInputModulePath*/ true));
|
||||
|
||||
2
deps/lld/COFF/MapFile.cpp
vendored
2
deps/lld/COFF/MapFile.cpp
vendored
@ -110,7 +110,7 @@ void coff::writeMapFile(ArrayRef<OutputSection *> OutputSections) {
|
||||
writeHeader(OS, Sec->getRVA(), Sec->getVirtualSize(), /*Align=*/PageSize);
|
||||
OS << Sec->Name << '\n';
|
||||
|
||||
for (Chunk *C : Sec->getChunks()) {
|
||||
for (Chunk *C : Sec->Chunks) {
|
||||
auto *SC = dyn_cast<SectionChunk>(C);
|
||||
if (!SC)
|
||||
continue;
|
||||
|
||||
8
deps/lld/COFF/MarkLive.cpp
vendored
8
deps/lld/COFF/MarkLive.cpp
vendored
@ -32,13 +32,13 @@ void markLive(ArrayRef<Chunk *> Chunks) {
|
||||
// COMDAT section chunks are dead by default. Add non-COMDAT chunks.
|
||||
for (Chunk *C : Chunks)
|
||||
if (auto *SC = dyn_cast<SectionChunk>(C))
|
||||
if (SC->isLive())
|
||||
if (SC->Live)
|
||||
Worklist.push_back(SC);
|
||||
|
||||
auto Enqueue = [&](SectionChunk *C) {
|
||||
if (C->isLive())
|
||||
if (C->Live)
|
||||
return;
|
||||
C->markLive();
|
||||
C->Live = true;
|
||||
Worklist.push_back(C);
|
||||
};
|
||||
|
||||
@ -57,7 +57,7 @@ void markLive(ArrayRef<Chunk *> Chunks) {
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
SectionChunk *SC = Worklist.pop_back_val();
|
||||
assert(SC->isLive() && "We mark as live when pushing onto the worklist!");
|
||||
assert(SC->Live && "We mark as live when pushing onto the worklist!");
|
||||
|
||||
// Mark all symbols listed in the relocation table for this section.
|
||||
for (Symbol *B : SC->symbols())
|
||||
|
||||
42
deps/lld/COFF/MinGW.cpp
vendored
42
deps/lld/COFF/MinGW.cpp
vendored
@ -19,7 +19,23 @@ using namespace lld::coff;
|
||||
using namespace llvm;
|
||||
using namespace llvm::COFF;
|
||||
|
||||
AutoExporter::AutoExporter() {
|
||||
void AutoExporter::initSymbolExcludes() {
|
||||
ExcludeSymbolPrefixes = {
|
||||
// Import symbols
|
||||
"__imp_",
|
||||
"__IMPORT_DESCRIPTOR_",
|
||||
// Extra import symbols from GNU import libraries
|
||||
"__nm_",
|
||||
// C++ symbols
|
||||
"__rtti_",
|
||||
"__builtin_",
|
||||
// Artificial symbols such as .refptr
|
||||
".",
|
||||
};
|
||||
ExcludeSymbolSuffixes = {
|
||||
"_iname",
|
||||
"_NULL_THUNK_DATA",
|
||||
};
|
||||
if (Config->Machine == I386) {
|
||||
ExcludeSymbols = {
|
||||
"__NULL_IMPORT_DESCRIPTOR",
|
||||
@ -36,9 +52,10 @@ AutoExporter::AutoExporter() {
|
||||
"_DllEntryPoint@12",
|
||||
"_DllMainCRTStartup@12",
|
||||
};
|
||||
ExcludeSymbolPrefixes.insert("__head_");
|
||||
} else {
|
||||
ExcludeSymbols = {
|
||||
"_NULL_IMPORT_DESCRIPTOR",
|
||||
"__NULL_IMPORT_DESCRIPTOR",
|
||||
"_pei386_runtime_relocator",
|
||||
"do_pseudo_reloc",
|
||||
"impure_ptr",
|
||||
@ -52,8 +69,11 @@ AutoExporter::AutoExporter() {
|
||||
"DllEntryPoint",
|
||||
"DllMainCRTStartup",
|
||||
};
|
||||
ExcludeSymbolPrefixes.insert("_head_");
|
||||
}
|
||||
}
|
||||
|
||||
AutoExporter::AutoExporter() {
|
||||
ExcludeLibs = {
|
||||
"libgcc",
|
||||
"libgcc_s",
|
||||
@ -64,6 +84,7 @@ AutoExporter::AutoExporter() {
|
||||
"libsupc++",
|
||||
"libobjc",
|
||||
"libgcj",
|
||||
"libclang_rt.builtins",
|
||||
"libclang_rt.builtins-aarch64",
|
||||
"libclang_rt.builtins-arm",
|
||||
"libclang_rt.builtins-i386",
|
||||
@ -90,6 +111,13 @@ AutoExporter::AutoExporter() {
|
||||
};
|
||||
}
|
||||
|
||||
void AutoExporter::addWholeArchive(StringRef Path) {
|
||||
StringRef LibName = sys::path::filename(Path);
|
||||
// Drop the file extension, to match the processing below.
|
||||
LibName = LibName.substr(0, LibName.rfind('.'));
|
||||
ExcludeLibs.erase(LibName);
|
||||
}
|
||||
|
||||
bool AutoExporter::shouldExport(Defined *Sym) const {
|
||||
if (!Sym || !Sym->isLive() || !Sym->getChunk())
|
||||
return false;
|
||||
@ -101,10 +129,12 @@ bool AutoExporter::shouldExport(Defined *Sym) const {
|
||||
if (ExcludeSymbols.count(Sym->getName()))
|
||||
return false;
|
||||
|
||||
// Don't export anything that looks like an import symbol (which also can be
|
||||
// a manually defined data symbol with such a name).
|
||||
if (Sym->getName().startswith("__imp_"))
|
||||
return false;
|
||||
for (StringRef Prefix : ExcludeSymbolPrefixes.keys())
|
||||
if (Sym->getName().startswith(Prefix))
|
||||
return false;
|
||||
for (StringRef Suffix : ExcludeSymbolSuffixes.keys())
|
||||
if (Sym->getName().endswith(Suffix))
|
||||
return false;
|
||||
|
||||
// If a corresponding __imp_ symbol exists and is defined, don't export it.
|
||||
if (Symtab->find(("__imp_" + Sym->getName()).str()))
|
||||
|
||||
6
deps/lld/COFF/MinGW.h
vendored
6
deps/lld/COFF/MinGW.h
vendored
@ -23,7 +23,13 @@ class AutoExporter {
|
||||
public:
|
||||
AutoExporter();
|
||||
|
||||
void initSymbolExcludes();
|
||||
|
||||
void addWholeArchive(StringRef Path);
|
||||
|
||||
llvm::StringSet<> ExcludeSymbols;
|
||||
llvm::StringSet<> ExcludeSymbolPrefixes;
|
||||
llvm::StringSet<> ExcludeSymbolSuffixes;
|
||||
llvm::StringSet<> ExcludeLibs;
|
||||
llvm::StringSet<> ExcludeObjects;
|
||||
|
||||
|
||||
37
deps/lld/COFF/Options.td
vendored
37
deps/lld/COFF/Options.td
vendored
@ -66,13 +66,18 @@ def wholearchive_file : P<"wholearchive", "Include all object files from this ar
|
||||
|
||||
def disallowlib : Joined<["/", "-", "-?"], "disallowlib:">, Alias<nodefaultlib>;
|
||||
|
||||
def manifest : F<"manifest">;
|
||||
def manifest_colon : P<"manifest", "Create manifest file">;
|
||||
def manifest : F<"manifest">, HelpText<"Create .manifest file">;
|
||||
def manifest_colon : P<
|
||||
"manifest",
|
||||
"NO disables manifest output; EMBED[,ID=#] embeds manifest as resource in the image">;
|
||||
def manifestuac : P<"manifestuac", "User access control">;
|
||||
def manifestfile : P<"manifestfile", "Manifest file path">;
|
||||
def manifestdependency : P<"manifestdependency",
|
||||
"Attributes for <dependency> in manifest file">;
|
||||
def manifestinput : P<"manifestinput", "Specify manifest file">;
|
||||
def manifestfile : P<"manifestfile", "Manifest output path, with /manifest">;
|
||||
def manifestdependency : P<
|
||||
"manifestdependency",
|
||||
"Attributes for <dependency> element in manifest file; implies /manifest">;
|
||||
def manifestinput : P<
|
||||
"manifestinput",
|
||||
"Additional manifest inputs; only valid with /manifest:embed">;
|
||||
|
||||
// We cannot use multiclass P because class name "incl" is different
|
||||
// from its command line option name. We do this because "include" is
|
||||
@ -85,22 +90,28 @@ def deffile : Joined<["/", "-"], "def:">,
|
||||
HelpText<"Use module-definition file">;
|
||||
|
||||
def debug : F<"debug">, HelpText<"Embed a symbol table in the image">;
|
||||
def debug_full : F<"debug:full">, Alias<debug>;
|
||||
def debug_opt : P<"debug", "Embed a symbol table in the image with option">;
|
||||
def debugtype : P<"debugtype", "Debug Info Options">;
|
||||
def dll : F<"dll">, HelpText<"Create a DLL">;
|
||||
def driver : P<"driver", "Generate a Windows NT Kernel Mode Driver">;
|
||||
def nodefaultlib_all : F<"nodefaultlib">;
|
||||
def noentry : F<"noentry">;
|
||||
def nodefaultlib_all : F<"nodefaultlib">,
|
||||
HelpText<"Remove all default libraries">;
|
||||
def noentry : F<"noentry">,
|
||||
HelpText<"Don't add reference to DllMainCRTStartup; only valid with /dll">;
|
||||
def profile : F<"profile">;
|
||||
def repro : F<"Brepro">, HelpText<"Use a hash of the executable as the PE header timestamp">;
|
||||
def repro : F<"Brepro">,
|
||||
HelpText<"Use a hash of the executable as the PE header timestamp">;
|
||||
def swaprun_cd : F<"swaprun:cd">;
|
||||
def swaprun_net : F<"swaprun:net">;
|
||||
def verbose : F<"verbose">;
|
||||
def wholearchive_flag : F<"wholearchive">;
|
||||
|
||||
def force : F<"force">,
|
||||
HelpText<"Allow undefined and multiply defined symbols when creating executables">;
|
||||
def force_unresolved : F<"force:unresolved">,
|
||||
HelpText<"Allow undefined symbols when creating executables">;
|
||||
def force_unresolved : F<"force:unresolved">;
|
||||
def force_multiple : F<"force:multiple">,
|
||||
HelpText<"Allow multiply defined symbols when creating executables">;
|
||||
defm WX : B<"WX", "Treat warnings as errors", "Don't treat warnings as errors">;
|
||||
|
||||
defm allowbind : B<"allowbind", "Enable DLL binding (default)",
|
||||
@ -139,13 +150,9 @@ def help : F<"help">;
|
||||
def help_q : Flag<["/?", "-?"], "">, Alias<help>;
|
||||
|
||||
// LLD extensions
|
||||
def debug_ghash : F<"debug:ghash">;
|
||||
def debug_dwarf : F<"debug:dwarf">;
|
||||
def debug_symtab : F<"debug:symtab">;
|
||||
def export_all_symbols : F<"export-all-symbols">;
|
||||
def kill_at : F<"kill-at">;
|
||||
def lldmingw : F<"lldmingw">;
|
||||
def msvclto : F<"msvclto">;
|
||||
def output_def : Joined<["/", "-"], "output-def:">;
|
||||
def pdb_source_path : P<"pdbsourcepath",
|
||||
"Base path used to make relative source file path absolute in PDB">;
|
||||
|
||||
1029
deps/lld/COFF/PDB.cpp
vendored
1029
deps/lld/COFF/PDB.cpp
vendored
File diff suppressed because it is too large
Load Diff
2
deps/lld/COFF/PDB.h
vendored
2
deps/lld/COFF/PDB.h
vendored
@ -28,7 +28,7 @@ class SymbolTable;
|
||||
void createPDB(SymbolTable *Symtab,
|
||||
llvm::ArrayRef<OutputSection *> OutputSections,
|
||||
llvm::ArrayRef<uint8_t> SectionTable,
|
||||
const llvm::codeview::DebugInfo &BuildId);
|
||||
llvm::codeview::DebugInfo *BuildId);
|
||||
|
||||
std::pair<llvm::StringRef, uint32_t> getFileLine(const SectionChunk *C,
|
||||
uint32_t Addr);
|
||||
|
||||
185
deps/lld/COFF/SymbolTable.cpp
vendored
185
deps/lld/COFF/SymbolTable.cpp
vendored
@ -60,16 +60,16 @@ void SymbolTable::addFile(InputFile *File) {
|
||||
}
|
||||
|
||||
static void errorOrWarn(const Twine &S) {
|
||||
if (Config->Force)
|
||||
if (Config->ForceUnresolved)
|
||||
warn(S);
|
||||
else
|
||||
error(S);
|
||||
}
|
||||
|
||||
// Returns the name of the symbol in SC whose value is <= Addr that is closest
|
||||
// to Addr. This is generally the name of the global variable or function whose
|
||||
// definition contains Addr.
|
||||
static StringRef getSymbolName(SectionChunk *SC, uint32_t Addr) {
|
||||
// Returns the symbol in SC whose value is <= Addr that is closest to Addr.
|
||||
// This is generally the global variable or function whose definition contains
|
||||
// Addr.
|
||||
static Symbol *getSymbol(SectionChunk *SC, uint32_t Addr) {
|
||||
DefinedRegular *Candidate = nullptr;
|
||||
|
||||
for (Symbol *S : SC->File->getSymbols()) {
|
||||
@ -81,14 +81,12 @@ static StringRef getSymbolName(SectionChunk *SC, uint32_t Addr) {
|
||||
Candidate = D;
|
||||
}
|
||||
|
||||
if (!Candidate)
|
||||
return "";
|
||||
return Candidate->getName();
|
||||
return Candidate;
|
||||
}
|
||||
|
||||
static std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) {
|
||||
std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) {
|
||||
struct Location {
|
||||
StringRef SymName;
|
||||
Symbol *Sym;
|
||||
std::pair<StringRef, uint32_t> FileLine;
|
||||
};
|
||||
std::vector<Location> Locations;
|
||||
@ -102,14 +100,14 @@ static std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) {
|
||||
continue;
|
||||
std::pair<StringRef, uint32_t> FileLine =
|
||||
getFileLine(SC, R.VirtualAddress);
|
||||
StringRef SymName = getSymbolName(SC, R.VirtualAddress);
|
||||
if (!FileLine.first.empty() || !SymName.empty())
|
||||
Locations.push_back({SymName, FileLine});
|
||||
Symbol *Sym = getSymbol(SC, R.VirtualAddress);
|
||||
if (!FileLine.first.empty() || Sym)
|
||||
Locations.push_back({Sym, FileLine});
|
||||
}
|
||||
}
|
||||
|
||||
if (Locations.empty())
|
||||
return "\n>>> referenced by " + toString(File) + "\n";
|
||||
return "\n>>> referenced by " + toString(File);
|
||||
|
||||
std::string Out;
|
||||
llvm::raw_string_ostream OS(Out);
|
||||
@ -119,13 +117,87 @@ static std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) {
|
||||
OS << Loc.FileLine.first << ":" << Loc.FileLine.second
|
||||
<< "\n>>> ";
|
||||
OS << toString(File);
|
||||
if (!Loc.SymName.empty())
|
||||
OS << ":(" << Loc.SymName << ')';
|
||||
if (Loc.Sym)
|
||||
OS << ":(" << toString(*Loc.Sym) << ')';
|
||||
}
|
||||
OS << '\n';
|
||||
return OS.str();
|
||||
}
|
||||
|
||||
void SymbolTable::loadMinGWAutomaticImports() {
|
||||
for (auto &I : SymMap) {
|
||||
Symbol *Sym = I.second;
|
||||
auto *Undef = dyn_cast<Undefined>(Sym);
|
||||
if (!Undef)
|
||||
continue;
|
||||
if (!Sym->IsUsedInRegularObj)
|
||||
continue;
|
||||
|
||||
StringRef Name = Undef->getName();
|
||||
|
||||
if (Name.startswith("__imp_"))
|
||||
continue;
|
||||
// If we have an undefined symbol, but we have a Lazy representing a
|
||||
// symbol we could load from file, make sure to load that.
|
||||
Lazy *L = dyn_cast_or_null<Lazy>(find(("__imp_" + Name).str()));
|
||||
if (!L || L->PendingArchiveLoad)
|
||||
continue;
|
||||
|
||||
log("Loading lazy " + L->getName() + " from " + L->File->getName() +
|
||||
" for automatic import");
|
||||
L->PendingArchiveLoad = true;
|
||||
L->File->addMember(&L->Sym);
|
||||
}
|
||||
}
|
||||
|
||||
bool SymbolTable::handleMinGWAutomaticImport(Symbol *Sym, StringRef Name) {
|
||||
if (Name.startswith("__imp_"))
|
||||
return false;
|
||||
Defined *Imp = dyn_cast_or_null<Defined>(find(("__imp_" + Name).str()));
|
||||
if (!Imp)
|
||||
return false;
|
||||
|
||||
// Replace the reference directly to a variable with a reference
|
||||
// to the import address table instead. This obviously isn't right,
|
||||
// but we mark the symbol as IsRuntimePseudoReloc, and a later pass
|
||||
// will add runtime pseudo relocations for every relocation against
|
||||
// this Symbol. The runtime pseudo relocation framework expects the
|
||||
// reference itself to point at the IAT entry.
|
||||
size_t ImpSize = 0;
|
||||
if (isa<DefinedImportData>(Imp)) {
|
||||
log("Automatically importing " + Name + " from " +
|
||||
cast<DefinedImportData>(Imp)->getDLLName());
|
||||
ImpSize = sizeof(DefinedImportData);
|
||||
} else if (isa<DefinedRegular>(Imp)) {
|
||||
log("Automatically importing " + Name + " from " +
|
||||
toString(cast<DefinedRegular>(Imp)->File));
|
||||
ImpSize = sizeof(DefinedRegular);
|
||||
} else {
|
||||
warn("unable to automatically import " + Name + " from " + Imp->getName() +
|
||||
" from " + toString(cast<DefinedRegular>(Imp)->File) +
|
||||
"; unexpected symbol type");
|
||||
return false;
|
||||
}
|
||||
Sym->replaceKeepingName(Imp, ImpSize);
|
||||
Sym->IsRuntimePseudoReloc = true;
|
||||
|
||||
// There may exist symbols named .refptr.<name> which only consist
|
||||
// of a single pointer to <name>. If it turns out <name> is
|
||||
// automatically imported, we don't need to keep the .refptr.<name>
|
||||
// pointer at all, but redirect all accesses to it to the IAT entry
|
||||
// for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
|
||||
DefinedRegular *Refptr =
|
||||
dyn_cast_or_null<DefinedRegular>(find((".refptr." + Name).str()));
|
||||
if (Refptr && Refptr->getChunk()->getSize() == Config->Wordsize) {
|
||||
SectionChunk *SC = dyn_cast_or_null<SectionChunk>(Refptr->getChunk());
|
||||
if (SC && SC->Relocs.size() == 1 && *SC->symbols().begin() == Sym) {
|
||||
log("Replacing .refptr." + Name + " with " + Imp->getName());
|
||||
Refptr->getChunk()->Live = false;
|
||||
Refptr->replaceKeepingName(Imp, ImpSize);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void SymbolTable::reportRemainingUndefines() {
|
||||
SmallPtrSet<Symbol *, 8> Undefs;
|
||||
DenseMap<Symbol *, Symbol *> LocalImports;
|
||||
@ -169,9 +241,17 @@ void SymbolTable::reportRemainingUndefines() {
|
||||
}
|
||||
}
|
||||
|
||||
// We don't want to report missing Microsoft precompiled headers symbols.
|
||||
// A proper message will be emitted instead in PDBLinker::aquirePrecompObj
|
||||
if (Name.contains("_PchSym_"))
|
||||
continue;
|
||||
|
||||
if (Config->MinGW && handleMinGWAutomaticImport(Sym, Name))
|
||||
continue;
|
||||
|
||||
// Remaining undefined symbols are not fatal if /force is specified.
|
||||
// They are replaced with dummy defined symbols.
|
||||
if (Config->Force)
|
||||
if (Config->ForceUnresolved)
|
||||
replaceSymbol<DefinedAbsolute>(Sym, Name, 0);
|
||||
Undefs.insert(Sym);
|
||||
}
|
||||
@ -181,10 +261,10 @@ void SymbolTable::reportRemainingUndefines() {
|
||||
|
||||
for (Symbol *B : Config->GCRoot) {
|
||||
if (Undefs.count(B))
|
||||
errorOrWarn("<root>: undefined symbol: " + B->getName());
|
||||
errorOrWarn("<root>: undefined symbol: " + toString(*B));
|
||||
if (Config->WarnLocallyDefinedImported)
|
||||
if (Symbol *Imp = LocalImports.lookup(B))
|
||||
warn("<root>: locally defined symbol imported: " + Imp->getName() +
|
||||
warn("<root>: locally defined symbol imported: " + toString(*Imp) +
|
||||
" (defined in " + toString(Imp->getFile()) + ") [LNK4217]");
|
||||
}
|
||||
|
||||
@ -195,34 +275,41 @@ void SymbolTable::reportRemainingUndefines() {
|
||||
if (!Sym)
|
||||
continue;
|
||||
if (Undefs.count(Sym))
|
||||
errorOrWarn("undefined symbol: " + Sym->getName() +
|
||||
errorOrWarn("undefined symbol: " + toString(*Sym) +
|
||||
getSymbolLocations(File, SymIndex));
|
||||
if (Config->WarnLocallyDefinedImported)
|
||||
if (Symbol *Imp = LocalImports.lookup(Sym))
|
||||
warn(toString(File) + ": locally defined symbol imported: " +
|
||||
Imp->getName() + " (defined in " + toString(Imp->getFile()) +
|
||||
") [LNK4217]");
|
||||
warn(toString(File) +
|
||||
": locally defined symbol imported: " + toString(*Imp) +
|
||||
" (defined in " + toString(Imp->getFile()) + ") [LNK4217]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) {
|
||||
bool Inserted = false;
|
||||
Symbol *&Sym = SymMap[CachedHashStringRef(Name)];
|
||||
if (Sym)
|
||||
return {Sym, false};
|
||||
Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
|
||||
Sym->IsUsedInRegularObj = false;
|
||||
Sym->PendingArchiveLoad = false;
|
||||
return {Sym, true};
|
||||
if (!Sym) {
|
||||
Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
|
||||
Sym->IsUsedInRegularObj = false;
|
||||
Sym->PendingArchiveLoad = false;
|
||||
Inserted = true;
|
||||
}
|
||||
return {Sym, Inserted};
|
||||
}
|
||||
|
||||
std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name, InputFile *File) {
|
||||
std::pair<Symbol *, bool> Result = insert(Name);
|
||||
if (!File || !isa<BitcodeFile>(File))
|
||||
Result.first->IsUsedInRegularObj = true;
|
||||
return Result;
|
||||
}
|
||||
|
||||
Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F,
|
||||
bool IsWeakAlias) {
|
||||
Symbol *S;
|
||||
bool WasInserted;
|
||||
std::tie(S, WasInserted) = insert(Name);
|
||||
if (!F || !isa<BitcodeFile>(F))
|
||||
S->IsUsedInRegularObj = true;
|
||||
std::tie(S, WasInserted) = insert(Name, F);
|
||||
if (WasInserted || (isa<Lazy>(S) && IsWeakAlias)) {
|
||||
replaceSymbol<Undefined>(S, Name);
|
||||
return S;
|
||||
@ -253,14 +340,20 @@ void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) {
|
||||
}
|
||||
|
||||
void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) {
|
||||
error("duplicate symbol: " + toString(*Existing) + " in " +
|
||||
toString(Existing->getFile()) + " and in " + toString(NewFile));
|
||||
std::string Msg = "duplicate symbol: " + toString(*Existing) + " in " +
|
||||
toString(Existing->getFile()) + " and in " +
|
||||
toString(NewFile);
|
||||
|
||||
if (Config->ForceMultiple)
|
||||
warn(Msg);
|
||||
else
|
||||
error(Msg);
|
||||
}
|
||||
|
||||
Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) {
|
||||
Symbol *S;
|
||||
bool WasInserted;
|
||||
std::tie(S, WasInserted) = insert(N);
|
||||
std::tie(S, WasInserted) = insert(N, nullptr);
|
||||
S->IsUsedInRegularObj = true;
|
||||
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
|
||||
replaceSymbol<DefinedAbsolute>(S, N, Sym);
|
||||
@ -272,7 +365,7 @@ Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) {
|
||||
Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) {
|
||||
Symbol *S;
|
||||
bool WasInserted;
|
||||
std::tie(S, WasInserted) = insert(N);
|
||||
std::tie(S, WasInserted) = insert(N, nullptr);
|
||||
S->IsUsedInRegularObj = true;
|
||||
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
|
||||
replaceSymbol<DefinedAbsolute>(S, N, VA);
|
||||
@ -284,7 +377,7 @@ Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) {
|
||||
Symbol *SymbolTable::addSynthetic(StringRef N, Chunk *C) {
|
||||
Symbol *S;
|
||||
bool WasInserted;
|
||||
std::tie(S, WasInserted) = insert(N);
|
||||
std::tie(S, WasInserted) = insert(N, nullptr);
|
||||
S->IsUsedInRegularObj = true;
|
||||
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
|
||||
replaceSymbol<DefinedSynthetic>(S, N, C);
|
||||
@ -298,9 +391,7 @@ Symbol *SymbolTable::addRegular(InputFile *F, StringRef N,
|
||||
SectionChunk *C) {
|
||||
Symbol *S;
|
||||
bool WasInserted;
|
||||
std::tie(S, WasInserted) = insert(N);
|
||||
if (!isa<BitcodeFile>(F))
|
||||
S->IsUsedInRegularObj = true;
|
||||
std::tie(S, WasInserted) = insert(N, F);
|
||||
if (WasInserted || !isa<DefinedRegular>(S))
|
||||
replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ false,
|
||||
/*IsExternal*/ true, Sym, C);
|
||||
@ -314,9 +405,7 @@ SymbolTable::addComdat(InputFile *F, StringRef N,
|
||||
const coff_symbol_generic *Sym) {
|
||||
Symbol *S;
|
||||
bool WasInserted;
|
||||
std::tie(S, WasInserted) = insert(N);
|
||||
if (!isa<BitcodeFile>(F))
|
||||
S->IsUsedInRegularObj = true;
|
||||
std::tie(S, WasInserted) = insert(N, F);
|
||||
if (WasInserted || !isa<DefinedRegular>(S)) {
|
||||
replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ true,
|
||||
/*IsExternal*/ true, Sym, nullptr);
|
||||
@ -331,9 +420,7 @@ Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size,
|
||||
const coff_symbol_generic *Sym, CommonChunk *C) {
|
||||
Symbol *S;
|
||||
bool WasInserted;
|
||||
std::tie(S, WasInserted) = insert(N);
|
||||
if (!isa<BitcodeFile>(F))
|
||||
S->IsUsedInRegularObj = true;
|
||||
std::tie(S, WasInserted) = insert(N, F);
|
||||
if (WasInserted || !isa<DefinedCOFF>(S))
|
||||
replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C);
|
||||
else if (auto *DC = dyn_cast<DefinedCommon>(S))
|
||||
@ -345,7 +432,7 @@ Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size,
|
||||
Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) {
|
||||
Symbol *S;
|
||||
bool WasInserted;
|
||||
std::tie(S, WasInserted) = insert(N);
|
||||
std::tie(S, WasInserted) = insert(N, nullptr);
|
||||
S->IsUsedInRegularObj = true;
|
||||
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) {
|
||||
replaceSymbol<DefinedImportData>(S, N, F);
|
||||
@ -360,7 +447,7 @@ Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID,
|
||||
uint16_t Machine) {
|
||||
Symbol *S;
|
||||
bool WasInserted;
|
||||
std::tie(S, WasInserted) = insert(Name);
|
||||
std::tie(S, WasInserted) = insert(Name, nullptr);
|
||||
S->IsUsedInRegularObj = true;
|
||||
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) {
|
||||
replaceSymbol<DefinedImportThunk>(S, Name, ID, Machine);
|
||||
|
||||
8
deps/lld/COFF/SymbolTable.h
vendored
8
deps/lld/COFF/SymbolTable.h
vendored
@ -54,6 +54,9 @@ public:
|
||||
// symbols.
|
||||
void reportRemainingUndefines();
|
||||
|
||||
void loadMinGWAutomaticImports();
|
||||
bool handleMinGWAutomaticImport(Symbol *Sym, StringRef Name);
|
||||
|
||||
// Returns a list of chunks of selected symbols.
|
||||
std::vector<Chunk *> getChunks();
|
||||
|
||||
@ -108,7 +111,10 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
/// Inserts symbol if not already present.
|
||||
std::pair<Symbol *, bool> insert(StringRef Name);
|
||||
/// Same as insert(Name), but also sets IsUsedInRegularObj.
|
||||
std::pair<Symbol *, bool> insert(StringRef Name, InputFile *F);
|
||||
StringRef findByPrefix(StringRef Prefix);
|
||||
|
||||
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> SymMap;
|
||||
@ -117,6 +123,8 @@ private:
|
||||
|
||||
extern SymbolTable *Symtab;
|
||||
|
||||
std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex);
|
||||
|
||||
} // namespace coff
|
||||
} // namespace lld
|
||||
|
||||
|
||||
9
deps/lld/COFF/Symbols.cpp
vendored
9
deps/lld/COFF/Symbols.cpp
vendored
@ -54,7 +54,7 @@ InputFile *Symbol::getFile() {
|
||||
|
||||
bool Symbol::isLive() const {
|
||||
if (auto *R = dyn_cast<DefinedRegular>(this))
|
||||
return R->getChunk()->isLive();
|
||||
return R->getChunk()->Live;
|
||||
if (auto *Imp = dyn_cast<DefinedImportData>(this))
|
||||
return Imp->File->Live;
|
||||
if (auto *Imp = dyn_cast<DefinedImportThunk>(this))
|
||||
@ -63,6 +63,13 @@ bool Symbol::isLive() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// MinGW specific.
|
||||
void Symbol::replaceKeepingName(Symbol *Other, size_t Size) {
|
||||
StringRef OrigName = Name;
|
||||
memcpy(this, Other, Size);
|
||||
Name = OrigName;
|
||||
}
|
||||
|
||||
COFFSymbolRef DefinedCOFF::getCOFFSymbol() {
|
||||
size_t SymSize = cast<ObjFile>(File)->getCOFFObj()->getSymbolTableEntrySize();
|
||||
if (SymSize == sizeof(coff_symbol16))
|
||||
|
||||
14
deps/lld/COFF/Symbols.h
vendored
14
deps/lld/COFF/Symbols.h
vendored
@ -39,9 +39,9 @@ class Symbol {
|
||||
public:
|
||||
enum Kind {
|
||||
// The order of these is significant. We start with the regular defined
|
||||
// symbols as those are the most prevelant and the zero tag is the cheapest
|
||||
// symbols as those are the most prevalent and the zero tag is the cheapest
|
||||
// to set. Among the defined kinds, the lower the kind is preferred over
|
||||
// the higher kind when testing wether one symbol should take precedence
|
||||
// the higher kind when testing whether one symbol should take precedence
|
||||
// over another.
|
||||
DefinedRegularKind = 0,
|
||||
DefinedCommonKind,
|
||||
@ -66,6 +66,8 @@ public:
|
||||
// Returns the symbol name.
|
||||
StringRef getName();
|
||||
|
||||
void replaceKeepingName(Symbol *Other, size_t Size);
|
||||
|
||||
// Returns the file from which this symbol was created.
|
||||
InputFile *getFile();
|
||||
|
||||
@ -78,7 +80,7 @@ protected:
|
||||
explicit Symbol(Kind K, StringRef N = "")
|
||||
: SymbolKind(K), IsExternal(true), IsCOMDAT(false),
|
||||
WrittenToSymtab(false), PendingArchiveLoad(false), IsGCRoot(false),
|
||||
Name(N) {}
|
||||
IsRuntimePseudoReloc(false), Name(N) {}
|
||||
|
||||
const unsigned SymbolKind : 8;
|
||||
unsigned IsExternal : 1;
|
||||
@ -102,6 +104,8 @@ public:
|
||||
/// True if we've already added this symbol to the list of GC roots.
|
||||
unsigned IsGCRoot : 1;
|
||||
|
||||
unsigned IsRuntimePseudoReloc : 1;
|
||||
|
||||
protected:
|
||||
StringRef Name;
|
||||
};
|
||||
@ -331,8 +335,8 @@ private:
|
||||
Chunk *Data;
|
||||
};
|
||||
|
||||
// If you have a symbol "__imp_foo" in your object file, a symbol name
|
||||
// "foo" becomes automatically available as a pointer to "__imp_foo".
|
||||
// If you have a symbol "foo" in your object file, a symbol name
|
||||
// "__imp_foo" becomes automatically available as a pointer to "foo".
|
||||
// This class is for such automatically-created symbols.
|
||||
// Yes, this is an odd feature. We didn't intend to implement that.
|
||||
// This is here just for compatibility with MSVC.
|
||||
|
||||
751
deps/lld/COFF/Writer.cpp
vendored
751
deps/lld/COFF/Writer.cpp
vendored
File diff suppressed because it is too large
Load Diff
6
deps/lld/COFF/Writer.h
vendored
6
deps/lld/COFF/Writer.h
vendored
@ -34,8 +34,8 @@ public:
|
||||
Header.Characteristics = Chars;
|
||||
}
|
||||
void addChunk(Chunk *C);
|
||||
void insertChunkAtStart(Chunk *C);
|
||||
void merge(OutputSection *Other);
|
||||
ArrayRef<Chunk *> getChunks() { return Chunks; }
|
||||
void addPermissions(uint32_t C);
|
||||
void setPermissions(uint32_t C);
|
||||
uint64_t getRVA() { return Header.VirtualAddress; }
|
||||
@ -62,9 +62,11 @@ public:
|
||||
llvm::StringRef Name;
|
||||
llvm::object::coff_section Header = {};
|
||||
|
||||
std::vector<Chunk *> Chunks;
|
||||
std::vector<Chunk *> OrigChunks;
|
||||
|
||||
private:
|
||||
uint32_t StringTableOff = 0;
|
||||
std::vector<Chunk *> Chunks;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
9
deps/lld/Common/Args.cpp
vendored
9
deps/lld/Common/Args.cpp
vendored
@ -13,6 +13,7 @@
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Option/ArgList.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace lld;
|
||||
@ -40,7 +41,7 @@ std::vector<StringRef> lld::args::getStrings(opt::InputArgList &Args, int Id) {
|
||||
|
||||
uint64_t lld::args::getZOptionValue(opt::InputArgList &Args, int Id,
|
||||
StringRef Key, uint64_t Default) {
|
||||
for (auto *Arg : Args.filtered(Id)) {
|
||||
for (auto *Arg : Args.filtered_reverse(Id)) {
|
||||
std::pair<StringRef, StringRef> KV = StringRef(Arg->getValue()).split('=');
|
||||
if (KV.first == Key) {
|
||||
uint64_t Result = Default;
|
||||
@ -64,3 +65,9 @@ std::vector<StringRef> lld::args::getLines(MemoryBufferRef MB) {
|
||||
}
|
||||
return Ret;
|
||||
}
|
||||
|
||||
StringRef lld::args::getFilenameWithoutExe(StringRef Path) {
|
||||
if (Path.endswith_lower(".exe"))
|
||||
return sys::path::stem(Path);
|
||||
return sys::path::filename(Path);
|
||||
}
|
||||
|
||||
5
deps/lld/Common/ErrorHandler.cpp
vendored
5
deps/lld/Common/ErrorHandler.cpp
vendored
@ -47,8 +47,9 @@ ErrorHandler &lld::errorHandler() {
|
||||
}
|
||||
|
||||
void lld::exitLld(int Val) {
|
||||
// Delete the output buffer so that any tempory file is deleted.
|
||||
errorHandler().OutputBuffer.reset();
|
||||
// Delete any temporary file, while keeping the memory mapping open.
|
||||
if (errorHandler().OutputBuffer)
|
||||
errorHandler().OutputBuffer->discard();
|
||||
|
||||
// Dealloc/destroy ManagedStatic variables before calling
|
||||
// _exit(). In a non-LTO build, this is a nop. In an LTO
|
||||
|
||||
33
deps/lld/Common/Strings.cpp
vendored
33
deps/lld/Common/Strings.cpp
vendored
@ -16,14 +16,6 @@
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <Windows.h>
|
||||
|
||||
// DbgHelp.h must be included after Windows.h.
|
||||
#include <DbgHelp.h>
|
||||
#pragma comment(lib, "dbghelp.lib")
|
||||
#endif
|
||||
|
||||
using namespace llvm;
|
||||
using namespace lld;
|
||||
|
||||
@ -45,18 +37,21 @@ Optional<std::string> lld::demangleItanium(StringRef Name) {
|
||||
return S;
|
||||
}
|
||||
|
||||
Optional<std::string> lld::demangleMSVC(StringRef S) {
|
||||
#if defined(_MSC_VER)
|
||||
// UnDecorateSymbolName is not thread-safe, so we need a mutex.
|
||||
static std::mutex Mu;
|
||||
std::lock_guard<std::mutex> Lock(Mu);
|
||||
Optional<std::string> lld::demangleMSVC(StringRef Name) {
|
||||
std::string Prefix;
|
||||
if (Name.consume_front("__imp_"))
|
||||
Prefix = "__declspec(dllimport) ";
|
||||
|
||||
char Buf[4096];
|
||||
if (S.startswith("?"))
|
||||
if (size_t Len = UnDecorateSymbolName(S.str().c_str(), Buf, sizeof(Buf), 0))
|
||||
return std::string(Buf, Len);
|
||||
#endif
|
||||
return None;
|
||||
// Demangle only C++ names.
|
||||
if (!Name.startswith("?"))
|
||||
return None;
|
||||
|
||||
char *Buf = microsoftDemangle(Name.str().c_str(), nullptr, nullptr, nullptr);
|
||||
if (!Buf)
|
||||
return None;
|
||||
std::string S(Buf);
|
||||
free(Buf);
|
||||
return Prefix + S;
|
||||
}
|
||||
|
||||
StringMatcher::StringMatcher(ArrayRef<StringRef> Pat) {
|
||||
|
||||
@ -32,3 +32,4 @@ llvm::Optional<llvm::CodeModel::Model> lld::GetCodeModelFromCMModel() {
|
||||
}
|
||||
|
||||
std::string lld::GetCPUStr() { return ::getCPUStr(); }
|
||||
std::vector<std::string> lld::GetMAttrs() { return ::MAttrs; }
|
||||
|
||||
27
deps/lld/ELF/AArch64ErrataFix.cpp
vendored
27
deps/lld/ELF/AArch64ErrataFix.cpp
vendored
@ -356,7 +356,7 @@ static uint64_t scanCortexA53Errata843419(InputSection *IS, uint64_t &Off,
|
||||
}
|
||||
|
||||
uint64_t PatchOff = 0;
|
||||
const uint8_t *Buf = IS->Data.begin();
|
||||
const uint8_t *Buf = IS->data().begin();
|
||||
const ulittle32_t *InstBuf = reinterpret_cast<const ulittle32_t *>(Buf + Off);
|
||||
uint32_t Instr1 = *InstBuf++;
|
||||
uint32_t Instr2 = *InstBuf++;
|
||||
@ -411,7 +411,7 @@ uint64_t lld::elf::Patch843419Section::getLDSTAddr() const {
|
||||
void lld::elf::Patch843419Section::writeTo(uint8_t *Buf) {
|
||||
// Copy the instruction that we will be replacing with a branch in the
|
||||
// Patchee Section.
|
||||
write32le(Buf, read32le(Patchee->Data.begin() + PatcheeOffset));
|
||||
write32le(Buf, read32le(Patchee->data().begin() + PatcheeOffset));
|
||||
|
||||
// Apply any relocation transferred from the original PatcheeSection.
|
||||
// For a SyntheticSection Buf already has OutSecOff added, but relocateAlloc
|
||||
@ -451,7 +451,7 @@ void AArch64Err843419Patcher::init() {
|
||||
continue;
|
||||
if (!IsCodeMapSymbol(Def) && !IsDataMapSymbol(Def))
|
||||
continue;
|
||||
if (auto *Sec = dyn_cast<InputSection>(Def->Section))
|
||||
if (auto *Sec = dyn_cast_or_null<InputSection>(Def->Section))
|
||||
if (Sec->Flags & SHF_EXECINSTR)
|
||||
SectionMap[Sec].push_back(Def);
|
||||
}
|
||||
@ -487,7 +487,8 @@ void AArch64Err843419Patcher::insertPatches(
|
||||
InputSectionDescription &ISD, std::vector<Patch843419Section *> &Patches) {
|
||||
uint64_t ISLimit;
|
||||
uint64_t PrevISLimit = ISD.Sections.front()->OutSecOff;
|
||||
uint64_t PatchUpperBound = PrevISLimit + Target->ThunkSectionSpacing;
|
||||
uint64_t PatchUpperBound = PrevISLimit + Target->getThunkSectionSpacing();
|
||||
uint64_t OutSecAddr = ISD.Sections.front()->getParent()->Addr;
|
||||
|
||||
// Set the OutSecOff of patches to the place where we want to insert them.
|
||||
// We use a similar strategy to Thunk placement. Place patches roughly
|
||||
@ -498,12 +499,12 @@ void AArch64Err843419Patcher::insertPatches(
|
||||
ISLimit = IS->OutSecOff + IS->getSize();
|
||||
if (ISLimit > PatchUpperBound) {
|
||||
while (PatchIt != PatchEnd) {
|
||||
if ((*PatchIt)->getLDSTAddr() >= PrevISLimit)
|
||||
if ((*PatchIt)->getLDSTAddr() - OutSecAddr >= PrevISLimit)
|
||||
break;
|
||||
(*PatchIt)->OutSecOff = PrevISLimit;
|
||||
++PatchIt;
|
||||
}
|
||||
PatchUpperBound = PrevISLimit + Target->ThunkSectionSpacing;
|
||||
PatchUpperBound = PrevISLimit + Target->getThunkSectionSpacing();
|
||||
}
|
||||
PrevISLimit = ISLimit;
|
||||
}
|
||||
@ -538,20 +539,24 @@ static void implementPatch(uint64_t AdrpAddr, uint64_t PatcheeOffset,
|
||||
InputSection *IS,
|
||||
std::vector<Patch843419Section *> &Patches) {
|
||||
// There may be a relocation at the same offset that we are patching. There
|
||||
// are three cases that we need to consider.
|
||||
// are four cases that we need to consider.
|
||||
// Case 1: R_AARCH64_JUMP26 branch relocation. We have already patched this
|
||||
// instance of the erratum on a previous patch and altered the relocation. We
|
||||
// have nothing more to do.
|
||||
// Case 2: A load/store register (unsigned immediate) class relocation. There
|
||||
// Case 2: A TLS Relaxation R_RELAX_TLS_IE_TO_LE. In this case the ADRP that
|
||||
// we read will be transformed into a MOVZ later so we actually don't match
|
||||
// the sequence and have nothing more to do.
|
||||
// Case 3: A load/store register (unsigned immediate) class relocation. There
|
||||
// are two of these R_AARCH_LD64_ABS_LO12_NC and R_AARCH_LD64_GOT_LO12_NC and
|
||||
// they are both absolute. We need to add the same relocation to the patch,
|
||||
// and replace the relocation with a R_AARCH_JUMP26 branch relocation.
|
||||
// Case 3: No relocation. We must create a new R_AARCH64_JUMP26 branch
|
||||
// Case 4: No relocation. We must create a new R_AARCH64_JUMP26 branch
|
||||
// relocation at the offset.
|
||||
auto RelIt = std::find_if(
|
||||
IS->Relocations.begin(), IS->Relocations.end(),
|
||||
[=](const Relocation &R) { return R.Offset == PatcheeOffset; });
|
||||
if (RelIt != IS->Relocations.end() && RelIt->Type == R_AARCH64_JUMP26)
|
||||
if (RelIt != IS->Relocations.end() &&
|
||||
(RelIt->Type == R_AARCH64_JUMP26 || RelIt->Expr == R_RELAX_TLS_IE_TO_LE))
|
||||
return;
|
||||
|
||||
log("detected cortex-a53-843419 erratum sequence starting at " +
|
||||
@ -598,7 +603,7 @@ AArch64Err843419Patcher::patchInputSectionDescription(
|
||||
auto DataSym = std::next(CodeSym);
|
||||
uint64_t Off = (*CodeSym)->Value;
|
||||
uint64_t Limit =
|
||||
(DataSym == MapSyms.end()) ? IS->Data.size() : (*DataSym)->Value;
|
||||
(DataSym == MapSyms.end()) ? IS->data().size() : (*DataSym)->Value;
|
||||
|
||||
while (Off < Limit) {
|
||||
uint64_t StartAddr = IS->getVA(Off);
|
||||
|
||||
37
deps/lld/ELF/Arch/AArch64.cpp
vendored
37
deps/lld/ELF/Arch/AArch64.cpp
vendored
@ -41,6 +41,7 @@ public:
|
||||
int32_t Index, unsigned RelOff) const override;
|
||||
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
|
||||
uint64_t BranchAddr, const Symbol &S) const override;
|
||||
uint32_t getThunkSectionSpacing() const override;
|
||||
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
|
||||
bool usesOnlyLowPageBits(RelType Type) const override;
|
||||
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
@ -57,6 +58,7 @@ AArch64::AArch64() {
|
||||
RelativeRel = R_AARCH64_RELATIVE;
|
||||
IRelativeRel = R_AARCH64_IRELATIVE;
|
||||
GotRel = R_AARCH64_GLOB_DAT;
|
||||
NoneRel = R_AARCH64_NONE;
|
||||
PltRel = R_AARCH64_JUMP_SLOT;
|
||||
TlsDescRel = R_AARCH64_TLSDESC;
|
||||
TlsGotRel = R_AARCH64_TLS_TPREL64;
|
||||
@ -66,22 +68,18 @@ AArch64::AArch64() {
|
||||
PltHeaderSize = 32;
|
||||
DefaultMaxPageSize = 65536;
|
||||
|
||||
// It doesn't seem to be documented anywhere, but tls on aarch64 uses variant
|
||||
// 1 of the tls structures and the tcb size is 16.
|
||||
TcbSize = 16;
|
||||
NeedsThunks = true;
|
||||
// Align to the 2 MiB page size (known as a superpage or huge page).
|
||||
// FreeBSD automatically promotes 2 MiB-aligned allocations.
|
||||
DefaultImageBase = 0x200000;
|
||||
|
||||
// See comment in Arch/ARM.cpp for a more detailed explanation of
|
||||
// ThunkSectionSpacing. For AArch64 the only branches we are permitted to
|
||||
// Thunk have a range of +/- 128 MiB
|
||||
ThunkSectionSpacing = (128 * 1024 * 1024) - 0x30000;
|
||||
NeedsThunks = true;
|
||||
}
|
||||
|
||||
RelExpr AArch64::getRelExpr(RelType Type, const Symbol &S,
|
||||
const uint8_t *Loc) const {
|
||||
switch (Type) {
|
||||
case R_AARCH64_TLSDESC_ADR_PAGE21:
|
||||
return R_TLSDESC_PAGE;
|
||||
return R_AARCH64_TLSDESC_PAGE;
|
||||
case R_AARCH64_TLSDESC_LD64_LO12:
|
||||
case R_AARCH64_TLSDESC_ADD_LO12:
|
||||
return R_TLSDESC;
|
||||
@ -107,13 +105,13 @@ RelExpr AArch64::getRelExpr(RelType Type, const Symbol &S,
|
||||
case R_AARCH64_LD_PREL_LO19:
|
||||
return R_PC;
|
||||
case R_AARCH64_ADR_PREL_PG_HI21:
|
||||
return R_PAGE_PC;
|
||||
return R_AARCH64_PAGE_PC;
|
||||
case R_AARCH64_LD64_GOT_LO12_NC:
|
||||
case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
|
||||
return R_GOT;
|
||||
case R_AARCH64_ADR_GOT_PAGE:
|
||||
case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
|
||||
return R_GOT_PAGE_PC;
|
||||
return R_AARCH64_GOT_PAGE_PC;
|
||||
case R_AARCH64_NONE:
|
||||
return R_NONE;
|
||||
default:
|
||||
@ -125,7 +123,7 @@ RelExpr AArch64::adjustRelaxExpr(RelType Type, const uint8_t *Data,
|
||||
RelExpr Expr) const {
|
||||
if (Expr == R_RELAX_TLS_GD_TO_IE) {
|
||||
if (Type == R_AARCH64_TLSDESC_ADR_PAGE21)
|
||||
return R_RELAX_TLS_GD_TO_IE_PAGE_PC;
|
||||
return R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC;
|
||||
return R_RELAX_TLS_GD_TO_IE_ABS;
|
||||
}
|
||||
return Expr;
|
||||
@ -156,7 +154,7 @@ RelType AArch64::getDynRel(RelType Type) const {
|
||||
}
|
||||
|
||||
void AArch64::writeGotPlt(uint8_t *Buf, const Symbol &) const {
|
||||
write64le(Buf, InX::Plt->getVA());
|
||||
write64le(Buf, In.Plt->getVA());
|
||||
}
|
||||
|
||||
void AArch64::writePltHeader(uint8_t *Buf) const {
|
||||
@ -172,8 +170,8 @@ void AArch64::writePltHeader(uint8_t *Buf) const {
|
||||
};
|
||||
memcpy(Buf, PltData, sizeof(PltData));
|
||||
|
||||
uint64_t Got = InX::GotPlt->getVA();
|
||||
uint64_t Plt = InX::Plt->getVA();
|
||||
uint64_t Got = In.GotPlt->getVA();
|
||||
uint64_t Plt = In.Plt->getVA();
|
||||
relocateOne(Buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
|
||||
getAArch64Page(Got + 16) - getAArch64Page(Plt + 4));
|
||||
relocateOne(Buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, Got + 16);
|
||||
@ -208,6 +206,13 @@ bool AArch64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
|
||||
return !inBranchRange(Type, BranchAddr, Dst);
|
||||
}
|
||||
|
||||
uint32_t AArch64::getThunkSectionSpacing() const {
|
||||
// See comment in Arch/ARM.cpp for a more detailed explanation of
|
||||
// getThunkSectionSpacing(). For AArch64 the only branches we are permitted to
|
||||
// Thunk have a range of +/- 128 MiB
|
||||
return (128 * 1024 * 1024) - 0x30000;
|
||||
}
|
||||
|
||||
bool AArch64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
|
||||
if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26)
|
||||
return true;
|
||||
@ -338,7 +343,7 @@ void AArch64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
or32le(Loc, (Val & 0xFFFC) << 3);
|
||||
break;
|
||||
case R_AARCH64_TLSLE_ADD_TPREL_HI12:
|
||||
checkInt(Loc, Val, 24, Type);
|
||||
checkUInt(Loc, Val, 24, Type);
|
||||
or32AArch64Imm(Loc, Val >> 12);
|
||||
break;
|
||||
case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
|
||||
|
||||
1
deps/lld/ELF/Arch/AMDGPU.cpp
vendored
1
deps/lld/ELF/Arch/AMDGPU.cpp
vendored
@ -35,6 +35,7 @@ public:
|
||||
AMDGPU::AMDGPU() {
|
||||
RelativeRel = R_AMDGPU_RELATIVE64;
|
||||
GotRel = R_AMDGPU_ABS64;
|
||||
NoneRel = R_AMDGPU_NONE;
|
||||
GotEntrySize = 8;
|
||||
}
|
||||
|
||||
|
||||
124
deps/lld/ELF/Arch/ARM.cpp
vendored
124
deps/lld/ELF/Arch/ARM.cpp
vendored
@ -40,6 +40,7 @@ public:
|
||||
void addPltHeaderSymbols(InputSection &ISD) const override;
|
||||
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
|
||||
uint64_t BranchAddr, const Symbol &S) const override;
|
||||
uint32_t getThunkSectionSpacing() const override;
|
||||
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
|
||||
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
};
|
||||
@ -50,6 +51,7 @@ ARM::ARM() {
|
||||
RelativeRel = R_ARM_RELATIVE;
|
||||
IRelativeRel = R_ARM_IRELATIVE;
|
||||
GotRel = R_ARM_GLOB_DAT;
|
||||
NoneRel = R_ARM_NONE;
|
||||
PltRel = R_ARM_JUMP_SLOT;
|
||||
TlsGotRel = R_ARM_TLS_TPOFF32;
|
||||
TlsModuleIndexRel = R_ARM_TLS_DTPMOD32;
|
||||
@ -59,41 +61,8 @@ ARM::ARM() {
|
||||
GotPltEntrySize = 4;
|
||||
PltEntrySize = 16;
|
||||
PltHeaderSize = 32;
|
||||
TrapInstr = 0xd4d4d4d4;
|
||||
// ARM uses Variant 1 TLS
|
||||
TcbSize = 8;
|
||||
TrapInstr = {0xd4, 0xd4, 0xd4, 0xd4};
|
||||
NeedsThunks = true;
|
||||
|
||||
// The placing of pre-created ThunkSections is controlled by the
|
||||
// ThunkSectionSpacing parameter. The aim is to place the
|
||||
// ThunkSection such that all branches from the InputSections prior to the
|
||||
// ThunkSection can reach a Thunk placed at the end of the ThunkSection.
|
||||
// Graphically:
|
||||
// | up to ThunkSectionSpacing .text input sections |
|
||||
// | ThunkSection |
|
||||
// | up to ThunkSectionSpacing .text input sections |
|
||||
// | ThunkSection |
|
||||
|
||||
// Pre-created ThunkSections are spaced roughly 16MiB apart on ARM. This is to
|
||||
// match the most common expected case of a Thumb 2 encoded BL, BLX or B.W
|
||||
// ARM B, BL, BLX range +/- 32MiB
|
||||
// Thumb B.W, BL, BLX range +/- 16MiB
|
||||
// Thumb B<cc>.W range +/- 1MiB
|
||||
// If a branch cannot reach a pre-created ThunkSection a new one will be
|
||||
// created so we can handle the rare cases of a Thumb 2 conditional branch.
|
||||
// We intentionally use a lower size for ThunkSectionSpacing than the maximum
|
||||
// branch range so the end of the ThunkSection is more likely to be within
|
||||
// range of the branch instruction that is furthest away. The value we shorten
|
||||
// ThunkSectionSpacing by is set conservatively to allow us to create 16,384
|
||||
// 12 byte Thunks at any offset in a ThunkSection without risk of a branch to
|
||||
// one of the Thunks going out of range.
|
||||
|
||||
// FIXME: lld assumes that the Thumb BL and BLX encoding permits the J1 and
|
||||
// J2 bits to be used to extend the branch range. On earlier Architectures
|
||||
// such as ARMv4, ARMv5 and ARMv6 (except ARMv6T2) the range is +/- 4MiB. If
|
||||
// support for the earlier encodings is added then when they are used the
|
||||
// ThunkSectionSpacing will need lowering.
|
||||
ThunkSectionSpacing = 0x1000000 - 0x30000;
|
||||
}
|
||||
|
||||
uint32_t ARM::calcEFlags() const {
|
||||
@ -165,6 +134,12 @@ RelExpr ARM::getRelExpr(RelType Type, const Symbol &S,
|
||||
return R_NONE;
|
||||
case R_ARM_TLS_LE32:
|
||||
return R_TLS;
|
||||
case R_ARM_V4BX:
|
||||
// V4BX is just a marker to indicate there's a "bx rN" instruction at the
|
||||
// given address. It can be used to implement a special linker mode which
|
||||
// rewrites ARMv4T inputs to ARMv4. Since we support only ARMv4 input and
|
||||
// not ARMv4 output, we can just ignore it.
|
||||
return R_HINT;
|
||||
default:
|
||||
return R_ABS;
|
||||
}
|
||||
@ -177,7 +152,7 @@ RelType ARM::getDynRel(RelType Type) const {
|
||||
}
|
||||
|
||||
void ARM::writeGotPlt(uint8_t *Buf, const Symbol &) const {
|
||||
write32le(Buf, InX::Plt->getVA());
|
||||
write32le(Buf, In.Plt->getVA());
|
||||
}
|
||||
|
||||
void ARM::writeIgotPlt(uint8_t *Buf, const Symbol &S) const {
|
||||
@ -198,8 +173,8 @@ static void writePltHeaderLong(uint8_t *Buf) {
|
||||
0xd4, 0xd4, 0xd4, 0xd4, // Pad to 32-byte boundary
|
||||
0xd4, 0xd4, 0xd4, 0xd4};
|
||||
memcpy(Buf, PltData, sizeof(PltData));
|
||||
uint64_t GotPlt = InX::GotPlt->getVA();
|
||||
uint64_t L1 = InX::Plt->getVA() + 8;
|
||||
uint64_t GotPlt = In.GotPlt->getVA();
|
||||
uint64_t L1 = In.Plt->getVA() + 8;
|
||||
write32le(Buf + 16, GotPlt - L1 - 8);
|
||||
}
|
||||
|
||||
@ -217,7 +192,7 @@ void ARM::writePltHeader(uint8_t *Buf) const {
|
||||
0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
|
||||
};
|
||||
|
||||
uint64_t Offset = InX::GotPlt->getVA() - InX::Plt->getVA() - 4;
|
||||
uint64_t Offset = In.GotPlt->getVA() - In.Plt->getVA() - 4;
|
||||
if (!llvm::isUInt<27>(Offset)) {
|
||||
// We cannot encode the Offset, use the long form.
|
||||
writePltHeaderLong(Buf);
|
||||
@ -227,10 +202,10 @@ void ARM::writePltHeader(uint8_t *Buf) const {
|
||||
write32le(Buf + 4, PltData[1] | ((Offset >> 20) & 0xff));
|
||||
write32le(Buf + 8, PltData[2] | ((Offset >> 12) & 0xff));
|
||||
write32le(Buf + 12, PltData[3] | (Offset & 0xfff));
|
||||
write32le(Buf + 16, TrapInstr); // Pad to 32-byte boundary
|
||||
write32le(Buf + 20, TrapInstr);
|
||||
write32le(Buf + 24, TrapInstr);
|
||||
write32le(Buf + 28, TrapInstr);
|
||||
memcpy(Buf + 16, TrapInstr.data(), 4); // Pad to 32-byte boundary
|
||||
memcpy(Buf + 20, TrapInstr.data(), 4);
|
||||
memcpy(Buf + 24, TrapInstr.data(), 4);
|
||||
memcpy(Buf + 28, TrapInstr.data(), 4);
|
||||
}
|
||||
|
||||
void ARM::addPltHeaderSymbols(InputSection &IS) const {
|
||||
@ -279,7 +254,7 @@ void ARM::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
|
||||
write32le(Buf + 0, PltData[0] | ((Offset >> 20) & 0xff));
|
||||
write32le(Buf + 4, PltData[1] | ((Offset >> 12) & 0xff));
|
||||
write32le(Buf + 8, PltData[2] | (Offset & 0xfff));
|
||||
write32le(Buf + 12, TrapInstr); // Pad to 16-byte boundary
|
||||
memcpy(Buf + 12, TrapInstr.data(), 4); // Pad to 16-byte boundary
|
||||
}
|
||||
|
||||
void ARM::addPltSymbols(InputSection &IS, uint64_t Off) const {
|
||||
@ -324,6 +299,40 @@ bool ARM::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t ARM::getThunkSectionSpacing() const {
|
||||
// The placing of pre-created ThunkSections is controlled by the value
|
||||
// ThunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to
|
||||
// place the ThunkSection such that all branches from the InputSections
|
||||
// prior to the ThunkSection can reach a Thunk placed at the end of the
|
||||
// ThunkSection. Graphically:
|
||||
// | up to ThunkSectionSpacing .text input sections |
|
||||
// | ThunkSection |
|
||||
// | up to ThunkSectionSpacing .text input sections |
|
||||
// | ThunkSection |
|
||||
|
||||
// Pre-created ThunkSections are spaced roughly 16MiB apart on ARMv7. This
|
||||
// is to match the most common expected case of a Thumb 2 encoded BL, BLX or
|
||||
// B.W:
|
||||
// ARM B, BL, BLX range +/- 32MiB
|
||||
// Thumb B.W, BL, BLX range +/- 16MiB
|
||||
// Thumb B<cc>.W range +/- 1MiB
|
||||
// If a branch cannot reach a pre-created ThunkSection a new one will be
|
||||
// created so we can handle the rare cases of a Thumb 2 conditional branch.
|
||||
// We intentionally use a lower size for ThunkSectionSpacing than the maximum
|
||||
// branch range so the end of the ThunkSection is more likely to be within
|
||||
// range of the branch instruction that is furthest away. The value we shorten
|
||||
// ThunkSectionSpacing by is set conservatively to allow us to create 16,384
|
||||
// 12 byte Thunks at any offset in a ThunkSection without risk of a branch to
|
||||
// one of the Thunks going out of range.
|
||||
|
||||
// On Arm the ThunkSectionSpacing depends on the range of the Thumb Branch
|
||||
// range. On earlier Architectures such as ARMv4, ARMv5 and ARMv6 (except
|
||||
// ARMv6T2) the range is +/- 4MiB.
|
||||
|
||||
return (Config->ARMJ1J2BranchEncoding) ? 0x1000000 - 0x30000
|
||||
: 0x400000 - 0x7500;
|
||||
}
|
||||
|
||||
bool ARM::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
|
||||
uint64_t Range;
|
||||
uint64_t InstrSize;
|
||||
@ -342,7 +351,7 @@ bool ARM::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
|
||||
break;
|
||||
case R_ARM_THM_JUMP24:
|
||||
case R_ARM_THM_CALL:
|
||||
Range = 0x1000000;
|
||||
Range = Config->ARMJ1J2BranchEncoding ? 0x1000000 : 0x400000;
|
||||
InstrSize = 2;
|
||||
break;
|
||||
default:
|
||||
@ -447,11 +456,23 @@ void ARM::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
}
|
||||
// Bit 12 is 0 for BLX, 1 for BL
|
||||
write16le(Loc + 2, (read16le(Loc + 2) & ~0x1000) | (Val & 1) << 12);
|
||||
if (!Config->ARMJ1J2BranchEncoding) {
|
||||
// Older Arm architectures do not support R_ARM_THM_JUMP24 and have
|
||||
// different encoding rules and range due to J1 and J2 always being 1.
|
||||
checkInt(Loc, Val, 23, Type);
|
||||
write16le(Loc,
|
||||
0xf000 | // opcode
|
||||
((Val >> 12) & 0x07ff)); // imm11
|
||||
write16le(Loc + 2,
|
||||
(read16le(Loc + 2) & 0xd000) | // opcode
|
||||
0x2800 | // J1 == J2 == 1
|
||||
((Val >> 1) & 0x07ff)); // imm11
|
||||
break;
|
||||
}
|
||||
// Fall through as rest of encoding is the same as B.W
|
||||
LLVM_FALLTHROUGH;
|
||||
case R_ARM_THM_JUMP24:
|
||||
// Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0
|
||||
// FIXME: Use of I1 and I2 require v6T2ops
|
||||
checkInt(Loc, Val, 25, Type);
|
||||
write16le(Loc,
|
||||
0xf000 | // opcode
|
||||
@ -470,14 +491,12 @@ void ARM::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
break;
|
||||
case R_ARM_MOVT_ABS:
|
||||
case R_ARM_MOVT_PREL:
|
||||
checkInt(Loc, Val, 32, Type);
|
||||
write32le(Loc, (read32le(Loc) & ~0x000f0fff) |
|
||||
(((Val >> 16) & 0xf000) << 4) | ((Val >> 16) & 0xfff));
|
||||
break;
|
||||
case R_ARM_THM_MOVT_ABS:
|
||||
case R_ARM_THM_MOVT_PREL:
|
||||
// Encoding T1: A = imm4:i:imm3:imm8
|
||||
checkInt(Loc, Val, 32, Type);
|
||||
write16le(Loc,
|
||||
0xf2c0 | // opcode
|
||||
((Val >> 17) & 0x0400) | // i
|
||||
@ -542,10 +561,19 @@ int64_t ARM::getImplicitAddend(const uint8_t *Buf, RelType Type) const {
|
||||
((Lo & 0x07ff) << 1)); // imm11:0
|
||||
}
|
||||
case R_ARM_THM_CALL:
|
||||
if (!Config->ARMJ1J2BranchEncoding) {
|
||||
// Older Arm architectures do not support R_ARM_THM_JUMP24 and have
|
||||
// different encoding rules and range due to J1 and J2 always being 1.
|
||||
uint16_t Hi = read16le(Buf);
|
||||
uint16_t Lo = read16le(Buf + 2);
|
||||
return SignExtend64<22>(((Hi & 0x7ff) << 12) | // imm11
|
||||
((Lo & 0x7ff) << 1)); // imm11:0
|
||||
break;
|
||||
}
|
||||
LLVM_FALLTHROUGH;
|
||||
case R_ARM_THM_JUMP24: {
|
||||
// Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0
|
||||
// I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S)
|
||||
// FIXME: I1 and I2 require v6T2ops
|
||||
uint16_t Hi = read16le(Buf);
|
||||
uint16_t Lo = read16le(Buf + 2);
|
||||
return SignExtend64<24>(((Hi & 0x0400) << 14) | // S
|
||||
|
||||
3
deps/lld/ELF/Arch/AVR.cpp
vendored
3
deps/lld/ELF/Arch/AVR.cpp
vendored
@ -43,12 +43,15 @@ using namespace lld::elf;
|
||||
namespace {
|
||||
class AVR final : public TargetInfo {
|
||||
public:
|
||||
AVR();
|
||||
RelExpr getRelExpr(RelType Type, const Symbol &S,
|
||||
const uint8_t *Loc) const override;
|
||||
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
AVR::AVR() { NoneRel = R_AVR_NONE; }
|
||||
|
||||
RelExpr AVR::getRelExpr(RelType Type, const Symbol &S,
|
||||
const uint8_t *Loc) const {
|
||||
return R_ABS;
|
||||
|
||||
195
deps/lld/ELF/Arch/Hexagon.cpp
vendored
195
deps/lld/ELF/Arch/Hexagon.cpp
vendored
@ -9,6 +9,7 @@
|
||||
|
||||
#include "InputFiles.h"
|
||||
#include "Symbols.h"
|
||||
#include "SyntheticSections.h"
|
||||
#include "Target.h"
|
||||
#include "lld/Common/ErrorHandler.h"
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
@ -25,15 +26,48 @@ using namespace lld::elf;
|
||||
namespace {
|
||||
class Hexagon final : public TargetInfo {
|
||||
public:
|
||||
Hexagon();
|
||||
uint32_t calcEFlags() const override;
|
||||
RelExpr getRelExpr(RelType Type, const Symbol &S,
|
||||
const uint8_t *Loc) const override;
|
||||
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
void writePltHeader(uint8_t *Buf) const override;
|
||||
void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
|
||||
int32_t Index, unsigned RelOff) const override;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Support V60 only at the moment.
|
||||
uint32_t Hexagon::calcEFlags() const { return 0x60; }
|
||||
Hexagon::Hexagon() {
|
||||
PltRel = R_HEX_JMP_SLOT;
|
||||
RelativeRel = R_HEX_RELATIVE;
|
||||
GotRel = R_HEX_GLOB_DAT;
|
||||
GotEntrySize = 4;
|
||||
// The zero'th GOT entry is reserved for the address of _DYNAMIC. The
|
||||
// next 3 are reserved for the dynamic loader.
|
||||
GotPltHeaderEntriesNum = 4;
|
||||
GotPltEntrySize = 4;
|
||||
|
||||
PltEntrySize = 16;
|
||||
PltHeaderSize = 32;
|
||||
|
||||
// Hexagon Linux uses 64K pages by default.
|
||||
DefaultMaxPageSize = 0x10000;
|
||||
NoneRel = R_HEX_NONE;
|
||||
}
|
||||
|
||||
uint32_t Hexagon::calcEFlags() const {
|
||||
assert(!ObjectFiles.empty());
|
||||
|
||||
// The architecture revision must always be equal to or greater than
|
||||
// greatest revision in the list of inputs.
|
||||
uint32_t Ret = 0;
|
||||
for (InputFile *F : ObjectFiles) {
|
||||
uint32_t EFlags = cast<ObjFile<ELF32LE>>(F)->getObj().getHeader()->e_flags;
|
||||
if (EFlags > Ret)
|
||||
Ret = EFlags;
|
||||
}
|
||||
return Ret;
|
||||
}
|
||||
|
||||
static uint32_t applyMask(uint32_t Mask, uint32_t Data) {
|
||||
uint32_t Result = 0;
|
||||
@ -53,29 +87,143 @@ static uint32_t applyMask(uint32_t Mask, uint32_t Data) {
|
||||
RelExpr Hexagon::getRelExpr(RelType Type, const Symbol &S,
|
||||
const uint8_t *Loc) const {
|
||||
switch (Type) {
|
||||
case R_HEX_B9_PCREL:
|
||||
case R_HEX_B9_PCREL_X:
|
||||
case R_HEX_B13_PCREL:
|
||||
case R_HEX_B15_PCREL:
|
||||
case R_HEX_B15_PCREL_X:
|
||||
case R_HEX_6_PCREL_X:
|
||||
case R_HEX_32_PCREL:
|
||||
return R_PC;
|
||||
case R_HEX_B22_PCREL:
|
||||
case R_HEX_PLT_B22_PCREL:
|
||||
case R_HEX_B22_PCREL_X:
|
||||
case R_HEX_B32_PCREL_X:
|
||||
return R_PC;
|
||||
return R_PLT_PC;
|
||||
case R_HEX_GOT_11_X:
|
||||
case R_HEX_GOT_16_X:
|
||||
case R_HEX_GOT_32_6_X:
|
||||
return R_HEXAGON_GOT;
|
||||
default:
|
||||
return R_ABS;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t findMaskR6(uint32_t Insn) {
|
||||
// There are (arguably too) many relocation masks for the DSP's
|
||||
// R_HEX_6_X type. The table below is used to select the correct mask
|
||||
// for the given instruction.
|
||||
struct InstructionMask {
|
||||
uint32_t CmpMask;
|
||||
uint32_t RelocMask;
|
||||
};
|
||||
|
||||
static const InstructionMask R6[] = {
|
||||
{0x38000000, 0x0000201f}, {0x39000000, 0x0000201f},
|
||||
{0x3e000000, 0x00001f80}, {0x3f000000, 0x00001f80},
|
||||
{0x40000000, 0x000020f8}, {0x41000000, 0x000007e0},
|
||||
{0x42000000, 0x000020f8}, {0x43000000, 0x000007e0},
|
||||
{0x44000000, 0x000020f8}, {0x45000000, 0x000007e0},
|
||||
{0x46000000, 0x000020f8}, {0x47000000, 0x000007e0},
|
||||
{0x6a000000, 0x00001f80}, {0x7c000000, 0x001f2000},
|
||||
{0x9a000000, 0x00000f60}, {0x9b000000, 0x00000f60},
|
||||
{0x9c000000, 0x00000f60}, {0x9d000000, 0x00000f60},
|
||||
{0x9f000000, 0x001f0100}, {0xab000000, 0x0000003f},
|
||||
{0xad000000, 0x0000003f}, {0xaf000000, 0x00030078},
|
||||
{0xd7000000, 0x006020e0}, {0xd8000000, 0x006020e0},
|
||||
{0xdb000000, 0x006020e0}, {0xdf000000, 0x006020e0}};
|
||||
|
||||
// Duplex forms have a fixed mask and parse bits 15:14 are always
|
||||
// zero. Non-duplex insns will always have at least one bit set in the
|
||||
// parse field.
|
||||
if ((0xC000 & Insn) == 0x0)
|
||||
return 0x03f00000;
|
||||
|
||||
for (InstructionMask I : R6)
|
||||
if ((0xff000000 & Insn) == I.CmpMask)
|
||||
return I.RelocMask;
|
||||
|
||||
error("unrecognized instruction for R_HEX_6 relocation: 0x" +
|
||||
utohexstr(Insn));
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Selects the relocation bit mask for R_HEX_8_X from the top byte of the
// instruction word. Instructions that are not special-cased use 0x00001fe0.
static uint32_t findMaskR8(uint32_t Insn) {
  switch (Insn & 0xff000000) {
  case 0xde000000:
    return 0x00e020e8;
  case 0x3c000000:
    return 0x0000207f;
  default:
    return 0x00001fe0;
  }
}
|
||||
|
||||
// Selects the relocation bit mask for R_HEX_11_X / R_HEX_GOT_11_X. Only
// instructions whose top byte is 0xa1 use the alternate mask.
static uint32_t findMaskR11(uint32_t Insn) {
  const uint32_t TopByte = Insn & 0xff000000;
  return TopByte == 0xa1000000 ? 0x060020ff : 0x06003fe0;
}
|
||||
|
||||
// Selects the relocation bit mask for R_HEX_16_X / R_HEX_GOT_16_X from the
// top byte of the instruction word. Reports an error and returns 0 when the
// instruction is not one of the known encodings.
static uint32_t findMaskR16(uint32_t Insn) {
  switch (Insn & 0xff000000) {
  case 0x48000000:
    return 0x061f20ff;
  case 0x49000000:
    return 0x061f3fe0;
  case 0x78000000:
    return 0x00df3fe0;
  case 0xb0000000:
    return 0x0fe03fe0;
  default:
    break;
  }

  error("unrecognized instruction for R_HEX_16_X relocation: 0x" +
        utohexstr(Insn));
  return 0;
}
|
||||
|
||||
// ORs V into the 32-bit little-endian word stored at P, leaving bits that are
// already set untouched.
static void or32le(uint8_t *P, int32_t V) {
  const uint32_t Current = read32le(P);
  write32le(P, Current | V);
}
|
||||
|
||||
void Hexagon::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
switch (Type) {
|
||||
case R_HEX_NONE:
|
||||
break;
|
||||
case R_HEX_6_PCREL_X:
|
||||
case R_HEX_6_X:
|
||||
or32le(Loc, applyMask(findMaskR6(read32le(Loc)), Val));
|
||||
break;
|
||||
case R_HEX_8_X:
|
||||
or32le(Loc, applyMask(findMaskR8(read32le(Loc)), Val));
|
||||
break;
|
||||
case R_HEX_9_X:
|
||||
or32le(Loc, applyMask(0x00003fe0, Val & 0x3f));
|
||||
break;
|
||||
case R_HEX_10_X:
|
||||
or32le(Loc, applyMask(0x00203fe0, Val & 0x3f));
|
||||
break;
|
||||
case R_HEX_11_X:
|
||||
case R_HEX_GOT_11_X:
|
||||
or32le(Loc, applyMask(findMaskR11(read32le(Loc)), Val & 0x3f));
|
||||
break;
|
||||
case R_HEX_12_X:
|
||||
or32le(Loc, applyMask(0x000007e0, Val));
|
||||
break;
|
||||
case R_HEX_16_X: // These relocs only have 6 effective bits.
|
||||
case R_HEX_GOT_16_X:
|
||||
or32le(Loc, applyMask(findMaskR16(read32le(Loc)), Val & 0x3f));
|
||||
break;
|
||||
case R_HEX_32:
|
||||
case R_HEX_32_PCREL:
|
||||
or32le(Loc, Val);
|
||||
break;
|
||||
case R_HEX_32_6_X:
|
||||
case R_HEX_GOT_32_6_X:
|
||||
or32le(Loc, applyMask(0x0fff3fff, Val >> 6));
|
||||
break;
|
||||
case R_HEX_B9_PCREL:
|
||||
or32le(Loc, applyMask(0x003000fe, Val >> 2));
|
||||
break;
|
||||
case R_HEX_B9_PCREL_X:
|
||||
or32le(Loc, applyMask(0x003000fe, Val & 0x3f));
|
||||
break;
|
||||
case R_HEX_B13_PCREL:
|
||||
or32le(Loc, applyMask(0x00202ffe, Val >> 2));
|
||||
break;
|
||||
case R_HEX_B15_PCREL:
|
||||
or32le(Loc, applyMask(0x00df20fe, Val >> 2));
|
||||
break;
|
||||
@ -83,6 +231,7 @@ void Hexagon::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
or32le(Loc, applyMask(0x00df20fe, Val & 0x3f));
|
||||
break;
|
||||
case R_HEX_B22_PCREL:
|
||||
case R_HEX_PLT_B22_PCREL:
|
||||
or32le(Loc, applyMask(0x1ff3ffe, Val >> 2));
|
||||
break;
|
||||
case R_HEX_B22_PCREL_X:
|
||||
@ -91,12 +240,52 @@ void Hexagon::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
case R_HEX_B32_PCREL_X:
|
||||
or32le(Loc, applyMask(0x0fff3fff, Val >> 6));
|
||||
break;
|
||||
case R_HEX_HI16:
|
||||
or32le(Loc, applyMask(0x00c03fff, Val >> 16));
|
||||
break;
|
||||
case R_HEX_LO16:
|
||||
or32le(Loc, applyMask(0x00c03fff, Val));
|
||||
break;
|
||||
default:
|
||||
error(getErrorLocation(Loc) + "unrecognized reloc " + toString(Type));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Hexagon::writePltHeader(uint8_t *Buf) const {
|
||||
const uint8_t PltData[] = {
|
||||
0x00, 0x40, 0x00, 0x00, // { immext (#0)
|
||||
0x1c, 0xc0, 0x49, 0x6a, // r28 = add (pc, ##GOT0@PCREL) } # @GOT0
|
||||
0x0e, 0x42, 0x9c, 0xe2, // { r14 -= add (r28, #16) # offset of GOTn
|
||||
0x4f, 0x40, 0x9c, 0x91, // r15 = memw (r28 + #8) # object ID at GOT2
|
||||
0x3c, 0xc0, 0x9c, 0x91, // r28 = memw (r28 + #4) }# dynamic link at GOT1
|
||||
0x0e, 0x42, 0x0e, 0x8c, // { r14 = asr (r14, #2) # index of PLTn
|
||||
0x00, 0xc0, 0x9c, 0x52, // jumpr r28 } # call dynamic linker
|
||||
0x0c, 0xdb, 0x00, 0x54, // trap0(#0xdb) # bring plt0 into 16byte alignment
|
||||
};
|
||||
memcpy(Buf, PltData, sizeof(PltData));
|
||||
|
||||
// Offset from PLT0 to the GOT.
|
||||
uint64_t Off = In.GotPlt->getVA() - In.Plt->getVA();
|
||||
relocateOne(Buf, R_HEX_B32_PCREL_X, Off);
|
||||
relocateOne(Buf + 4, R_HEX_6_PCREL_X, Off);
|
||||
}
|
||||
|
||||
void Hexagon::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
|
||||
uint64_t PltEntryAddr, int32_t Index,
|
||||
unsigned RelOff) const {
|
||||
const uint8_t Inst[] = {
|
||||
0x00, 0x40, 0x00, 0x00, // { immext (#0)
|
||||
0x0e, 0xc0, 0x49, 0x6a, // r14 = add (pc, ##GOTn@PCREL) }
|
||||
0x1c, 0xc0, 0x8e, 0x91, // r28 = memw (r14)
|
||||
0x00, 0xc0, 0x9c, 0x52, // jumpr r28
|
||||
};
|
||||
memcpy(Buf, Inst, sizeof(Inst));
|
||||
|
||||
relocateOne(Buf, R_HEX_B32_PCREL_X, GotPltEntryAddr - PltEntryAddr);
|
||||
relocateOne(Buf + 4, R_HEX_6_PCREL_X, GotPltEntryAddr - PltEntryAddr);
|
||||
}
|
||||
|
||||
TargetInfo *elf::getHexagonTargetInfo() {
|
||||
static Hexagon Target;
|
||||
return &Target;
|
||||
|
||||
94
deps/lld/ELF/Arch/MSP430.cpp
vendored
Normal file
94
deps/lld/ELF/Arch/MSP430.cpp
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
//===- MSP430.cpp ---------------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Linker
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The MSP430 is a 16-bit microcontroller RISC architecture. The instruction set
|
||||
// has only 27 core instructions orthogonally augmented with a variety
|
||||
// of addressing modes for source and destination operands. Entire address space
|
||||
// of MSP430 is 64KB (the extended MSP430X architecture is not considered here).
|
||||
// A typical MSP430 MCU has several kilobytes of RAM and ROM, plenty
|
||||
// of peripherals and is generally optimized for a low power consumption.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "InputFiles.h"
|
||||
#include "Symbols.h"
|
||||
#include "Target.h"
|
||||
#include "lld/Common/ErrorHandler.h"
|
||||
#include "llvm/Object/ELF.h"
|
||||
#include "llvm/Support/Endian.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::object;
|
||||
using namespace llvm::support::endian;
|
||||
using namespace llvm::ELF;
|
||||
using namespace lld;
|
||||
using namespace lld::elf;
|
||||
|
||||
namespace {
|
||||
class MSP430 final : public TargetInfo {
|
||||
public:
|
||||
MSP430();
|
||||
RelExpr getRelExpr(RelType Type, const Symbol &S,
|
||||
const uint8_t *Loc) const override;
|
||||
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Sets the trap/padding pattern used for this target.
MSP430::MSP430() {
  // Every byte of the pattern is 0x43 (mov.b #0, r3).
  const uint8_t Fill = 0x43;
  TrapInstr = {Fill, Fill, Fill, Fill};
}
|
||||
|
||||
// Maps an MSP430 relocation type to the expression kind used to compute its
// value: the listed PC-relative types yield R_PC, everything else R_ABS.
// S and Loc are not consulted.
RelExpr MSP430::getRelExpr(RelType Type, const Symbol &S,
                           const uint8_t *Loc) const {
  const bool IsPcRelative =
      Type == R_MSP430_10_PCREL || Type == R_MSP430_16_PCREL ||
      Type == R_MSP430_16_PCREL_BYTE || Type == R_MSP430_2X_PCREL ||
      Type == R_MSP430_RL_PCREL || Type == R_MSP430_SYM_DIFF;
  if (IsPcRelative)
    return R_PC;
  return R_ABS;
}
|
||||
|
||||
void MSP430::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
switch (Type) {
|
||||
case R_MSP430_8:
|
||||
checkIntUInt(Loc, Val, 8, Type);
|
||||
*Loc = Val;
|
||||
break;
|
||||
case R_MSP430_16:
|
||||
case R_MSP430_16_PCREL:
|
||||
case R_MSP430_16_BYTE:
|
||||
case R_MSP430_16_PCREL_BYTE:
|
||||
checkIntUInt(Loc, Val, 16, Type);
|
||||
write16le(Loc, Val);
|
||||
break;
|
||||
case R_MSP430_32:
|
||||
checkIntUInt(Loc, Val, 32, Type);
|
||||
write32le(Loc, Val);
|
||||
break;
|
||||
case R_MSP430_10_PCREL: {
|
||||
int16_t Offset = ((int16_t)Val >> 1) - 1;
|
||||
checkInt(Loc, Offset, 10, Type);
|
||||
write16le(Loc, (read16le(Loc) & 0xFC00) | (Offset & 0x3FF));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
error(getErrorLocation(Loc) + "unrecognized reloc " + toString(Type));
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the MSP430 target description. The object is a function-local
// static, so every call returns the same instance.
TargetInfo *elf::getMSP430TargetInfo() {
  static MSP430 Instance;
  TargetInfo *Result = &Instance;
  return Result;
}
|
||||
13
deps/lld/ELF/Arch/Mips.cpp
vendored
13
deps/lld/ELF/Arch/Mips.cpp
vendored
@ -53,9 +53,12 @@ template <class ELFT> MIPS<ELFT>::MIPS() {
|
||||
PltEntrySize = 16;
|
||||
PltHeaderSize = 32;
|
||||
CopyRel = R_MIPS_COPY;
|
||||
NoneRel = R_MIPS_NONE;
|
||||
PltRel = R_MIPS_JUMP_SLOT;
|
||||
NeedsThunks = true;
|
||||
TrapInstr = 0xefefefef;
|
||||
|
||||
// Set `sigrie 1` as a trap instruction.
|
||||
write32(TrapInstr.data(), 0x04170001);
|
||||
|
||||
if (ELFT::Is64Bits) {
|
||||
RelativeRel = (R_MIPS_64 << 8) | R_MIPS_REL32;
|
||||
@ -185,7 +188,7 @@ template <class ELFT> RelType MIPS<ELFT>::getDynRel(RelType Type) const {
|
||||
|
||||
template <class ELFT>
|
||||
void MIPS<ELFT>::writeGotPlt(uint8_t *Buf, const Symbol &) const {
|
||||
uint64_t VA = InX::Plt->getVA();
|
||||
uint64_t VA = In.Plt->getVA();
|
||||
if (isMicroMips())
|
||||
VA |= 1;
|
||||
write32<ELFT::TargetEndianness>(Buf, VA);
|
||||
@ -239,8 +242,8 @@ static void writeMicroRelocation16(uint8_t *Loc, uint64_t V, uint8_t BitsSize,
|
||||
template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *Buf) const {
|
||||
const endianness E = ELFT::TargetEndianness;
|
||||
if (isMicroMips()) {
|
||||
uint64_t GotPlt = InX::GotPlt->getVA();
|
||||
uint64_t Plt = InX::Plt->getVA();
|
||||
uint64_t GotPlt = In.GotPlt->getVA();
|
||||
uint64_t Plt = In.Plt->getVA();
|
||||
// Overwrite trap instructions written by Writer::writeTrapInstr.
|
||||
memset(Buf, 0, PltHeaderSize);
|
||||
|
||||
@ -292,7 +295,7 @@ template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *Buf) const {
|
||||
write32<E>(Buf + 24, JalrInst); // jalr.hb $25 or jalr $25
|
||||
write32<E>(Buf + 28, 0x2718fffe); // subu $24, $24, 2
|
||||
|
||||
uint64_t GotPlt = InX::GotPlt->getVA();
|
||||
uint64_t GotPlt = In.GotPlt->getVA();
|
||||
writeValue<E>(Buf, GotPlt + 0x8000, 16, 16);
|
||||
writeValue<E>(Buf + 4, GotPlt, 16, 0);
|
||||
writeValue<E>(Buf + 8, GotPlt, 16, 0);
|
||||
|
||||
5
deps/lld/ELF/Arch/PPC.cpp
vendored
5
deps/lld/ELF/Arch/PPC.cpp
vendored
@ -29,6 +29,7 @@ public:
|
||||
} // namespace
|
||||
|
||||
PPC::PPC() {
|
||||
NoneRel = R_PPC_NONE;
|
||||
GotBaseSymOff = 0x8000;
|
||||
GotBaseSymInGotPlt = false;
|
||||
}
|
||||
@ -36,6 +37,7 @@ PPC::PPC() {
|
||||
RelExpr PPC::getRelExpr(RelType Type, const Symbol &S,
|
||||
const uint8_t *Loc) const {
|
||||
switch (Type) {
|
||||
case R_PPC_REL14:
|
||||
case R_PPC_REL24:
|
||||
case R_PPC_REL32:
|
||||
return R_PC;
|
||||
@ -61,6 +63,9 @@ void PPC::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
case R_PPC_REL32:
|
||||
write32be(Loc, Val);
|
||||
break;
|
||||
case R_PPC_REL14:
|
||||
write32be(Loc, read32be(Loc) | (Val & 0xFFFC));
|
||||
break;
|
||||
case R_PPC_PLTREL24:
|
||||
case R_PPC_REL24:
|
||||
write32be(Loc, read32be(Loc) | (Val & 0x3FFFFFC));
|
||||
|
||||
473
deps/lld/ELF/Arch/PPC64.cpp
vendored
473
deps/lld/ELF/Arch/PPC64.cpp
vendored
@ -23,12 +23,49 @@ using namespace lld::elf;
|
||||
static uint64_t PPC64TocOffset = 0x8000;
|
||||
static uint64_t DynamicThreadPointerOffset = 0x8000;
|
||||
|
||||
// Secondary opcodes (instruction bits 21-30 in the ISA's numbering) of the
// X-form instructions that may appear in the initial-exec TLS sequence.
// Listed in ascending numeric order.
enum XFormOpcd {
  LDX = 21,
  LWZX = 23,
  LBZX = 87,
  STDX = 149,
  STWX = 151,
  STBX = 215,
  ADD = 266,
  LHZX = 279,
  STHX = 407,
};
|
||||
|
||||
// Primary opcodes of the D-form (and DS-form) instructions this file
// recognizes or generates. Listed in ascending numeric order.
enum DFormOpcd {
  ADDI = 14,
  LWZ = 32,
  LWZU = 33,
  LBZ = 34,
  LBZU = 35,
  STW = 36,
  STWU = 37,
  STB = 38,
  STBU = 39,
  LHZ = 40,
  LHZU = 41,
  LHAU = 43,
  STH = 44,
  STHU = 45,
  LFSU = 49,
  LFDU = 51,
  STFSU = 53,
  STFDU = 55,
  LD = 58,
  STD = 62
};
|
||||
|
||||
uint64_t elf::getPPC64TocBase() {
|
||||
// The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
|
||||
// TOC starts where the first of these sections starts. We always create a
|
||||
// .got when we see a relocation that uses it, so for us the start is always
|
||||
// the .got.
|
||||
uint64_t TocVA = InX::Got->getVA();
|
||||
uint64_t TocVA = In.Got->getVA();
|
||||
|
||||
// Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000
|
||||
// thus permitting a full 64 Kbytes segment. Note that the glibc startup
|
||||
@ -37,6 +74,31 @@ uint64_t elf::getPPC64TocBase() {
|
||||
return TocVA + PPC64TocOffset;
|
||||
}
|
||||
|
||||
// Decodes the distance in bytes between a function's global entry point (GEP)
// and its local entry point (LEP) from the symbol's st_other field.
//
// The three most significant bits of st_other hold the encoding described in
// section 3.4.1 of the ABI:
//   0   --> GEP and LEP coincide and the function does NOT use the TOC
//           pointer (r2); r2 holds the same value on return as on entry.
//   1   --> GEP and LEP coincide and r2 should be treated as a caller-saved
//           register for all callers.
//   2-6 --> The binary logarithm of the offset, e.g.
//           2 --> 2^2 = 4 bytes  --> 1 instruction,
//           6 --> 2^6 = 64 bytes --> 16 instructions.
//   7   --> Reserved; reported as an error and treated as offset 0.
unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t StOther) {
  const uint8_t Encoded = (StOther >> 5) & 7;

  switch (Encoded) {
  case 0:
  case 1:
    return 0;
  case 7:
    error("reserved value of 7 in the 3 most-significant-bits of st_other");
    return 0;
  default:
    // The encoded value is log-base-2 of the offset.
    return 1 << Encoded;
  }
}
|
||||
|
||||
namespace {
|
||||
class PPC64 final : public TargetInfo {
|
||||
public:
|
||||
@ -51,11 +113,16 @@ public:
|
||||
void writeGotHeader(uint8_t *Buf) const override;
|
||||
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
|
||||
uint64_t BranchAddr, const Symbol &S) const override;
|
||||
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
|
||||
RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
|
||||
RelExpr Expr) const override;
|
||||
void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
|
||||
bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
|
||||
uint8_t StOther) const override;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
@ -71,8 +138,64 @@ static uint16_t highera(uint64_t V) { return (V + 0x8000) >> 32; }
|
||||
static uint16_t highest(uint64_t V) { return V >> 48; }
|
||||
static uint16_t highesta(uint64_t V) { return (V + 0x8000) >> 48; }
|
||||
|
||||
// Returns the primary opcode ('PO' field) of an instruction encoding, i.e. the
// six most significant bits of the 32-bit word.
static uint8_t getPrimaryOpCode(uint32_t Encoding) {
  const uint32_t TopSixBits = Encoding >> 26;
  return TopSixBits;
}
|
||||
|
||||
static bool isDQFormInstruction(uint32_t Encoding) {
|
||||
switch (getPrimaryOpCode(Encoding)) {
|
||||
default:
|
||||
return false;
|
||||
case 56:
|
||||
// The only instruction with a primary opcode of 56 is `lq`.
|
||||
return true;
|
||||
case 61:
|
||||
// There are both DS and DQ instruction forms with this primary opcode.
|
||||
// Namely `lxv` and `stxv` are the DQ-forms that use it.
|
||||
// The DS 'XO' bits being set to 01 is restricted to DQ form.
|
||||
return (Encoding & 3) == 0x1;
|
||||
}
|
||||
}
|
||||
|
||||
static bool isInstructionUpdateForm(uint32_t Encoding) {
|
||||
switch (getPrimaryOpCode(Encoding)) {
|
||||
default:
|
||||
return false;
|
||||
case LBZU:
|
||||
case LHAU:
|
||||
case LHZU:
|
||||
case LWZU:
|
||||
case LFSU:
|
||||
case LFDU:
|
||||
case STBU:
|
||||
case STHU:
|
||||
case STWU:
|
||||
case STFSU:
|
||||
case STFDU:
|
||||
return true;
|
||||
// LWA has the same opcode as LD, and the DS bits is what differentiates
|
||||
// between LD/LDU/LWA
|
||||
case LD:
|
||||
case STD:
|
||||
return (Encoding & 3) == 1;
|
||||
}
|
||||
}
|
||||
|
||||
// There are a number of places when we either want to read or write an
|
||||
// instruction when handling a half16 relocation type. On big-endian the buffer
|
||||
// pointer is pointing into the middle of the word we want to extract, and on
|
||||
// little-endian it is pointing to the start of the word. These 2 helpers are to
|
||||
// simplify reading and writing in that context.
|
||||
static void writeInstrFromHalf16(uint8_t *Loc, uint32_t Instr) {
|
||||
write32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0), Instr);
|
||||
}
|
||||
|
||||
static uint32_t readInstrFromHalf16(const uint8_t *Loc) {
|
||||
return read32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0));
|
||||
}
|
||||
|
||||
PPC64::PPC64() {
|
||||
GotRel = R_PPC64_GLOB_DAT;
|
||||
NoneRel = R_PPC64_NONE;
|
||||
PltRel = R_PPC64_JMP_SLOT;
|
||||
RelativeRel = R_PPC64_RELATIVE;
|
||||
IRelativeRel = R_PPC64_IRELATIVE;
|
||||
@ -85,14 +208,14 @@ PPC64::PPC64() {
|
||||
GotPltHeaderEntriesNum = 2;
|
||||
PltHeaderSize = 60;
|
||||
NeedsThunks = true;
|
||||
TcbSize = 8;
|
||||
TlsTpOffset = 0x7000;
|
||||
|
||||
TlsModuleIndexRel = R_PPC64_DTPMOD64;
|
||||
TlsOffsetRel = R_PPC64_DTPREL64;
|
||||
|
||||
TlsGotRel = R_PPC64_TPREL64;
|
||||
|
||||
NeedsMoreStackNonSplit = false;
|
||||
|
||||
// We need 64K pages (at least under glibc/Linux, the loader won't
|
||||
// set different permissions on a finer granularity than that).
|
||||
DefaultMaxPageSize = 65536;
|
||||
@ -107,8 +230,7 @@ PPC64::PPC64() {
|
||||
// use 0x10000000 as the starting address.
|
||||
DefaultImageBase = 0x10000000;
|
||||
|
||||
TrapInstr =
|
||||
(Config->IsLE == sys::IsLittleEndianHost) ? 0x7fe00008 : 0x0800e07f;
|
||||
write32(TrapInstr.data(), 0x7fe00008);
|
||||
}
|
||||
|
||||
static uint32_t getEFlags(InputFile *File) {
|
||||
@ -146,27 +268,29 @@ void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
// bl __tls_get_addr(x@tlsgd) into nop
|
||||
// nop into addi r3, r3, x@tprel@l
|
||||
|
||||
uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U;
|
||||
|
||||
switch (Type) {
|
||||
case R_PPC64_GOT_TLSGD16_HA:
|
||||
write32(Loc - EndianOffset, 0x60000000); // nop
|
||||
writeInstrFromHalf16(Loc, 0x60000000); // nop
|
||||
break;
|
||||
case R_PPC64_GOT_TLSGD16:
|
||||
case R_PPC64_GOT_TLSGD16_LO:
|
||||
write32(Loc - EndianOffset, 0x3c6d0000); // addis r3, r13
|
||||
writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13
|
||||
relocateOne(Loc, R_PPC64_TPREL16_HA, Val);
|
||||
break;
|
||||
case R_PPC64_TLSGD:
|
||||
write32(Loc, 0x60000000); // nop
|
||||
write32(Loc + 4, 0x38630000); // addi r3, r3
|
||||
relocateOne(Loc + 4 + EndianOffset, R_PPC64_TPREL16_LO, Val);
|
||||
// Since we are relocating a half16 type relocation and Loc + 4 points to
|
||||
// the start of an instruction we need to advance the buffer by an extra
|
||||
// 2 bytes on BE.
|
||||
relocateOne(Loc + 4 + (Config->EKind == ELF64BEKind ? 2 : 0),
|
||||
R_PPC64_TPREL16_LO, Val);
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
// Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement.
|
||||
// The local dynamic code sequence for a global `x` will look like:
|
||||
@ -183,13 +307,12 @@ void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
// bl __tls_get_addr(x@tlsgd) into nop
|
||||
// nop into addi r3, r3, 4096
|
||||
|
||||
uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U;
|
||||
switch (Type) {
|
||||
case R_PPC64_GOT_TLSLD16_HA:
|
||||
write32(Loc - EndianOffset, 0x60000000); // nop
|
||||
writeInstrFromHalf16(Loc, 0x60000000); // nop
|
||||
break;
|
||||
case R_PPC64_GOT_TLSLD16_LO:
|
||||
write32(Loc - EndianOffset, 0x3c6d0000); // addis r3, r13, 0
|
||||
writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13, 0
|
||||
break;
|
||||
case R_PPC64_TLSLD:
|
||||
write32(Loc, 0x60000000); // nop
|
||||
@ -212,9 +335,90 @@ void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned getDFormOp(unsigned SecondaryOp) {
|
||||
switch (SecondaryOp) {
|
||||
case LBZX:
|
||||
return LBZ;
|
||||
case LHZX:
|
||||
return LHZ;
|
||||
case LWZX:
|
||||
return LWZ;
|
||||
case LDX:
|
||||
return LD;
|
||||
case STBX:
|
||||
return STB;
|
||||
case STHX:
|
||||
return STH;
|
||||
case STWX:
|
||||
return STW;
|
||||
case STDX:
|
||||
return STD;
|
||||
case ADD:
|
||||
return ADDI;
|
||||
default:
|
||||
error("unrecognized instruction for IE to LE R_PPC64_TLS");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void PPC64::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
// The initial exec code sequence for a global `x` will look like:
|
||||
// Instruction Relocation Symbol
|
||||
// addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x
|
||||
// ld r9, x@got@tprel@l(r9) R_PPC64_GOT_TPREL16_LO_DS x
|
||||
// add r9, r9, x@tls R_PPC64_TLS x
|
||||
|
||||
// Relaxing to local exec entails converting:
|
||||
// addis r9, r2, x@got@tprel@ha into nop
|
||||
// ld r9, x@got@tprel@l(r9) into addis r9, r13, x@tprel@ha
|
||||
// add r9, r9, x@tls into addi r9, r9, x@tprel@l
|
||||
|
||||
// x@tls R_PPC64_TLS is a relocation which does not compute anything,
|
||||
// it is replaced with r13 (thread pointer).
|
||||
|
||||
// The add instruction in the initial exec sequence has multiple variations
|
||||
// that need to be handled. If we are building an address it will use an add
|
||||
// instruction, if we are accessing memory it will use any of the X-form
|
||||
// indexed load or store instructions.
|
||||
|
||||
unsigned Offset = (Config->EKind == ELF64BEKind) ? 2 : 0;
|
||||
switch (Type) {
|
||||
case R_PPC64_GOT_TPREL16_HA:
|
||||
write32(Loc - Offset, 0x60000000); // nop
|
||||
break;
|
||||
case R_PPC64_GOT_TPREL16_LO_DS:
|
||||
case R_PPC64_GOT_TPREL16_DS: {
|
||||
uint32_t RegNo = read32(Loc - Offset) & 0x03E00000; // bits 6-10
|
||||
write32(Loc - Offset, 0x3C0D0000 | RegNo); // addis RegNo, r13
|
||||
relocateOne(Loc, R_PPC64_TPREL16_HA, Val);
|
||||
break;
|
||||
}
|
||||
case R_PPC64_TLS: {
|
||||
uint32_t PrimaryOp = getPrimaryOpCode(read32(Loc));
|
||||
if (PrimaryOp != 31)
|
||||
error("unrecognized instruction for IE to LE R_PPC64_TLS");
|
||||
uint32_t SecondaryOp = (read32(Loc) & 0x000007FE) >> 1; // bits 21-30
|
||||
uint32_t DFormOp = getDFormOp(SecondaryOp);
|
||||
write32(Loc, ((DFormOp << 26) | (read32(Loc) & 0x03FFFFFF)));
|
||||
relocateOne(Loc + Offset, R_PPC64_TPREL16_LO, Val);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("unknown relocation for IE to LE");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
|
||||
const uint8_t *Loc) const {
|
||||
switch (Type) {
|
||||
case R_PPC64_GOT16:
|
||||
case R_PPC64_GOT16_DS:
|
||||
case R_PPC64_GOT16_HA:
|
||||
case R_PPC64_GOT16_HI:
|
||||
case R_PPC64_GOT16_LO:
|
||||
case R_PPC64_GOT16_LO_DS:
|
||||
return R_GOT_OFF;
|
||||
case R_PPC64_TOC16:
|
||||
case R_PPC64_TOC16_DS:
|
||||
case R_PPC64_TOC16_HA:
|
||||
@ -224,6 +428,7 @@ RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
|
||||
return R_GOTREL;
|
||||
case R_PPC64_TOC:
|
||||
return R_PPC_TOC;
|
||||
case R_PPC64_REL14:
|
||||
case R_PPC64_REL24:
|
||||
return R_PPC_CALL_PLT;
|
||||
case R_PPC64_REL16_LO:
|
||||
@ -279,7 +484,7 @@ RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
|
||||
case R_PPC64_TLSLD:
|
||||
return R_TLSLD_HINT;
|
||||
case R_PPC64_TLS:
|
||||
return R_HINT;
|
||||
return R_TLSIE_HINT;
|
||||
default:
|
||||
return R_ABS;
|
||||
}
|
||||
@ -308,16 +513,16 @@ void PPC64::writePltHeader(uint8_t *Buf) const {
|
||||
// The 'bcl' instruction will set the link register to the address of the
|
||||
// following instruction ('mflr r11'). Here we store the offset from that
|
||||
// instruction to the first entry in the GotPlt section.
|
||||
int64_t GotPltOffset = InX::GotPlt->getVA() - (InX::Plt->getVA() + 8);
|
||||
int64_t GotPltOffset = In.GotPlt->getVA() - (In.Plt->getVA() + 8);
|
||||
write64(Buf + 52, GotPltOffset);
|
||||
}
|
||||
|
||||
void PPC64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
|
||||
uint64_t PltEntryAddr, int32_t Index,
|
||||
unsigned RelOff) const {
|
||||
int32_t Offset = PltHeaderSize + Index * PltEntrySize;
|
||||
// bl __glink_PLTresolve
|
||||
write32(Buf, 0x48000000 | ((-Offset) & 0x03FFFFFc));
|
||||
int32_t Offset = PltHeaderSize + Index * PltEntrySize;
|
||||
// bl __glink_PLTresolve
|
||||
write32(Buf, 0x48000000 | ((-Offset) & 0x03FFFFFc));
|
||||
}
|
||||
|
||||
static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) {
|
||||
@ -328,30 +533,36 @@ static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) {
|
||||
|
||||
switch (Type) {
|
||||
// TOC biased relocation.
|
||||
case R_PPC64_GOT16:
|
||||
case R_PPC64_GOT_TLSGD16:
|
||||
case R_PPC64_GOT_TLSLD16:
|
||||
case R_PPC64_TOC16:
|
||||
return {R_PPC64_ADDR16, TocBiasedVal};
|
||||
case R_PPC64_GOT16_DS:
|
||||
case R_PPC64_TOC16_DS:
|
||||
case R_PPC64_GOT_TPREL16_DS:
|
||||
case R_PPC64_GOT_DTPREL16_DS:
|
||||
return {R_PPC64_ADDR16_DS, TocBiasedVal};
|
||||
case R_PPC64_GOT16_HA:
|
||||
case R_PPC64_GOT_TLSGD16_HA:
|
||||
case R_PPC64_GOT_TLSLD16_HA:
|
||||
case R_PPC64_GOT_TPREL16_HA:
|
||||
case R_PPC64_GOT_DTPREL16_HA:
|
||||
case R_PPC64_TOC16_HA:
|
||||
return {R_PPC64_ADDR16_HA, TocBiasedVal};
|
||||
case R_PPC64_GOT16_HI:
|
||||
case R_PPC64_GOT_TLSGD16_HI:
|
||||
case R_PPC64_GOT_TLSLD16_HI:
|
||||
case R_PPC64_GOT_TPREL16_HI:
|
||||
case R_PPC64_GOT_DTPREL16_HI:
|
||||
case R_PPC64_TOC16_HI:
|
||||
return {R_PPC64_ADDR16_HI, TocBiasedVal};
|
||||
case R_PPC64_GOT16_LO:
|
||||
case R_PPC64_GOT_TLSGD16_LO:
|
||||
case R_PPC64_GOT_TLSLD16_LO:
|
||||
case R_PPC64_TOC16_LO:
|
||||
return {R_PPC64_ADDR16_LO, TocBiasedVal};
|
||||
case R_PPC64_GOT16_LO_DS:
|
||||
case R_PPC64_TOC16_LO_DS:
|
||||
case R_PPC64_GOT_TPREL16_LO_DS:
|
||||
case R_PPC64_GOT_DTPREL16_LO_DS:
|
||||
@ -386,9 +597,27 @@ static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) {
|
||||
}
|
||||
}
|
||||
|
||||
// Reports whether Type is one of the GOT16/TOC16 relocation types that are
// candidates for toc-optimization.
static bool isTocOptType(RelType Type) {
  return Type == R_PPC64_GOT16_HA || Type == R_PPC64_GOT16_LO_DS ||
         Type == R_PPC64_TOC16_HA || Type == R_PPC64_TOC16_LO_DS ||
         Type == R_PPC64_TOC16_LO;
}
|
||||
|
||||
void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
// For a TOC-relative relocation, proceed in terms of the corresponding
|
||||
// ADDR16 relocation type.
|
||||
// We need to save the original relocation type to use in diagnostics, and
|
||||
// use the original type to determine if we should toc-optimize the
|
||||
// instructions being relocated.
|
||||
RelType OriginalType = Type;
|
||||
bool ShouldTocOptimize = isTocOptType(Type);
|
||||
// For dynamic thread pointer relative, toc-relative, and got-indirect
|
||||
// relocations, proceed in terms of the corresponding ADDR16 relocation type.
|
||||
std::tie(Type, Val) = toAddr16Rel(Type, Val);
|
||||
|
||||
switch (Type) {
|
||||
@ -401,18 +630,25 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
}
|
||||
case R_PPC64_ADDR16:
|
||||
case R_PPC64_TPREL16:
|
||||
checkInt(Loc, Val, 16, Type);
|
||||
checkInt(Loc, Val, 16, OriginalType);
|
||||
write16(Loc, Val);
|
||||
break;
|
||||
case R_PPC64_ADDR16_DS:
|
||||
case R_PPC64_TPREL16_DS:
|
||||
checkInt(Loc, Val, 16, Type);
|
||||
write16(Loc, (read16(Loc) & 3) | (Val & ~3));
|
||||
break;
|
||||
case R_PPC64_TPREL16_DS: {
|
||||
checkInt(Loc, Val, 16, OriginalType);
|
||||
// DQ-form instructions use bits 28-31 as part of the instruction encoding
|
||||
// DS-form instructions only use bits 30-31.
|
||||
uint16_t Mask = isDQFormInstruction(readInstrFromHalf16(Loc)) ? 0xF : 0x3;
|
||||
checkAlignment(Loc, lo(Val), Mask + 1, OriginalType);
|
||||
write16(Loc, (read16(Loc) & Mask) | lo(Val));
|
||||
} break;
|
||||
case R_PPC64_ADDR16_HA:
|
||||
case R_PPC64_REL16_HA:
|
||||
case R_PPC64_TPREL16_HA:
|
||||
write16(Loc, ha(Val));
|
||||
if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0)
|
||||
writeInstrFromHalf16(Loc, 0x60000000);
|
||||
else
|
||||
write16(Loc, ha(Val));
|
||||
break;
|
||||
case R_PPC64_ADDR16_HI:
|
||||
case R_PPC64_REL16_HI:
|
||||
@ -438,12 +674,40 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
case R_PPC64_ADDR16_LO:
|
||||
case R_PPC64_REL16_LO:
|
||||
case R_PPC64_TPREL16_LO:
|
||||
// When the high-adjusted part of a toc relocation evaluates to 0, it is
|
||||
// changed into a nop. The lo part then needs to be updated to use the
|
||||
// toc-pointer register r2, as the base register.
|
||||
if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) {
|
||||
uint32_t Instr = readInstrFromHalf16(Loc);
|
||||
if (isInstructionUpdateForm(Instr))
|
||||
error(getErrorLocation(Loc) +
|
||||
"can't toc-optimize an update instruction: 0x" +
|
||||
utohexstr(Instr));
|
||||
Instr = (Instr & 0xFFE00000) | 0x00020000;
|
||||
writeInstrFromHalf16(Loc, Instr);
|
||||
}
|
||||
write16(Loc, lo(Val));
|
||||
break;
|
||||
case R_PPC64_ADDR16_LO_DS:
|
||||
case R_PPC64_TPREL16_LO_DS:
|
||||
write16(Loc, (read16(Loc) & 3) | (lo(Val) & ~3));
|
||||
break;
|
||||
case R_PPC64_TPREL16_LO_DS: {
|
||||
// DQ-form instructions use bits 28-31 as part of the instruction encoding
|
||||
// DS-form instructions only use bits 30-31.
|
||||
uint32_t Inst = readInstrFromHalf16(Loc);
|
||||
uint16_t Mask = isDQFormInstruction(Inst) ? 0xF : 0x3;
|
||||
checkAlignment(Loc, lo(Val), Mask + 1, OriginalType);
|
||||
if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) {
|
||||
// When the high-adjusted part of a toc relocation evaluates to 0, it is
|
||||
// changed into a nop. The lo part then needs to be updated to use the toc
|
||||
// pointer register r2, as the base register.
|
||||
if (isInstructionUpdateForm(Inst))
|
||||
error(getErrorLocation(Loc) +
|
||||
"Can't toc-optimize an update instruction: 0x" +
|
||||
Twine::utohexstr(Inst));
|
||||
Inst = (Inst & 0xFFE0000F) | 0x00020000;
|
||||
writeInstrFromHalf16(Loc, Inst);
|
||||
}
|
||||
write16(Loc, (read16(Loc) & Mask) | lo(Val));
|
||||
} break;
|
||||
case R_PPC64_ADDR32:
|
||||
case R_PPC64_REL32:
|
||||
checkInt(Loc, Val, 32, Type);
|
||||
@ -454,9 +718,17 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
case R_PPC64_TOC:
|
||||
write64(Loc, Val);
|
||||
break;
|
||||
case R_PPC64_REL14: {
|
||||
uint32_t Mask = 0x0000FFFC;
|
||||
checkInt(Loc, Val, 16, Type);
|
||||
checkAlignment(Loc, Val, 4, Type);
|
||||
write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
|
||||
break;
|
||||
}
|
||||
case R_PPC64_REL24: {
|
||||
uint32_t Mask = 0x03FFFFFC;
|
||||
checkInt(Loc, Val, 24, Type);
|
||||
checkInt(Loc, Val, 26, Type);
|
||||
checkAlignment(Loc, Val, 4, Type);
|
||||
write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
|
||||
break;
|
||||
}
|
||||
@ -470,9 +742,30 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
|
||||
// Decides whether the branch relocation `Type`, located at `BranchAddr` and
// targeting symbol `S`, has to be routed through a thunk.
//
// Returns true when the target lives in the PLT (a call stub is required) or
// when the displacement does not fit the branch encoding (a range-extending
// thunk is required). `Expr` and `File` are not consulted here.
bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
                       uint64_t BranchAddr, const Symbol &S) const {
  // Only the REL14 and REL24 branch relocations are candidates for thunks.
  // (A leftover unconditional `return Type == R_PPC64_REL24 && S.isInPlt();`
  // used to precede this check and made everything below unreachable.)
  if (Type != R_PPC64_REL14 && Type != R_PPC64_REL24)
    return false;

  // If a function is in the Plt it needs to be called with a call-stub.
  if (S.isInPlt())
    return true;

  // If a symbol is a weak undefined and we are compiling an executable
  // it doesn't need a range-extending thunk since it can't be called.
  if (S.isUndefWeak() && !Config->Shared)
    return false;

  // If the offset exceeds the range of the branch type then it will need
  // a range-extending thunk.
  return !inBranchRange(Type, BranchAddr, S.getVA());
}
|
||||
|
||||
// Reports whether a branch carrying relocation `Type` at address `Src` can
// encode the displacement to `Dst` directly: REL14 displacements must fit in a
// signed 16-bit value, REL24 displacements in a signed 26-bit value. Any other
// relocation type is a programming error.
bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
  const int64_t Displacement = Dst - Src;
  switch (Type) {
  case R_PPC64_REL14:
    return isInt<16>(Displacement);
  case R_PPC64_REL24:
    return isInt<26>(Displacement);
  default:
    llvm_unreachable("unsupported relocation type used in branch");
  }
}
|
||||
|
||||
RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data,
|
||||
@ -511,9 +804,8 @@ void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
case R_PPC64_GOT_TLSGD16_LO: {
|
||||
// Relax from addi r3, rA, sym@got@tlsgd@l to
|
||||
// ld r3, sym@got@tprel@l(rA)
|
||||
uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U;
|
||||
uint32_t InputRegister = (read32(Loc - EndianOffset) & (0x1f << 16));
|
||||
write32(Loc - EndianOffset, 0xE8600000 | InputRegister);
|
||||
uint32_t InputRegister = (readInstrFromHalf16(Loc) & (0x1f << 16));
|
||||
writeInstrFromHalf16(Loc, 0xE8600000 | InputRegister);
|
||||
relocateOne(Loc, R_PPC64_GOT_TPREL16_LO_DS, Val);
|
||||
return;
|
||||
}
|
||||
@ -526,6 +818,113 @@ void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
|
||||
}
|
||||
}
|
||||
|
||||
// The prologue for a split-stack function is expected to look roughly
|
||||
// like this:
|
||||
// .Lglobal_entry_point:
|
||||
// # TOC pointer initialization.
|
||||
// ...
|
||||
// .Llocal_entry_point:
|
||||
// # load the __private_ss member of the thread's tcbhead.
|
||||
// ld r0,-0x7000-64(r13)
|
||||
// # subtract the function's stack size from the stack pointer.
|
||||
// addis r12, r1, ha(-stack-frame size)
|
||||
// addi r12, r12, l(-stack-frame size)
|
||||
// # compare needed to actual and branch to allocate_more_stack if more
|
||||
// # space is needed, otherwise fallthrough to 'normal' function body.
|
||||
// cmpld cr7,r12,r0
|
||||
// blt- cr7, .Lallocate_more_stack
|
||||
//
|
||||
// -) The allocate_more_stack block might be placed after the split-stack
|
||||
// prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body`
|
||||
// instead.
|
||||
// -) If either the addis or addi is not needed due to the stack size being
|
||||
// smaller than 32K or a multiple of 64K they will be replaced with a nop,
|
||||
// but there will always be 2 instructions the linker can overwrite for the
|
||||
// adjusted stack size.
|
||||
//
|
||||
// The linker's job here is to increase the stack size used in the addis/addi
|
||||
// pair by split-stack-size-adjust.
|
||||
// addis r12, r1, ha(-stack-frame size - split-stack-adjust-size)
|
||||
// addi r12, r12, l(-stack-frame size - split-stack-adjust-size)
|
||||
bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
|
||||
uint8_t StOther) const {
|
||||
// If the caller has a global entry point adjust the buffer past it. The start
|
||||
// of the split-stack prologue will be at the local entry point.
|
||||
Loc += getPPC64GlobalEntryToLocalEntryOffset(StOther);
|
||||
|
||||
// At the very least we expect to see a load of some split-stack data from the
|
||||
// tcb, and 2 instructions that calculate the ending stack address this
|
||||
// function will require. If there is not enough room for at least 3
|
||||
// instructions it can't be a split-stack prologue.
|
||||
if (Loc + 12 >= End)
|
||||
return false;
|
||||
|
||||
// First instruction must be `ld r0, -0x7000-64(r13)`
|
||||
if (read32(Loc) != 0xe80d8fc0)
|
||||
return false;
|
||||
|
||||
int16_t HiImm = 0;
|
||||
int16_t LoImm = 0;
|
||||
// First instruction can be either an addis if the frame size is larger then
|
||||
// 32K, or an addi if the size is less then 32K.
|
||||
int32_t FirstInstr = read32(Loc + 4);
|
||||
if (getPrimaryOpCode(FirstInstr) == 15) {
|
||||
HiImm = FirstInstr & 0xFFFF;
|
||||
} else if (getPrimaryOpCode(FirstInstr) == 14) {
|
||||
LoImm = FirstInstr & 0xFFFF;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Second instruction is either an addi or a nop. If the first instruction was
|
||||
// an addi then LoImm is set and the second instruction must be a nop.
|
||||
uint32_t SecondInstr = read32(Loc + 8);
|
||||
if (!LoImm && getPrimaryOpCode(SecondInstr) == 14) {
|
||||
LoImm = SecondInstr & 0xFFFF;
|
||||
} else if (SecondInstr != 0x60000000) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The register operands of the first instruction should be the stack-pointer
|
||||
// (r1) as the input (RA) and r12 as the output (RT). If the second
|
||||
// instruction is not a nop, then it should use r12 as both input and output.
|
||||
auto CheckRegOperands = [](uint32_t Instr, uint8_t ExpectedRT,
|
||||
uint8_t ExpectedRA) {
|
||||
return ((Instr & 0x3E00000) >> 21 == ExpectedRT) &&
|
||||
((Instr & 0x1F0000) >> 16 == ExpectedRA);
|
||||
};
|
||||
if (!CheckRegOperands(FirstInstr, 12, 1))
|
||||
return false;
|
||||
if (SecondInstr != 0x60000000 && !CheckRegOperands(SecondInstr, 12, 12))
|
||||
return false;
|
||||
|
||||
int32_t StackFrameSize = (HiImm * 65536) + LoImm;
|
||||
// Check that the adjusted size doesn't overflow what we can represent with 2
|
||||
// instructions.
|
||||
if (StackFrameSize < Config->SplitStackAdjustSize + INT32_MIN) {
|
||||
error(getErrorLocation(Loc) + "split-stack prologue adjustment overflows");
|
||||
return false;
|
||||
}
|
||||
|
||||
int32_t AdjustedStackFrameSize =
|
||||
StackFrameSize - Config->SplitStackAdjustSize;
|
||||
|
||||
LoImm = AdjustedStackFrameSize & 0xFFFF;
|
||||
HiImm = (AdjustedStackFrameSize + 0x8000) >> 16;
|
||||
if (HiImm) {
|
||||
write32(Loc + 4, 0x3D810000 | (uint16_t)HiImm);
|
||||
// If the low immediate is zero the second instruction will be a nop.
|
||||
SecondInstr = LoImm ? 0x398C0000 | (uint16_t)LoImm : 0x60000000;
|
||||
write32(Loc + 8, SecondInstr);
|
||||
} else {
|
||||
// addi r12, r1, imm
|
||||
write32(Loc + 4, (0x39810000) | (uint16_t)LoImm);
|
||||
write32(Loc + 8, 0x60000000);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
TargetInfo *elf::getPPC64TargetInfo() {
|
||||
static PPC64 Target;
|
||||
return &Target;
|
||||
|
||||
279
deps/lld/ELF/Arch/RISCV.cpp
vendored
Normal file
279
deps/lld/ELF/Arch/RISCV.cpp
vendored
Normal file
@ -0,0 +1,279 @@
|
||||
//===- RISCV.cpp ----------------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Linker
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "InputFiles.h"
|
||||
#include "Target.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::object;
|
||||
using namespace llvm::support::endian;
|
||||
using namespace llvm::ELF;
|
||||
using namespace lld;
|
||||
using namespace lld::elf;
|
||||
|
||||
namespace {
|
||||
|
||||
class RISCV final : public TargetInfo {
|
||||
public:
|
||||
RISCV();
|
||||
uint32_t calcEFlags() const override;
|
||||
RelExpr getRelExpr(RelType Type, const Symbol &S,
|
||||
const uint8_t *Loc) const override;
|
||||
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
// The constructor only records which relocation type is the no-op one.
RISCV::RISCV() {
  NoneRel = R_RISCV_NONE;
}
|
||||
|
||||
// Reads the e_flags field from the ELF header of object file `F`, treating it
// as a 64-bit or 32-bit little-endian object according to Config->Is64.
static uint32_t getEFlags(InputFile *F) {
  uint32_t Flags;
  if (Config->Is64)
    Flags = cast<ObjFile<ELF64LE>>(F)->getObj().getHeader()->e_flags;
  else
    Flags = cast<ObjFile<ELF32LE>>(F)->getObj().getHeader()->e_flags;
  return Flags;
}
|
||||
|
||||
uint32_t RISCV::calcEFlags() const {
|
||||
assert(!ObjectFiles.empty());
|
||||
|
||||
uint32_t Target = getEFlags(ObjectFiles.front());
|
||||
|
||||
for (InputFile *F : ObjectFiles) {
|
||||
uint32_t EFlags = getEFlags(F);
|
||||
if (EFlags & EF_RISCV_RVC)
|
||||
Target |= EF_RISCV_RVC;
|
||||
|
||||
if ((EFlags & EF_RISCV_FLOAT_ABI) != (Target & EF_RISCV_FLOAT_ABI))
|
||||
error(toString(F) +
|
||||
": cannot link object files with different floating-point ABI");
|
||||
|
||||
if ((EFlags & EF_RISCV_RVE) != (Target & EF_RISCV_RVE))
|
||||
error(toString(F) +
|
||||
": cannot link object files with different EF_RISCV_RVE");
|
||||
}
|
||||
|
||||
return Target;
|
||||
}
|
||||
|
||||
// Maps a RISC-V relocation type to the expression kind used to evaluate it.
// `S` and `Loc` are not consulted. Any type not listed explicitly is treated
// as an absolute relocation.
RelExpr RISCV::getRelExpr(const RelType Type, const Symbol &S,
                          const uint8_t *Loc) const {
  switch (Type) {
  // Relaxation/alignment markers are classified as hints.
  case R_RISCV_ALIGN:
  case R_RISCV_RELAX:
    return R_HINT;

  // The low 12-bit halves of a PC-relative pair get their own expression.
  case R_RISCV_PCREL_LO12_S:
  case R_RISCV_PCREL_LO12_I:
    return R_RISCV_PC_INDIRECT;

  // Plain PC-relative relocations.
  case R_RISCV_32_PCREL:
  case R_RISCV_RVC_JUMP:
  case R_RISCV_RVC_BRANCH:
  case R_RISCV_PCREL_HI20:
  case R_RISCV_CALL:
  case R_RISCV_BRANCH:
  case R_RISCV_JAL:
    return R_PC;

  default:
    return R_ABS;
  }
}
|
||||
|
||||
// Extract bits V[Begin:End], where the range is inclusive on both ends and
// Begin >= End. The selected field is shifted down so that bit `End` of V ends
// up in bit 0 of the result; the result is truncated to 32 bits.
//
// Any Begin in 0..63 is accepted: the mask is built without ever shifting a
// 64-bit value by 64, which would be undefined behaviour.
static uint32_t extractBits(uint64_t V, uint32_t Begin, uint32_t End) {
  const uint64_t Mask = Begin >= 63 ? ~0ULL : (1ULL << (Begin + 1)) - 1;
  return (V & Mask) >> End;
}
|
||||
|
||||
void RISCV::relocateOne(uint8_t *Loc, const RelType Type,
|
||||
const uint64_t Val) const {
|
||||
switch (Type) {
|
||||
case R_RISCV_32:
|
||||
write32le(Loc, Val);
|
||||
return;
|
||||
case R_RISCV_64:
|
||||
write64le(Loc, Val);
|
||||
return;
|
||||
|
||||
case R_RISCV_RVC_BRANCH: {
|
||||
checkInt(Loc, static_cast<int64_t>(Val) >> 1, 8, Type);
|
||||
checkAlignment(Loc, Val, 2, Type);
|
||||
uint16_t Insn = read16le(Loc) & 0xE383;
|
||||
uint16_t Imm8 = extractBits(Val, 8, 8) << 12;
|
||||
uint16_t Imm4_3 = extractBits(Val, 4, 3) << 10;
|
||||
uint16_t Imm7_6 = extractBits(Val, 7, 6) << 5;
|
||||
uint16_t Imm2_1 = extractBits(Val, 2, 1) << 3;
|
||||
uint16_t Imm5 = extractBits(Val, 5, 5) << 2;
|
||||
Insn |= Imm8 | Imm4_3 | Imm7_6 | Imm2_1 | Imm5;
|
||||
|
||||
write16le(Loc, Insn);
|
||||
return;
|
||||
}
|
||||
|
||||
case R_RISCV_RVC_JUMP: {
|
||||
checkInt(Loc, static_cast<int64_t>(Val) >> 1, 11, Type);
|
||||
checkAlignment(Loc, Val, 2, Type);
|
||||
uint16_t Insn = read16le(Loc) & 0xE003;
|
||||
uint16_t Imm11 = extractBits(Val, 11, 11) << 12;
|
||||
uint16_t Imm4 = extractBits(Val, 4, 4) << 11;
|
||||
uint16_t Imm9_8 = extractBits(Val, 9, 8) << 9;
|
||||
uint16_t Imm10 = extractBits(Val, 10, 10) << 8;
|
||||
uint16_t Imm6 = extractBits(Val, 6, 6) << 7;
|
||||
uint16_t Imm7 = extractBits(Val, 7, 7) << 6;
|
||||
uint16_t Imm3_1 = extractBits(Val, 3, 1) << 3;
|
||||
uint16_t Imm5 = extractBits(Val, 5, 5) << 2;
|
||||
Insn |= Imm11 | Imm4 | Imm9_8 | Imm10 | Imm6 | Imm7 | Imm3_1 | Imm5;
|
||||
|
||||
write16le(Loc, Insn);
|
||||
return;
|
||||
}
|
||||
|
||||
case R_RISCV_RVC_LUI: {
|
||||
int32_t Imm = ((Val + 0x800) >> 12);
|
||||
checkUInt(Loc, Imm, 6, Type);
|
||||
if (Imm == 0) { // `c.lui rd, 0` is illegal, convert to `c.li rd, 0`
|
||||
write16le(Loc, (read16le(Loc) & 0x0F83) | 0x4000);
|
||||
} else {
|
||||
uint16_t Imm17 = extractBits(Val + 0x800, 17, 17) << 12;
|
||||
uint16_t Imm16_12 = extractBits(Val + 0x800, 16, 12) << 2;
|
||||
write16le(Loc, (read16le(Loc) & 0xEF83) | Imm17 | Imm16_12);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
case R_RISCV_JAL: {
|
||||
checkInt(Loc, static_cast<int64_t>(Val) >> 1, 20, Type);
|
||||
checkAlignment(Loc, Val, 2, Type);
|
||||
|
||||
uint32_t Insn = read32le(Loc) & 0xFFF;
|
||||
uint32_t Imm20 = extractBits(Val, 20, 20) << 31;
|
||||
uint32_t Imm10_1 = extractBits(Val, 10, 1) << 21;
|
||||
uint32_t Imm11 = extractBits(Val, 11, 11) << 20;
|
||||
uint32_t Imm19_12 = extractBits(Val, 19, 12) << 12;
|
||||
Insn |= Imm20 | Imm10_1 | Imm11 | Imm19_12;
|
||||
|
||||
write32le(Loc, Insn);
|
||||
return;
|
||||
}
|
||||
|
||||
case R_RISCV_BRANCH: {
|
||||
checkInt(Loc, static_cast<int64_t>(Val) >> 1, 12, Type);
|
||||
checkAlignment(Loc, Val, 2, Type);
|
||||
|
||||
uint32_t Insn = read32le(Loc) & 0x1FFF07F;
|
||||
uint32_t Imm12 = extractBits(Val, 12, 12) << 31;
|
||||
uint32_t Imm10_5 = extractBits(Val, 10, 5) << 25;
|
||||
uint32_t Imm4_1 = extractBits(Val, 4, 1) << 8;
|
||||
uint32_t Imm11 = extractBits(Val, 11, 11) << 7;
|
||||
Insn |= Imm12 | Imm10_5 | Imm4_1 | Imm11;
|
||||
|
||||
write32le(Loc, Insn);
|
||||
return;
|
||||
}
|
||||
|
||||
// auipc + jalr pair
|
||||
case R_RISCV_CALL: {
|
||||
checkInt(Loc, Val, 32, Type);
|
||||
if (isInt<32>(Val)) {
|
||||
relocateOne(Loc, R_RISCV_PCREL_HI20, Val);
|
||||
relocateOne(Loc + 4, R_RISCV_PCREL_LO12_I, Val);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
case R_RISCV_PCREL_HI20:
|
||||
case R_RISCV_HI20: {
|
||||
checkInt(Loc, Val, 32, Type);
|
||||
uint32_t Hi = Val + 0x800;
|
||||
write32le(Loc, (read32le(Loc) & 0xFFF) | (Hi & 0xFFFFF000));
|
||||
return;
|
||||
}
|
||||
|
||||
case R_RISCV_PCREL_LO12_I:
|
||||
case R_RISCV_LO12_I: {
|
||||
checkInt(Loc, Val, 32, Type);
|
||||
uint32_t Hi = Val + 0x800;
|
||||
uint32_t Lo = Val - (Hi & 0xFFFFF000);
|
||||
write32le(Loc, (read32le(Loc) & 0xFFFFF) | ((Lo & 0xFFF) << 20));
|
||||
return;
|
||||
}
|
||||
|
||||
case R_RISCV_PCREL_LO12_S:
|
||||
case R_RISCV_LO12_S: {
|
||||
checkInt(Loc, Val, 32, Type);
|
||||
uint32_t Hi = Val + 0x800;
|
||||
uint32_t Lo = Val - (Hi & 0xFFFFF000);
|
||||
uint32_t Imm11_5 = extractBits(Lo, 11, 5) << 25;
|
||||
uint32_t Imm4_0 = extractBits(Lo, 4, 0) << 7;
|
||||
write32le(Loc, (read32le(Loc) & 0x1FFF07F) | Imm11_5 | Imm4_0);
|
||||
return;
|
||||
}
|
||||
|
||||
case R_RISCV_ADD8:
|
||||
*Loc += Val;
|
||||
return;
|
||||
case R_RISCV_ADD16:
|
||||
write16le(Loc, read16le(Loc) + Val);
|
||||
return;
|
||||
case R_RISCV_ADD32:
|
||||
write32le(Loc, read32le(Loc) + Val);
|
||||
return;
|
||||
case R_RISCV_ADD64:
|
||||
write64le(Loc, read64le(Loc) + Val);
|
||||
return;
|
||||
case R_RISCV_SUB6:
|
||||
*Loc = (*Loc & 0xc0) | (((*Loc & 0x3f) - Val) & 0x3f);
|
||||
return;
|
||||
case R_RISCV_SUB8:
|
||||
*Loc -= Val;
|
||||
return;
|
||||
case R_RISCV_SUB16:
|
||||
write16le(Loc, read16le(Loc) - Val);
|
||||
return;
|
||||
case R_RISCV_SUB32:
|
||||
write32le(Loc, read32le(Loc) - Val);
|
||||
return;
|
||||
case R_RISCV_SUB64:
|
||||
write64le(Loc, read64le(Loc) - Val);
|
||||
return;
|
||||
case R_RISCV_SET6:
|
||||
*Loc = (*Loc & 0xc0) | (Val & 0x3f);
|
||||
return;
|
||||
case R_RISCV_SET8:
|
||||
*Loc = Val;
|
||||
return;
|
||||
case R_RISCV_SET16:
|
||||
write16le(Loc, Val);
|
||||
return;
|
||||
case R_RISCV_SET32:
|
||||
case R_RISCV_32_PCREL:
|
||||
write32le(Loc, Val);
|
||||
return;
|
||||
|
||||
case R_RISCV_ALIGN:
|
||||
case R_RISCV_RELAX:
|
||||
return; // Ignored (for now)
|
||||
case R_RISCV_NONE:
|
||||
return; // Do nothing
|
||||
|
||||
// These are handled by the dynamic linker
|
||||
case R_RISCV_RELATIVE:
|
||||
case R_RISCV_COPY:
|
||||
case R_RISCV_JUMP_SLOT:
|
||||
// GP-relative relocations are only produced after relaxation, which
|
||||
// we don't support for now
|
||||
case R_RISCV_GPREL_I:
|
||||
case R_RISCV_GPREL_S:
|
||||
default:
|
||||
error(getErrorLocation(Loc) +
|
||||
"unimplemented relocation: " + toString(Type));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a pointer to the single RISCV target object, which is created on
// first use as a function-local static.
TargetInfo *elf::getRISCVTargetInfo() {
  static RISCV Instance;
  return &Instance;
}
|
||||
1
deps/lld/ELF/Arch/SPARCV9.cpp
vendored
1
deps/lld/ELF/Arch/SPARCV9.cpp
vendored
@ -35,6 +35,7 @@ public:
|
||||
SPARCV9::SPARCV9() {
|
||||
CopyRel = R_SPARC_COPY;
|
||||
GotRel = R_SPARC_GLOB_DAT;
|
||||
NoneRel = R_SPARC_NONE;
|
||||
PltRel = R_SPARC_JMP_SLOT;
|
||||
RelativeRel = R_SPARC_RELATIVE;
|
||||
GotEntrySize = 8;
|
||||
|
||||
25
deps/lld/ELF/Arch/X86.cpp
vendored
25
deps/lld/ELF/Arch/X86.cpp
vendored
@ -48,6 +48,7 @@ public:
|
||||
X86::X86() {
|
||||
CopyRel = R_386_COPY;
|
||||
GotRel = R_386_GLOB_DAT;
|
||||
NoneRel = R_386_NONE;
|
||||
PltRel = R_386_JUMP_SLOT;
|
||||
IRelativeRel = R_386_IRELATIVE;
|
||||
RelativeRel = R_386_RELATIVE;
|
||||
@ -59,7 +60,11 @@ X86::X86() {
|
||||
PltEntrySize = 16;
|
||||
PltHeaderSize = 16;
|
||||
TlsGdRelaxSkip = 2;
|
||||
TrapInstr = 0xcccccccc; // 0xcc = INT3
|
||||
TrapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
|
||||
|
||||
// Align to the non-PAE large page size (known as a superpage or huge page).
|
||||
// FreeBSD automatically promotes large, superpage-aligned allocations.
|
||||
DefaultImageBase = 0x400000;
|
||||
}
|
||||
|
||||
static bool hasBaseReg(uint8_t ModRM) { return (ModRM & 0xc7) != 0x5; }
|
||||
@ -152,7 +157,7 @@ RelExpr X86::adjustRelaxExpr(RelType Type, const uint8_t *Data,
|
||||
}
|
||||
|
||||
void X86::writeGotPltHeader(uint8_t *Buf) const {
|
||||
write32le(Buf, InX::Dynamic->getVA());
|
||||
write32le(Buf, In.Dynamic->getVA());
|
||||
}
|
||||
|
||||
void X86::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
|
||||
@ -183,8 +188,8 @@ void X86::writePltHeader(uint8_t *Buf) const {
|
||||
};
|
||||
memcpy(Buf, V, sizeof(V));
|
||||
|
||||
uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
|
||||
uint32_t GotPlt = InX::GotPlt->getVA() - Ebx;
|
||||
uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
|
||||
uint32_t GotPlt = In.GotPlt->getVA() - Ebx;
|
||||
write32le(Buf + 2, GotPlt + 4);
|
||||
write32le(Buf + 8, GotPlt + 8);
|
||||
return;
|
||||
@ -196,7 +201,7 @@ void X86::writePltHeader(uint8_t *Buf) const {
|
||||
0x90, 0x90, 0x90, 0x90, // nop
|
||||
};
|
||||
memcpy(Buf, PltData, sizeof(PltData));
|
||||
uint32_t GotPlt = InX::GotPlt->getVA();
|
||||
uint32_t GotPlt = In.GotPlt->getVA();
|
||||
write32le(Buf + 2, GotPlt + 4);
|
||||
write32le(Buf + 8, GotPlt + 8);
|
||||
}
|
||||
@ -213,7 +218,7 @@ void X86::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
|
||||
|
||||
if (Config->Pic) {
|
||||
// jmp *foo@GOT(%ebx)
|
||||
uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
|
||||
uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
|
||||
Buf[1] = 0xa3;
|
||||
write32le(Buf + 2, GotPltEntryAddr - Ebx);
|
||||
} else {
|
||||
@ -447,8 +452,8 @@ void RetpolinePic::writePltHeader(uint8_t *Buf) const {
|
||||
};
|
||||
memcpy(Buf, Insn, sizeof(Insn));
|
||||
|
||||
uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
|
||||
uint32_t GotPlt = InX::GotPlt->getVA() - Ebx;
|
||||
uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
|
||||
uint32_t GotPlt = In.GotPlt->getVA() - Ebx;
|
||||
write32le(Buf + 2, GotPlt + 4);
|
||||
write32le(Buf + 9, GotPlt + 8);
|
||||
}
|
||||
@ -467,7 +472,7 @@ void RetpolinePic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
|
||||
};
|
||||
memcpy(Buf, Insn, sizeof(Insn));
|
||||
|
||||
uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
|
||||
uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
|
||||
unsigned Off = getPltEntryOffset(Index);
|
||||
write32le(Buf + 3, GotPltEntryAddr - Ebx);
|
||||
write32le(Buf + 8, -Off - 12 + 32);
|
||||
@ -506,7 +511,7 @@ void RetpolineNoPic::writePltHeader(uint8_t *Buf) const {
|
||||
};
|
||||
memcpy(Buf, Insn, sizeof(Insn));
|
||||
|
||||
uint32_t GotPlt = InX::GotPlt->getVA();
|
||||
uint32_t GotPlt = In.GotPlt->getVA();
|
||||
write32le(Buf + 2, GotPlt + 4);
|
||||
write32le(Buf + 8, GotPlt + 8);
|
||||
}
|
||||
|
||||
91
deps/lld/ELF/Arch/X86_64.cpp
vendored
91
deps/lld/ELF/Arch/X86_64.cpp
vendored
@ -43,8 +43,8 @@ public:
|
||||
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
|
||||
bool adjustPrologueForCrossSplitStack(uint8_t *Loc,
|
||||
uint8_t *End) const override;
|
||||
bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
|
||||
uint8_t StOther) const override;
|
||||
|
||||
private:
|
||||
void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
|
||||
@ -55,6 +55,7 @@ private:
|
||||
template <class ELFT> X86_64<ELFT>::X86_64() {
|
||||
CopyRel = R_X86_64_COPY;
|
||||
GotRel = R_X86_64_GLOB_DAT;
|
||||
NoneRel = R_X86_64_NONE;
|
||||
PltRel = R_X86_64_JUMP_SLOT;
|
||||
RelativeRel = R_X86_64_RELATIVE;
|
||||
IRelativeRel = R_X86_64_IRELATIVE;
|
||||
@ -66,7 +67,7 @@ template <class ELFT> X86_64<ELFT>::X86_64() {
|
||||
PltEntrySize = 16;
|
||||
PltHeaderSize = 16;
|
||||
TlsGdRelaxSkip = 2;
|
||||
TrapInstr = 0xcccccccc; // 0xcc = INT3
|
||||
TrapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
|
||||
|
||||
// Align to the large page size (known as a superpage or huge page).
|
||||
// FreeBSD automatically promotes large, superpage-aligned allocations.
|
||||
@ -124,7 +125,7 @@ template <class ELFT> void X86_64<ELFT>::writeGotPltHeader(uint8_t *Buf) const {
|
||||
// required, but it is documented in the psabi and the glibc dynamic linker
|
||||
// seems to use it (note that this is relevant for linking ld.so, not any
|
||||
// other program).
|
||||
write64le(Buf, InX::Dynamic->getVA());
|
||||
write64le(Buf, In.Dynamic->getVA());
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
@ -140,8 +141,8 @@ template <class ELFT> void X86_64<ELFT>::writePltHeader(uint8_t *Buf) const {
|
||||
0x0f, 0x1f, 0x40, 0x00, // nop
|
||||
};
|
||||
memcpy(Buf, PltData, sizeof(PltData));
|
||||
uint64_t GotPlt = InX::GotPlt->getVA();
|
||||
uint64_t Plt = InX::Plt->getVA();
|
||||
uint64_t GotPlt = In.GotPlt->getVA();
|
||||
uint64_t Plt = In.Plt->getVA();
|
||||
write32le(Buf + 2, GotPlt - Plt + 2); // GOTPLT+8
|
||||
write32le(Buf + 8, GotPlt - Plt + 4); // GOTPLT+16
|
||||
}
|
||||
@ -263,15 +264,6 @@ void X86_64<ELFT>::relaxTlsIeToLe(uint8_t *Loc, RelType Type,
|
||||
template <class ELFT>
|
||||
void X86_64<ELFT>::relaxTlsLdToLe(uint8_t *Loc, RelType Type,
|
||||
uint64_t Val) const {
|
||||
// Convert
|
||||
// leaq bar@tlsld(%rip), %rdi
|
||||
// callq __tls_get_addr@PLT
|
||||
// leaq bar@dtpoff(%rax), %rcx
|
||||
// to
|
||||
// .word 0x6666
|
||||
// .byte 0x66
|
||||
// mov %fs:0,%rax
|
||||
// leaq bar@tpoff(%rax), %rcx
|
||||
if (Type == R_X86_64_DTPOFF64) {
|
||||
write64le(Loc, Val);
|
||||
return;
|
||||
@ -286,7 +278,37 @@ void X86_64<ELFT>::relaxTlsLdToLe(uint8_t *Loc, RelType Type,
|
||||
0x66, // .byte 0x66
|
||||
0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax
|
||||
};
|
||||
memcpy(Loc - 3, Inst, sizeof(Inst));
|
||||
|
||||
if (Loc[4] == 0xe8) {
|
||||
// Convert
|
||||
// leaq bar@tlsld(%rip), %rdi # 48 8d 3d <Loc>
|
||||
// callq __tls_get_addr@PLT # e8 <disp32>
|
||||
// leaq bar@dtpoff(%rax), %rcx
|
||||
// to
|
||||
// .word 0x6666
|
||||
// .byte 0x66
|
||||
// mov %fs:0,%rax
|
||||
// leaq bar@tpoff(%rax), %rcx
|
||||
memcpy(Loc - 3, Inst, sizeof(Inst));
|
||||
return;
|
||||
}
|
||||
|
||||
if (Loc[4] == 0xff && Loc[5] == 0x15) {
|
||||
// Convert
|
||||
// leaq x@tlsld(%rip),%rdi # 48 8d 3d <Loc>
|
||||
// call *__tls_get_addr@GOTPCREL(%rip) # ff 15 <disp32>
|
||||
// to
|
||||
// .long 0x66666666
|
||||
// movq %fs:0,%rax
|
||||
// See "Table 11.9: LD -> LE Code Transition (LP64)" in
|
||||
// https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
|
||||
Loc[-3] = 0x66;
|
||||
memcpy(Loc - 2, Inst, sizeof(Inst));
|
||||
return;
|
||||
}
|
||||
|
||||
error(getErrorLocation(Loc - 3) +
|
||||
"expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
@ -481,23 +503,27 @@ namespace {
|
||||
// B) Or a load of a stack pointer offset with an lea to r10 or r11.
|
||||
template <>
|
||||
bool X86_64<ELF64LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
|
||||
uint8_t *End) const {
|
||||
uint8_t *End,
|
||||
uint8_t StOther) const {
|
||||
if (Loc + 8 >= End)
|
||||
return false;
|
||||
|
||||
// Replace "cmp %fs:0x70,%rsp" and subsequent branch
|
||||
// with "stc, nopl 0x0(%rax,%rax,1)"
|
||||
if (Loc + 8 < End && memcmp(Loc, "\x64\x48\x3b\x24\x25", 4) == 0) {
|
||||
if (memcmp(Loc, "\x64\x48\x3b\x24\x25", 5) == 0) {
|
||||
memcpy(Loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Adjust "lea -0x200(%rsp),%r10" to lea "-0x4200(%rsp),%r10"
|
||||
if (Loc + 7 < End && memcmp(Loc, "\x4c\x8d\x94\x24\x00\xfe\xff", 7) == 0) {
|
||||
memcpy(Loc, "\x4c\x8d\x94\x24\x00\xbe\xff", 7);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Adjust "lea -0x200(%rsp),%r11" to lea "-0x4200(%rsp),%r11"
|
||||
if (Loc + 7 < End && memcmp(Loc, "\x4c\x8d\x9c\x24\x00\xfe\xff", 7) == 0) {
|
||||
memcpy(Loc, "\x4c\x8d\x9c\x24\x00\xbe\xff", 7);
|
||||
// Adjust "lea X(%rsp),%rYY" to lea "(X - 0x4000)(%rsp),%rYY" where rYY could
|
||||
// be r10 or r11. The lea instruction feeds a subsequent compare which checks
|
||||
// if there is X available stack space. Making X larger effectively reserves
|
||||
// that much additional space. The stack grows downward so subtract the value.
|
||||
if (memcmp(Loc, "\x4c\x8d\x94\x24", 4) == 0 ||
|
||||
memcmp(Loc, "\x4c\x8d\x9c\x24", 4) == 0) {
|
||||
// The offset bytes are encoded four bytes after the start of the
|
||||
// instruction.
|
||||
write32le(Loc + 4, read32le(Loc + 4) - 0x4000);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -505,7 +531,8 @@ bool X86_64<ELF64LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
|
||||
|
||||
template <>
|
||||
bool X86_64<ELF32LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
|
||||
uint8_t *End) const {
|
||||
uint8_t *End,
|
||||
uint8_t StOther) const {
|
||||
llvm_unreachable("Target doesn't support split stacks.");
|
||||
}
|
||||
|
||||
@ -566,8 +593,8 @@ template <class ELFT> void Retpoline<ELFT>::writePltHeader(uint8_t *Buf) const {
|
||||
};
|
||||
memcpy(Buf, Insn, sizeof(Insn));
|
||||
|
||||
uint64_t GotPlt = InX::GotPlt->getVA();
|
||||
uint64_t Plt = InX::Plt->getVA();
|
||||
uint64_t GotPlt = In.GotPlt->getVA();
|
||||
uint64_t Plt = In.Plt->getVA();
|
||||
write32le(Buf + 2, GotPlt - Plt - 6 + 8);
|
||||
write32le(Buf + 9, GotPlt - Plt - 13 + 16);
|
||||
}
|
||||
@ -586,7 +613,7 @@ void Retpoline<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
|
||||
};
|
||||
memcpy(Buf, Insn, sizeof(Insn));
|
||||
|
||||
uint64_t Off = TargetInfo::getPltEntryOffset(Index);
|
||||
uint64_t Off = getPltEntryOffset(Index);
|
||||
|
||||
write32le(Buf + 3, GotPltEntryAddr - PltEntryAddr - 7);
|
||||
write32le(Buf + 8, -Off - 12 + 32);
|
||||
@ -629,7 +656,7 @@ void RetpolineZNow<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
|
||||
memcpy(Buf, Insn, sizeof(Insn));
|
||||
|
||||
write32le(Buf + 3, GotPltEntryAddr - PltEntryAddr - 7);
|
||||
write32le(Buf + 8, -TargetInfo::getPltEntryOffset(Index) - 12);
|
||||
write32le(Buf + 8, -getPltEntryOffset(Index) - 12);
|
||||
}
|
||||
|
||||
template <class ELFT> static TargetInfo *getTargetInfo() {
|
||||
|
||||
4
deps/lld/ELF/CMakeLists.txt
vendored
4
deps/lld/ELF/CMakeLists.txt
vendored
@ -15,17 +15,19 @@ add_lld_library(lldELF
|
||||
Arch/Hexagon.cpp
|
||||
Arch/Mips.cpp
|
||||
Arch/MipsArchTree.cpp
|
||||
Arch/MSP430.cpp
|
||||
Arch/PPC.cpp
|
||||
Arch/PPC64.cpp
|
||||
Arch/RISCV.cpp
|
||||
Arch/SPARCV9.cpp
|
||||
Arch/X86.cpp
|
||||
Arch/X86_64.cpp
|
||||
CallGraphSort.cpp
|
||||
DWARF.cpp
|
||||
Driver.cpp
|
||||
DriverUtils.cpp
|
||||
EhFrame.cpp
|
||||
Filesystem.cpp
|
||||
GdbIndex.cpp
|
||||
ICF.cpp
|
||||
InputFiles.cpp
|
||||
InputSection.cpp
|
||||
|
||||
51
deps/lld/ELF/CallGraphSort.cpp
vendored
51
deps/lld/ELF/CallGraphSort.cpp
vendored
@ -57,10 +57,7 @@ struct Edge {
|
||||
};
|
||||
|
||||
struct Cluster {
|
||||
Cluster(int Sec, size_t S) {
|
||||
Sections.push_back(Sec);
|
||||
Size = S;
|
||||
}
|
||||
Cluster(int Sec, size_t S) : Sections{Sec}, Size(S) {}
|
||||
|
||||
double getDensity() const {
|
||||
if (Size == 0)
|
||||
@ -72,7 +69,7 @@ struct Cluster {
|
||||
size_t Size = 0;
|
||||
uint64_t Weight = 0;
|
||||
uint64_t InitialWeight = 0;
|
||||
std::vector<Edge> Preds;
|
||||
Edge BestPred = {-1, 0};
|
||||
};
|
||||
|
||||
class CallGraphSort {
|
||||
@ -96,12 +93,14 @@ constexpr int MAX_DENSITY_DEGRADATION = 8;
|
||||
constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
|
||||
} // end anonymous namespace
|
||||
|
||||
typedef std::pair<const InputSectionBase *, const InputSectionBase *>
|
||||
SectionPair;
|
||||
|
||||
// Take the edge list in Config->CallGraphProfile, resolve symbol names to
|
||||
// Symbols, and generate a graph between InputSections with the provided
|
||||
// weights.
|
||||
CallGraphSort::CallGraphSort() {
|
||||
llvm::MapVector<std::pair<const InputSectionBase *, const InputSectionBase *>,
|
||||
uint64_t> &Profile = Config->CallGraphProfile;
|
||||
MapVector<SectionPair, uint64_t> &Profile = Config->CallGraphProfile;
|
||||
DenseMap<const InputSectionBase *, int> SecToCluster;
|
||||
|
||||
auto GetOrCreateNode = [&](const InputSectionBase *IS) -> int {
|
||||
@ -114,7 +113,7 @@ CallGraphSort::CallGraphSort() {
|
||||
};
|
||||
|
||||
// Create the graph.
|
||||
for (const auto &C : Profile) {
|
||||
for (std::pair<SectionPair, uint64_t> &C : Profile) {
|
||||
const auto *FromSB = cast<InputSectionBase>(C.first.first->Repl);
|
||||
const auto *ToSB = cast<InputSectionBase>(C.first.second->Repl);
|
||||
uint64_t Weight = C.second;
|
||||
@ -136,8 +135,12 @@ CallGraphSort::CallGraphSort() {
|
||||
if (From == To)
|
||||
continue;
|
||||
|
||||
// Add an edge
|
||||
Clusters[To].Preds.push_back({From, Weight});
|
||||
// Remember the best edge.
|
||||
Cluster &ToC = Clusters[To];
|
||||
if (ToC.BestPred.From == -1 || ToC.BestPred.Weight < Weight) {
|
||||
ToC.BestPred.From = From;
|
||||
ToC.BestPred.Weight = Weight;
|
||||
}
|
||||
}
|
||||
for (Cluster &C : Clusters)
|
||||
C.InitialWeight = C.Weight;
|
||||
@ -146,9 +149,7 @@ CallGraphSort::CallGraphSort() {
|
||||
// It's bad to merge clusters which would degrade the density too much.
|
||||
static bool isNewDensityBad(Cluster &A, Cluster &B) {
|
||||
double NewDensity = double(A.Weight + B.Weight) / double(A.Size + B.Size);
|
||||
if (NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION)
|
||||
return true;
|
||||
return false;
|
||||
return NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION;
|
||||
}
|
||||
|
||||
static void mergeClusters(Cluster &Into, Cluster &From) {
|
||||
@ -167,9 +168,9 @@ void CallGraphSort::groupClusters() {
|
||||
std::vector<int> SortedSecs(Clusters.size());
|
||||
std::vector<Cluster *> SecToCluster(Clusters.size());
|
||||
|
||||
for (int SI = 0, SE = Clusters.size(); SI != SE; ++SI) {
|
||||
SortedSecs[SI] = SI;
|
||||
SecToCluster[SI] = &Clusters[SI];
|
||||
for (size_t I = 0; I < Clusters.size(); ++I) {
|
||||
SortedSecs[I] = I;
|
||||
SecToCluster[I] = &Clusters[I];
|
||||
}
|
||||
|
||||
std::stable_sort(SortedSecs.begin(), SortedSecs.end(), [&](int A, int B) {
|
||||
@ -181,21 +182,11 @@ void CallGraphSort::groupClusters() {
|
||||
// been merged into another cluster yet.
|
||||
Cluster &C = Clusters[SI];
|
||||
|
||||
int BestPred = -1;
|
||||
uint64_t BestWeight = 0;
|
||||
|
||||
for (Edge &E : C.Preds) {
|
||||
if (BestPred == -1 || E.Weight > BestWeight) {
|
||||
BestPred = E.From;
|
||||
BestWeight = E.Weight;
|
||||
}
|
||||
}
|
||||
|
||||
// don't consider merging if the edge is unlikely.
|
||||
if (BestWeight * 10 <= C.InitialWeight)
|
||||
// Don't consider merging if the edge is unlikely.
|
||||
if (C.BestPred.From == -1 || C.BestPred.Weight * 10 <= C.InitialWeight)
|
||||
continue;
|
||||
|
||||
Cluster *PredC = SecToCluster[BestPred];
|
||||
Cluster *PredC = SecToCluster[C.BestPred.From];
|
||||
if (PredC == &C)
|
||||
continue;
|
||||
|
||||
@ -229,7 +220,7 @@ DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
|
||||
groupClusters();
|
||||
|
||||
// Generate order.
|
||||
llvm::DenseMap<const InputSectionBase *, int> OrderMap;
|
||||
DenseMap<const InputSectionBase *, int> OrderMap;
|
||||
ssize_t CurOrder = 1;
|
||||
|
||||
for (const Cluster &C : Clusters)
|
||||
|
||||
12
deps/lld/ELF/Config.h
vendored
12
deps/lld/ELF/Config.h
vendored
@ -47,7 +47,7 @@ enum class ICFLevel { None, Safe, All };
|
||||
enum class StripPolicy { None, All, Debug };
|
||||
|
||||
// For --unresolved-symbols.
|
||||
enum class UnresolvedPolicy { ReportError, Warn, Ignore, IgnoreAll };
|
||||
enum class UnresolvedPolicy { ReportError, Warn, Ignore };
|
||||
|
||||
// For --orphan-handling.
|
||||
enum class OrphanHandlingPolicy { Place, Warn, Error };
|
||||
@ -127,6 +127,7 @@ struct Configuration {
|
||||
bool AsNeeded = false;
|
||||
bool Bsymbolic;
|
||||
bool BsymbolicFunctions;
|
||||
bool CallGraphProfileSort;
|
||||
bool CheckSections;
|
||||
bool CompressDebugSections;
|
||||
bool Cref;
|
||||
@ -134,11 +135,13 @@ struct Configuration {
|
||||
bool Demangle = true;
|
||||
bool DisableVerify;
|
||||
bool EhFrameHdr;
|
||||
bool EmitLLVM;
|
||||
bool EmitRelocs;
|
||||
bool EnableNewDtags;
|
||||
bool ExecuteOnly;
|
||||
bool ExportDynamic;
|
||||
bool FixCortexA53Errata843419;
|
||||
bool FormatBinary = false;
|
||||
bool GcSections;
|
||||
bool GdbIndex;
|
||||
bool GnuHash = false;
|
||||
@ -156,6 +159,7 @@ struct Configuration {
|
||||
bool OFormatBinary;
|
||||
bool Omagic;
|
||||
bool OptRemarksWithHotness;
|
||||
bool PicThunk;
|
||||
bool Pie;
|
||||
bool PrintGcSections;
|
||||
bool PrintIcfSections;
|
||||
@ -170,19 +174,24 @@ struct Configuration {
|
||||
bool Trace;
|
||||
bool ThinLTOEmitImportsFiles;
|
||||
bool ThinLTOIndexOnly;
|
||||
bool TocOptimize;
|
||||
bool UndefinedVersion;
|
||||
bool UseAndroidRelrTags = false;
|
||||
bool WarnBackrefs;
|
||||
bool WarnCommon;
|
||||
bool WarnIfuncTextrel;
|
||||
bool WarnMissingEntry;
|
||||
bool WarnSymbolOrdering;
|
||||
bool WriteAddends;
|
||||
bool ZCombreloc;
|
||||
bool ZCopyreloc;
|
||||
bool ZExecstack;
|
||||
bool ZGlobal;
|
||||
bool ZHazardplt;
|
||||
bool ZInitfirst;
|
||||
bool ZInterpose;
|
||||
bool ZKeepTextSectionPrefix;
|
||||
bool ZNodefaultlib;
|
||||
bool ZNodelete;
|
||||
bool ZNodlopen;
|
||||
bool ZNow;
|
||||
@ -212,6 +221,7 @@ struct Configuration {
|
||||
unsigned LTOO;
|
||||
unsigned Optimize;
|
||||
unsigned ThinLTOJobs;
|
||||
int32_t SplitStackAdjustSize;
|
||||
|
||||
// The following config options do not directly correspond to any
|
||||
// particular command line options.
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
//===- GdbIndex.cpp -------------------------------------------------------===//
|
||||
//===- DWARF.cpp ----------------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Linker
|
||||
//
|
||||
@ -14,8 +14,9 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "GdbIndex.h"
|
||||
#include "DWARF.h"
|
||||
#include "Symbols.h"
|
||||
#include "Target.h"
|
||||
#include "lld/Common/Memory.h"
|
||||
#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
|
||||
#include "llvm/Object/ELFObjectFile.h"
|
||||
@ -29,24 +30,28 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *Obj) {
|
||||
for (InputSectionBase *Sec : Obj->getSections()) {
|
||||
if (!Sec)
|
||||
continue;
|
||||
if (LLDDWARFSection *M = StringSwitch<LLDDWARFSection *>(Sec->Name)
|
||||
.Case(".debug_info", &InfoSection)
|
||||
.Case(".debug_ranges", &RangeSection)
|
||||
.Case(".debug_line", &LineSection)
|
||||
.Default(nullptr)) {
|
||||
Sec->maybeDecompress();
|
||||
M->Data = toStringRef(Sec->Data);
|
||||
|
||||
if (LLDDWARFSection *M =
|
||||
StringSwitch<LLDDWARFSection *>(Sec->Name)
|
||||
.Case(".debug_addr", &AddrSection)
|
||||
.Case(".debug_gnu_pubnames", &GnuPubNamesSection)
|
||||
.Case(".debug_gnu_pubtypes", &GnuPubTypesSection)
|
||||
.Case(".debug_info", &InfoSection)
|
||||
.Case(".debug_ranges", &RangeSection)
|
||||
.Case(".debug_rnglists", &RngListsSection)
|
||||
.Case(".debug_line", &LineSection)
|
||||
.Default(nullptr)) {
|
||||
M->Data = toStringRef(Sec->data());
|
||||
M->Sec = Sec;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (Sec->Name == ".debug_abbrev")
|
||||
AbbrevSection = toStringRef(Sec->Data);
|
||||
else if (Sec->Name == ".debug_gnu_pubnames")
|
||||
GnuPubNamesSection = toStringRef(Sec->Data);
|
||||
else if (Sec->Name == ".debug_gnu_pubtypes")
|
||||
GnuPubTypesSection = toStringRef(Sec->Data);
|
||||
AbbrevSection = toStringRef(Sec->data());
|
||||
else if (Sec->Name == ".debug_str")
|
||||
StrSection = toStringRef(Sec->Data);
|
||||
StrSection = toStringRef(Sec->data());
|
||||
else if (Sec->Name == ".debug_line_str")
|
||||
LineStringSection = toStringRef(Sec->data());
|
||||
}
|
||||
}
|
||||
|
||||
@ -73,7 +78,10 @@ LLDDwarfObj<ELFT>::findAux(const InputSectionBase &Sec, uint64_t Pos,
|
||||
// Broken debug info can point to a non-Defined symbol.
|
||||
auto *DR = dyn_cast<Defined>(&File->getRelocTargetSym(Rel));
|
||||
if (!DR) {
|
||||
error("unsupported relocation target while parsing debug info");
|
||||
RelType Type = Rel.getType(Config->IsMips64EL);
|
||||
if (Type != Target->NoneRel)
|
||||
error(toString(File) + ": relocation " + lld::toString(Type) + " at 0x" +
|
||||
llvm::utohexstr(Rel.r_offset) + " has unsupported target");
|
||||
return None;
|
||||
}
|
||||
uint64_t Val = DR->Value + getAddend<ELFT>(Rel);
|
||||
93
deps/lld/ELF/GdbIndex.h → deps/lld/ELF/DWARF.h
vendored
93
deps/lld/ELF/GdbIndex.h → deps/lld/ELF/DWARF.h
vendored
@ -1,4 +1,4 @@
|
||||
//===- GdbIndex.h --------------------------------------------*- C++ -*-===//
|
||||
//===- DWARF.h -----------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Linker
|
||||
//
|
||||
@ -7,10 +7,11 @@
|
||||
//
|
||||
//===-------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLD_ELF_GDB_INDEX_H
|
||||
#define LLD_ELF_GDB_INDEX_H
|
||||
#ifndef LLD_ELF_DWARF_H
|
||||
#define LLD_ELF_DWARF_H
|
||||
|
||||
#include "InputFiles.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
||||
#include "llvm/Object/ELF.h"
|
||||
|
||||
@ -24,44 +25,66 @@ struct LLDDWARFSection final : public llvm::DWARFSection {
|
||||
};
|
||||
|
||||
template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
|
||||
LLDDWARFSection InfoSection;
|
||||
LLDDWARFSection RangeSection;
|
||||
LLDDWARFSection LineSection;
|
||||
StringRef AbbrevSection;
|
||||
StringRef GnuPubNamesSection;
|
||||
StringRef GnuPubTypesSection;
|
||||
StringRef StrSection;
|
||||
public:
|
||||
explicit LLDDwarfObj(ObjFile<ELFT> *Obj);
|
||||
|
||||
void forEachInfoSections(
|
||||
llvm::function_ref<void(const llvm::DWARFSection &)> F) const override {
|
||||
F(InfoSection);
|
||||
}
|
||||
|
||||
const llvm::DWARFSection &getRangeSection() const override {
|
||||
return RangeSection;
|
||||
}
|
||||
|
||||
const llvm::DWARFSection &getRnglistsSection() const override {
|
||||
return RngListsSection;
|
||||
}
|
||||
|
||||
const llvm::DWARFSection &getLineSection() const override {
|
||||
return LineSection;
|
||||
}
|
||||
|
||||
const llvm::DWARFSection &getAddrSection() const override {
|
||||
return AddrSection;
|
||||
}
|
||||
|
||||
const llvm::DWARFSection &getGnuPubNamesSection() const override {
|
||||
return GnuPubNamesSection;
|
||||
}
|
||||
|
||||
const llvm::DWARFSection &getGnuPubTypesSection() const override {
|
||||
return GnuPubTypesSection;
|
||||
}
|
||||
|
||||
StringRef getFileName() const override { return ""; }
|
||||
StringRef getAbbrevSection() const override { return AbbrevSection; }
|
||||
StringRef getStringSection() const override { return StrSection; }
|
||||
StringRef getLineStringSection() const override { return LineStringSection; }
|
||||
|
||||
bool isLittleEndian() const override {
|
||||
return ELFT::TargetEndianness == llvm::support::little;
|
||||
}
|
||||
|
||||
llvm::Optional<llvm::RelocAddrEntry> find(const llvm::DWARFSection &Sec,
|
||||
uint64_t Pos) const override;
|
||||
|
||||
private:
|
||||
template <class RelTy>
|
||||
llvm::Optional<llvm::RelocAddrEntry> findAux(const InputSectionBase &Sec,
|
||||
uint64_t Pos,
|
||||
ArrayRef<RelTy> Rels) const;
|
||||
|
||||
public:
|
||||
explicit LLDDwarfObj(ObjFile<ELFT> *Obj);
|
||||
const llvm::DWARFSection &getInfoSection() const override {
|
||||
return InfoSection;
|
||||
}
|
||||
const llvm::DWARFSection &getRangeSection() const override {
|
||||
return RangeSection;
|
||||
}
|
||||
const llvm::DWARFSection &getLineSection() const override {
|
||||
return LineSection;
|
||||
}
|
||||
StringRef getFileName() const override { return ""; }
|
||||
StringRef getAbbrevSection() const override { return AbbrevSection; }
|
||||
StringRef getStringSection() const override { return StrSection; }
|
||||
StringRef getGnuPubNamesSection() const override {
|
||||
return GnuPubNamesSection;
|
||||
}
|
||||
StringRef getGnuPubTypesSection() const override {
|
||||
return GnuPubTypesSection;
|
||||
}
|
||||
bool isLittleEndian() const override {
|
||||
return ELFT::TargetEndianness == llvm::support::little;
|
||||
}
|
||||
llvm::Optional<llvm::RelocAddrEntry> find(const llvm::DWARFSection &Sec,
|
||||
uint64_t Pos) const override;
|
||||
LLDDWARFSection GnuPubNamesSection;
|
||||
LLDDWARFSection GnuPubTypesSection;
|
||||
LLDDWARFSection InfoSection;
|
||||
LLDDWARFSection RangeSection;
|
||||
LLDDWARFSection RngListsSection;
|
||||
LLDDWARFSection LineSection;
|
||||
LLDDWARFSection AddrSection;
|
||||
StringRef AbbrevSection;
|
||||
StringRef StrSection;
|
||||
StringRef LineStringSection;
|
||||
};
|
||||
|
||||
} // namespace elf
|
||||
397
deps/lld/ELF/Driver.cpp
vendored
397
deps/lld/ELF/Driver.cpp
vendored
@ -63,6 +63,7 @@ using namespace llvm;
|
||||
using namespace llvm::ELF;
|
||||
using namespace llvm::object;
|
||||
using namespace llvm::sys;
|
||||
using namespace llvm::support;
|
||||
|
||||
using namespace lld;
|
||||
using namespace lld::elf;
|
||||
@ -74,7 +75,7 @@ static void setConfigs(opt::InputArgList &Args);
|
||||
|
||||
bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly,
|
||||
raw_ostream &Error) {
|
||||
errorHandler().LogName = sys::path::filename(Args[0]);
|
||||
errorHandler().LogName = args::getFilenameWithoutExe(Args[0]);
|
||||
errorHandler().ErrorLimitExceededMsg =
|
||||
"too many errors emitted, stopping now (use "
|
||||
"-error-limit=0 to see all errors)";
|
||||
@ -84,7 +85,6 @@ bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly,
|
||||
|
||||
InputSections.clear();
|
||||
OutputSections.clear();
|
||||
Tar = nullptr;
|
||||
BinaryFiles.clear();
|
||||
BitcodeFiles.clear();
|
||||
ObjectFiles.clear();
|
||||
@ -94,6 +94,10 @@ bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly,
|
||||
Driver = make<LinkerDriver>();
|
||||
Script = make<LinkerScript>();
|
||||
Symtab = make<SymbolTable>();
|
||||
|
||||
Tar = nullptr;
|
||||
memset(&In, 0, sizeof(In));
|
||||
|
||||
Config->ProgName = Args[0];
|
||||
|
||||
Driver->main(Args);
|
||||
@ -125,9 +129,11 @@ static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef Emul) {
|
||||
.Case("elf32_x86_64", {ELF32LEKind, EM_X86_64})
|
||||
.Cases("elf32btsmip", "elf32btsmipn32", {ELF32BEKind, EM_MIPS})
|
||||
.Cases("elf32ltsmip", "elf32ltsmipn32", {ELF32LEKind, EM_MIPS})
|
||||
.Case("elf32ppc", {ELF32BEKind, EM_PPC})
|
||||
.Case("elf32lriscv", {ELF32LEKind, EM_RISCV})
|
||||
.Cases("elf32ppc", "elf32ppclinux", {ELF32BEKind, EM_PPC})
|
||||
.Case("elf64btsmip", {ELF64BEKind, EM_MIPS})
|
||||
.Case("elf64ltsmip", {ELF64LEKind, EM_MIPS})
|
||||
.Case("elf64lriscv", {ELF64LEKind, EM_RISCV})
|
||||
.Case("elf64ppc", {ELF64BEKind, EM_PPC64})
|
||||
.Case("elf64lppc", {ELF64LEKind, EM_PPC64})
|
||||
.Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64})
|
||||
@ -183,7 +189,7 @@ void LinkerDriver::addFile(StringRef Path, bool WithLOption) {
|
||||
return;
|
||||
MemoryBufferRef MBRef = *Buffer;
|
||||
|
||||
if (InBinary) {
|
||||
if (Config->FormatBinary) {
|
||||
Files.push_back(make<BinaryFile>(MBRef));
|
||||
return;
|
||||
}
|
||||
@ -218,7 +224,7 @@ void LinkerDriver::addFile(StringRef Path, bool WithLOption) {
|
||||
return;
|
||||
}
|
||||
case file_magic::elf_shared_object:
|
||||
if (Config->Relocatable) {
|
||||
if (Config->Static || Config->Relocatable) {
|
||||
error("attempted static link of dynamic object " + Path);
|
||||
return;
|
||||
}
|
||||
@ -269,14 +275,17 @@ static void initLLVM() {
|
||||
|
||||
// Some command line options or some combinations of them are not allowed.
|
||||
// This function checks for such errors.
|
||||
static void checkOptions(opt::InputArgList &Args) {
|
||||
static void checkOptions() {
|
||||
// The MIPS ABI as of 2016 does not support the GNU-style symbol lookup
|
||||
// table which is a relatively new feature.
|
||||
if (Config->EMachine == EM_MIPS && Config->GnuHash)
|
||||
error("the .gnu.hash section is not compatible with the MIPS target.");
|
||||
error("the .gnu.hash section is not compatible with the MIPS target");
|
||||
|
||||
if (Config->FixCortexA53Errata843419 && Config->EMachine != EM_AARCH64)
|
||||
error("--fix-cortex-a53-843419 is only supported on AArch64 targets.");
|
||||
error("--fix-cortex-a53-843419 is only supported on AArch64 targets");
|
||||
|
||||
if (Config->TocOptimize && Config->EMachine != EM_PPC64)
|
||||
error("--toc-optimize is only supported on the PowerPC64 target");
|
||||
|
||||
if (Config->Pie && Config->Shared)
|
||||
error("-shared and -pie may not be used together");
|
||||
@ -336,12 +345,13 @@ static bool getZFlag(opt::InputArgList &Args, StringRef K1, StringRef K2,
|
||||
return Default;
|
||||
}
|
||||
|
||||
static bool isKnown(StringRef S) {
|
||||
static bool isKnownZFlag(StringRef S) {
|
||||
return S == "combreloc" || S == "copyreloc" || S == "defs" ||
|
||||
S == "execstack" || S == "hazardplt" || S == "initfirst" ||
|
||||
S == "execstack" || S == "global" || S == "hazardplt" ||
|
||||
S == "initfirst" || S == "interpose" ||
|
||||
S == "keep-text-section-prefix" || S == "lazy" || S == "muldefs" ||
|
||||
S == "nocombreloc" || S == "nocopyreloc" || S == "nodelete" ||
|
||||
S == "nodlopen" || S == "noexecstack" ||
|
||||
S == "nocombreloc" || S == "nocopyreloc" || S == "nodefaultlib" ||
|
||||
S == "nodelete" || S == "nodlopen" || S == "noexecstack" ||
|
||||
S == "nokeep-text-section-prefix" || S == "norelro" || S == "notext" ||
|
||||
S == "now" || S == "origin" || S == "relro" || S == "retpolineplt" ||
|
||||
S == "rodynamic" || S == "text" || S == "wxneeded" ||
|
||||
@ -351,7 +361,7 @@ static bool isKnown(StringRef S) {
|
||||
// Report an error for an unknown -z option.
|
||||
static void checkZOptions(opt::InputArgList &Args) {
|
||||
for (auto *Arg : Args.filtered(OPT_z))
|
||||
if (!isKnown(Arg->getValue()))
|
||||
if (!isKnownZFlag(Arg->getValue()))
|
||||
error("unknown -z value: " + StringRef(Arg->getValue()));
|
||||
}
|
||||
|
||||
@ -386,6 +396,23 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr) {
|
||||
if (Args.hasArg(OPT_v) || Args.hasArg(OPT_version))
|
||||
message(getLLDVersion() + " (compatible with GNU linkers)");
|
||||
|
||||
if (const char *Path = getReproduceOption(Args)) {
|
||||
// Note that --reproduce is a debug option so you can ignore it
|
||||
// if you are trying to understand the whole picture of the code.
|
||||
Expected<std::unique_ptr<TarWriter>> ErrOrWriter =
|
||||
TarWriter::create(Path, path::stem(Path));
|
||||
if (ErrOrWriter) {
|
||||
Tar = std::move(*ErrOrWriter);
|
||||
Tar->append("response.txt", createResponseFile(Args));
|
||||
Tar->append("version.txt", getLLDVersion() + "\n");
|
||||
} else {
|
||||
error("--reproduce: " + toString(ErrOrWriter.takeError()));
|
||||
}
|
||||
}
|
||||
|
||||
readConfigs(Args);
|
||||
checkZOptions(Args);
|
||||
|
||||
// The behavior of -v or --version is a bit strange, but this is
|
||||
// needed for compatibility with GNU linkers.
|
||||
if (Args.hasArg(OPT_v) && !Args.hasArg(OPT_INPUT))
|
||||
@ -393,24 +420,6 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr) {
|
||||
if (Args.hasArg(OPT_version))
|
||||
return;
|
||||
|
||||
if (const char *Path = getReproduceOption(Args)) {
|
||||
// Note that --reproduce is a debug option so you can ignore it
|
||||
// if you are trying to understand the whole picture of the code.
|
||||
Expected<std::unique_ptr<TarWriter>> ErrOrWriter =
|
||||
TarWriter::create(Path, path::stem(Path));
|
||||
if (ErrOrWriter) {
|
||||
Tar = ErrOrWriter->get();
|
||||
Tar->append("response.txt", createResponseFile(Args));
|
||||
Tar->append("version.txt", getLLDVersion() + "\n");
|
||||
make<std::unique_ptr<TarWriter>>(std::move(*ErrOrWriter));
|
||||
} else {
|
||||
error(Twine("--reproduce: failed to open ") + Path + ": " +
|
||||
toString(ErrOrWriter.takeError()));
|
||||
}
|
||||
}
|
||||
|
||||
readConfigs(Args);
|
||||
checkZOptions(Args);
|
||||
initLLVM();
|
||||
createFiles(Args);
|
||||
if (errorCount())
|
||||
@ -418,7 +427,7 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr) {
|
||||
|
||||
inferMachineType();
|
||||
setConfigs(Args);
|
||||
checkOptions(Args);
|
||||
checkOptions();
|
||||
if (errorCount())
|
||||
return;
|
||||
|
||||
@ -448,9 +457,6 @@ static std::string getRpath(opt::InputArgList &Args) {
|
||||
// Determines what we should do if there are remaining unresolved
|
||||
// symbols after the name resolution.
|
||||
static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &Args) {
|
||||
if (Args.hasArg(OPT_relocatable))
|
||||
return UnresolvedPolicy::IgnoreAll;
|
||||
|
||||
UnresolvedPolicy ErrorOrWarn = Args.hasFlag(OPT_error_unresolved_symbols,
|
||||
OPT_warn_unresolved_symbols, true)
|
||||
? UnresolvedPolicy::ReportError
|
||||
@ -497,14 +503,11 @@ static Target2Policy getTarget2(opt::InputArgList &Args) {
|
||||
}
|
||||
|
||||
static bool isOutputFormatBinary(opt::InputArgList &Args) {
|
||||
if (auto *Arg = Args.getLastArg(OPT_oformat)) {
|
||||
StringRef S = Arg->getValue();
|
||||
if (S == "binary")
|
||||
return true;
|
||||
if (S.startswith("elf"))
|
||||
return false;
|
||||
StringRef S = Args.getLastArgValue(OPT_oformat, "elf");
|
||||
if (S == "binary")
|
||||
return true;
|
||||
if (!S.startswith("elf"))
|
||||
error("unknown --oformat value: " + S);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -645,38 +648,56 @@ static std::pair<bool, bool> getPackDynRelocs(opt::InputArgList &Args) {
|
||||
|
||||
static void readCallGraph(MemoryBufferRef MB) {
|
||||
// Build a map from symbol name to section
|
||||
DenseMap<StringRef, const Symbol *> SymbolNameToSymbol;
|
||||
DenseMap<StringRef, Symbol *> Map;
|
||||
for (InputFile *File : ObjectFiles)
|
||||
for (Symbol *Sym : File->getSymbols())
|
||||
SymbolNameToSymbol[Sym->getName()] = Sym;
|
||||
Map[Sym->getName()] = Sym;
|
||||
|
||||
for (StringRef L : args::getLines(MB)) {
|
||||
SmallVector<StringRef, 3> Fields;
|
||||
L.split(Fields, ' ');
|
||||
uint64_t Count;
|
||||
if (Fields.size() != 3 || !to_integer(Fields[2], Count))
|
||||
fatal(MB.getBufferIdentifier() + ": parse error");
|
||||
const Symbol *FromSym = SymbolNameToSymbol.lookup(Fields[0]);
|
||||
const Symbol *ToSym = SymbolNameToSymbol.lookup(Fields[1]);
|
||||
if (Config->WarnSymbolOrdering) {
|
||||
if (!FromSym)
|
||||
warn(MB.getBufferIdentifier() + ": no such symbol: " + Fields[0]);
|
||||
if (!ToSym)
|
||||
warn(MB.getBufferIdentifier() + ": no such symbol: " + Fields[1]);
|
||||
auto FindSection = [&](StringRef Name) -> InputSectionBase * {
|
||||
Symbol *Sym = Map.lookup(Name);
|
||||
if (!Sym) {
|
||||
if (Config->WarnSymbolOrdering)
|
||||
warn(MB.getBufferIdentifier() + ": no such symbol: " + Name);
|
||||
return nullptr;
|
||||
}
|
||||
maybeWarnUnorderableSymbol(Sym);
|
||||
|
||||
if (Defined *DR = dyn_cast_or_null<Defined>(Sym))
|
||||
return dyn_cast_or_null<InputSectionBase>(DR->Section);
|
||||
return nullptr;
|
||||
};
|
||||
|
||||
for (StringRef Line : args::getLines(MB)) {
|
||||
SmallVector<StringRef, 3> Fields;
|
||||
Line.split(Fields, ' ');
|
||||
uint64_t Count;
|
||||
|
||||
if (Fields.size() != 3 || !to_integer(Fields[2], Count)) {
|
||||
error(MB.getBufferIdentifier() + ": parse error");
|
||||
return;
|
||||
}
|
||||
|
||||
if (InputSectionBase *From = FindSection(Fields[0]))
|
||||
if (InputSectionBase *To = FindSection(Fields[1]))
|
||||
Config->CallGraphProfile[std::make_pair(From, To)] += Count;
|
||||
}
|
||||
}
|
||||
|
||||
template <class ELFT> static void readCallGraphsFromObjectFiles() {
|
||||
for (auto File : ObjectFiles) {
|
||||
auto *Obj = cast<ObjFile<ELFT>>(File);
|
||||
|
||||
for (const Elf_CGProfile_Impl<ELFT> &CGPE : Obj->CGProfile) {
|
||||
auto *FromSym = dyn_cast<Defined>(&Obj->getSymbol(CGPE.cgp_from));
|
||||
auto *ToSym = dyn_cast<Defined>(&Obj->getSymbol(CGPE.cgp_to));
|
||||
if (!FromSym || !ToSym)
|
||||
continue;
|
||||
|
||||
auto *From = dyn_cast_or_null<InputSectionBase>(FromSym->Section);
|
||||
auto *To = dyn_cast_or_null<InputSectionBase>(ToSym->Section);
|
||||
if (From && To)
|
||||
Config->CallGraphProfile[{From, To}] += CGPE.cgp_weight;
|
||||
}
|
||||
if (!FromSym || !ToSym || Count == 0)
|
||||
continue;
|
||||
warnUnorderableSymbol(FromSym);
|
||||
warnUnorderableSymbol(ToSym);
|
||||
const Defined *FromSymD = dyn_cast<Defined>(FromSym);
|
||||
const Defined *ToSymD = dyn_cast<Defined>(ToSym);
|
||||
if (!FromSymD || !ToSymD)
|
||||
continue;
|
||||
const auto *FromSB = dyn_cast_or_null<InputSectionBase>(FromSymD->Section);
|
||||
const auto *ToSB = dyn_cast_or_null<InputSectionBase>(ToSymD->Section);
|
||||
if (!FromSB || !ToSB)
|
||||
continue;
|
||||
Config->CallGraphProfile[std::make_pair(FromSB, ToSB)] += Count;
|
||||
}
|
||||
}
|
||||
|
||||
@ -753,7 +774,10 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
|
||||
Config->DynamicLinker = getDynamicLinker(Args);
|
||||
Config->EhFrameHdr =
|
||||
Args.hasFlag(OPT_eh_frame_hdr, OPT_no_eh_frame_hdr, false);
|
||||
Config->EmitLLVM = Args.hasArg(OPT_plugin_opt_emit_llvm, false);
|
||||
Config->EmitRelocs = Args.hasArg(OPT_emit_relocs);
|
||||
Config->CallGraphProfileSort = Args.hasFlag(
|
||||
OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
|
||||
Config->EnableNewDtags =
|
||||
Args.hasFlag(OPT_enable_new_dtags, OPT_disable_new_dtags, true);
|
||||
Config->Entry = Args.getLastArgValue(OPT_entry);
|
||||
@ -808,6 +832,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
|
||||
Config->SingleRoRx = Args.hasArg(OPT_no_rosegment);
|
||||
Config->SoName = Args.getLastArgValue(OPT_soname);
|
||||
Config->SortSection = getSortSection(Args);
|
||||
Config->SplitStackAdjustSize = args::getInteger(Args, OPT_split_stack_adjust_size, 16384);
|
||||
Config->Strip = getStrip(Args);
|
||||
Config->Sysroot = Args.getLastArgValue(OPT_sysroot);
|
||||
Config->Target1Rel = Args.hasFlag(OPT_target1_rel, OPT_target1_abs, false);
|
||||
@ -837,15 +862,20 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
|
||||
Config->WarnBackrefs =
|
||||
Args.hasFlag(OPT_warn_backrefs, OPT_no_warn_backrefs, false);
|
||||
Config->WarnCommon = Args.hasFlag(OPT_warn_common, OPT_no_warn_common, false);
|
||||
Config->WarnIfuncTextrel =
|
||||
Args.hasFlag(OPT_warn_ifunc_textrel, OPT_no_warn_ifunc_textrel, false);
|
||||
Config->WarnSymbolOrdering =
|
||||
Args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
|
||||
Config->ZCombreloc = getZFlag(Args, "combreloc", "nocombreloc", true);
|
||||
Config->ZCopyreloc = getZFlag(Args, "copyreloc", "nocopyreloc", true);
|
||||
Config->ZExecstack = getZFlag(Args, "execstack", "noexecstack", false);
|
||||
Config->ZGlobal = hasZOption(Args, "global");
|
||||
Config->ZHazardplt = hasZOption(Args, "hazardplt");
|
||||
Config->ZInitfirst = hasZOption(Args, "initfirst");
|
||||
Config->ZInterpose = hasZOption(Args, "interpose");
|
||||
Config->ZKeepTextSectionPrefix = getZFlag(
|
||||
Args, "keep-text-section-prefix", "nokeep-text-section-prefix", false);
|
||||
Config->ZNodefaultlib = hasZOption(Args, "nodefaultlib");
|
||||
Config->ZNodelete = hasZOption(Args, "nodelete");
|
||||
Config->ZNodlopen = hasZOption(Args, "nodlopen");
|
||||
Config->ZNow = getZFlag(Args, "now", "lazy", false);
|
||||
@ -876,6 +906,9 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
|
||||
if (Config->ThinLTOJobs == 0)
|
||||
error("--thinlto-jobs: number of threads must be > 0");
|
||||
|
||||
if (Config->SplitStackAdjustSize < 0)
|
||||
error("--split-stack-adjust-size: size must be >= 0");
|
||||
|
||||
// Parse ELF{32,64}{LE,BE} and CPU type.
|
||||
if (auto *Arg = Args.getLastArg(OPT_m)) {
|
||||
StringRef S = Arg->getValue();
|
||||
@ -964,22 +997,18 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
|
||||
// This function initializes such members. See Config.h for the details
|
||||
// of these values.
|
||||
static void setConfigs(opt::InputArgList &Args) {
|
||||
ELFKind Kind = Config->EKind;
|
||||
uint16_t Machine = Config->EMachine;
|
||||
ELFKind K = Config->EKind;
|
||||
uint16_t M = Config->EMachine;
|
||||
|
||||
Config->CopyRelocs = (Config->Relocatable || Config->EmitRelocs);
|
||||
Config->Is64 = (Kind == ELF64LEKind || Kind == ELF64BEKind);
|
||||
Config->IsLE = (Kind == ELF32LEKind || Kind == ELF64LEKind);
|
||||
Config->Endianness =
|
||||
Config->IsLE ? support::endianness::little : support::endianness::big;
|
||||
Config->IsMips64EL = (Kind == ELF64LEKind && Machine == EM_MIPS);
|
||||
Config->Is64 = (K == ELF64LEKind || K == ELF64BEKind);
|
||||
Config->IsLE = (K == ELF32LEKind || K == ELF64LEKind);
|
||||
Config->Endianness = Config->IsLE ? endianness::little : endianness::big;
|
||||
Config->IsMips64EL = (K == ELF64LEKind && M == EM_MIPS);
|
||||
Config->Pic = Config->Pie || Config->Shared;
|
||||
Config->PicThunk = Args.hasArg(OPT_pic_veneer, Config->Pic);
|
||||
Config->Wordsize = Config->Is64 ? 8 : 4;
|
||||
|
||||
// There is an ILP32 ABI for x86-64, although it's not very popular.
|
||||
// It is called the x32 ABI.
|
||||
bool IsX32 = (Kind == ELF32LEKind && Machine == EM_X86_64);
|
||||
|
||||
// ELF defines two different ways to store relocation addends as shown below:
|
||||
//
|
||||
// Rel: Addends are stored to the location where relocations are applied.
|
||||
@ -993,8 +1022,9 @@ static void setConfigs(opt::InputArgList &Args) {
|
||||
// You cannot choose which one, Rel or Rela, you want to use. Instead each
|
||||
// ABI defines which one you need to use. The following expression expresses
|
||||
// that.
|
||||
Config->IsRela =
|
||||
(Config->Is64 || IsX32 || Machine == EM_PPC) && Machine != EM_MIPS;
|
||||
Config->IsRela = M == EM_AARCH64 || M == EM_AMDGPU || M == EM_HEXAGON ||
|
||||
M == EM_PPC || M == EM_PPC64 || M == EM_RISCV ||
|
||||
M == EM_X86_64;
|
||||
|
||||
// If the output uses REL relocations we must store the dynamic relocation
|
||||
// addends to the output sections. We also store addends for RELA relocations
|
||||
@ -1004,10 +1034,13 @@ static void setConfigs(opt::InputArgList &Args) {
|
||||
Config->WriteAddends = Args.hasFlag(OPT_apply_dynamic_relocs,
|
||||
OPT_no_apply_dynamic_relocs, false) ||
|
||||
!Config->IsRela;
|
||||
|
||||
Config->TocOptimize =
|
||||
Args.hasFlag(OPT_toc_optimize, OPT_no_toc_optimize, M == EM_PPC64);
|
||||
}
|
||||
|
||||
// Returns a value of "-format" option.
|
||||
static bool getBinaryOption(StringRef S) {
|
||||
static bool isFormatBinary(StringRef S) {
|
||||
if (S == "binary")
|
||||
return true;
|
||||
if (S == "elf" || S == "default")
|
||||
@ -1034,7 +1067,10 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) {
|
||||
StringRef From;
|
||||
StringRef To;
|
||||
std::tie(From, To) = StringRef(Arg->getValue()).split('=');
|
||||
readDefsym(From, MemoryBufferRef(To, "-defsym"));
|
||||
if (From.empty() || To.empty())
|
||||
error("-defsym: syntax error: " + StringRef(Arg->getValue()));
|
||||
else
|
||||
readDefsym(From, MemoryBufferRef(To, "-defsym"));
|
||||
break;
|
||||
}
|
||||
case OPT_script:
|
||||
@ -1049,7 +1085,7 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) {
|
||||
Config->AsNeeded = true;
|
||||
break;
|
||||
case OPT_format:
|
||||
InBinary = getBinaryOption(Arg->getValue());
|
||||
Config->FormatBinary = isFormatBinary(Arg->getValue());
|
||||
break;
|
||||
case OPT_no_as_needed:
|
||||
Config->AsNeeded = false;
|
||||
@ -1220,33 +1256,34 @@ template <class ELFT> static void handleUndefined(StringRef Name) {
|
||||
Symtab->fetchLazy<ELFT>(Sym);
|
||||
}
|
||||
|
||||
template <class ELFT> static bool shouldDemote(Symbol &Sym) {
|
||||
// If all references to a DSO happen to be weak, the DSO is not added to
|
||||
// DT_NEEDED. If that happens, we need to eliminate shared symbols created
|
||||
// from the DSO. Otherwise, they become dangling references that point to a
|
||||
// non-existent DSO.
|
||||
if (auto *S = dyn_cast<SharedSymbol>(&Sym))
|
||||
return !S->getFile<ELFT>().IsNeeded;
|
||||
template <class ELFT> static void handleLibcall(StringRef Name) {
|
||||
Symbol *Sym = Symtab->find(Name);
|
||||
if (!Sym || !Sym->isLazy())
|
||||
return;
|
||||
|
||||
// We are done processing archives, so lazy symbols that were used but not
|
||||
// found can be converted to undefined. We could also just delete the other
|
||||
// lazy symbols, but that seems to be more work than it is worth.
|
||||
return Sym.isLazy() && Sym.IsUsedInRegularObj;
|
||||
MemoryBufferRef MB;
|
||||
if (auto *LO = dyn_cast<LazyObject>(Sym))
|
||||
MB = LO->File->MB;
|
||||
else
|
||||
MB = cast<LazyArchive>(Sym)->getMemberBuffer();
|
||||
|
||||
if (isBitcode(MB))
|
||||
Symtab->fetchLazy<ELFT>(Sym);
|
||||
}
|
||||
|
||||
// Some files, such as .so or files between -{start,end}-lib may be removed
|
||||
// after their symbols are added to the symbol table. If that happens, we
|
||||
// need to remove symbols that refer to files that no longer exist, so that
|
||||
// they won't appear in the symbol table of the output file.
|
||||
//
|
||||
// We remove symbols by demoting them to undefined symbol.
|
||||
template <class ELFT> static void demoteSymbols() {
|
||||
// If all references to a DSO happen to be weak, the DSO is not added
|
||||
// to DT_NEEDED. If that happens, we need to eliminate shared symbols
|
||||
// created from the DSO. Otherwise, they become dangling references
|
||||
// that point to a non-existent DSO.
|
||||
template <class ELFT> static void demoteSharedSymbols() {
|
||||
for (Symbol *Sym : Symtab->getSymbols()) {
|
||||
if (shouldDemote<ELFT>(*Sym)) {
|
||||
bool Used = Sym->Used;
|
||||
replaceSymbol<Undefined>(Sym, nullptr, Sym->getName(), Sym->Binding,
|
||||
Sym->StOther, Sym->Type);
|
||||
Sym->Used = Used;
|
||||
if (auto *S = dyn_cast<SharedSymbol>(Sym)) {
|
||||
if (!S->getFile<ELFT>().IsNeeded) {
|
||||
bool Used = S->Used;
|
||||
replaceSymbol<Undefined>(S, nullptr, S->getName(), STB_WEAK, S->StOther,
|
||||
S->Type);
|
||||
S->Used = Used;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1315,6 +1352,85 @@ static void findKeepUniqueSections(opt::InputArgList &Args) {
|
||||
}
|
||||
}
|
||||
|
||||
template <class ELFT> static Symbol *addUndefined(StringRef Name) {
|
||||
return Symtab->addUndefined<ELFT>(Name, STB_GLOBAL, STV_DEFAULT, 0, false,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
// The --wrap option is a feature to rename symbols so that you can write
|
||||
// wrappers for existing functions. If you pass `-wrap=foo`, all
|
||||
// occurrences of symbol `foo` are resolved to `__wrap_foo` (so, you are
|
||||
// expected to write `__wrap_foo` function as a wrapper). The original
|
||||
// symbol becomes accessible as `__real_foo`, so you can call that from your
|
||||
// wrapper.
|
||||
//
|
||||
// This data structure is instantiated for each -wrap option.
|
||||
struct WrappedSymbol {
|
||||
// The symbol named by the -wrap option, as found in the symbol table.
Symbol *Sym;
|
||||
// The undefined symbol created under the name "__real_" + <name>.
Symbol *Real;
|
||||
// The undefined symbol created under the name "__wrap_" + <name>.
Symbol *Wrap;
|
||||
};
|
||||
|
||||
// Handles -wrap option.
|
||||
//
|
||||
// This function instantiates wrapper symbols. At this point, they seem
|
||||
// like they are not being used at all, so we explicitly set some flags so
|
||||
// that LTO won't eliminate them.
|
||||
template <class ELFT>
|
||||
static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &Args) {
|
||||
std::vector<WrappedSymbol> V;
|
||||
DenseSet<StringRef> Seen;
|
||||
|
||||
for (auto *Arg : Args.filtered(OPT_wrap)) {
|
||||
StringRef Name = Arg->getValue();
|
||||
if (!Seen.insert(Name).second)
|
||||
continue;
|
||||
|
||||
Symbol *Sym = Symtab->find(Name);
|
||||
if (!Sym)
|
||||
continue;
|
||||
|
||||
Symbol *Real = addUndefined<ELFT>(Saver.save("__real_" + Name));
|
||||
Symbol *Wrap = addUndefined<ELFT>(Saver.save("__wrap_" + Name));
|
||||
V.push_back({Sym, Real, Wrap});
|
||||
|
||||
// We want to tell LTO not to inline symbols to be overwritten
|
||||
// because LTO doesn't know the final symbol contents after renaming.
|
||||
Real->CanInline = false;
|
||||
Sym->CanInline = false;
|
||||
|
||||
// Tell LTO not to eliminate these symbols.
|
||||
Sym->IsUsedInRegularObj = true;
|
||||
Wrap->IsUsedInRegularObj = true;
|
||||
}
|
||||
return V;
|
||||
}
|
||||
|
||||
// Do renaming for -wrap by updating pointers to symbols.
|
||||
//
|
||||
// When this function is executed, only InputFiles and symbol table
|
||||
// contain pointers to symbol objects. We visit them to replace pointers,
|
||||
// so that wrapped symbols are swapped as instructed by the command line.
|
||||
template <class ELFT> static void wrapSymbols(ArrayRef<WrappedSymbol> Wrapped) {
|
||||
DenseMap<Symbol *, Symbol *> Map;
|
||||
for (const WrappedSymbol &W : Wrapped) {
|
||||
Map[W.Sym] = W.Wrap;
|
||||
Map[W.Real] = W.Sym;
|
||||
}
|
||||
|
||||
// Update pointers in input files.
|
||||
parallelForEach(ObjectFiles, [&](InputFile *File) {
|
||||
std::vector<Symbol *> &Syms = File->getMutableSymbols();
|
||||
for (size_t I = 0, E = Syms.size(); I != E; ++I)
|
||||
if (Symbol *S = Map.lookup(Syms[I]))
|
||||
Syms[I] = S;
|
||||
});
|
||||
|
||||
// Update pointers in the symbol table.
|
||||
for (const WrappedSymbol &W : Wrapped)
|
||||
Symtab->wrap(W.Sym, W.Real, W.Wrap);
|
||||
}
|
||||
|
||||
static const char *LibcallRoutineNames[] = {
|
||||
#define HANDLE_LIBCALL(code, name) name,
|
||||
#include "llvm/IR/RuntimeLibcalls.def"
|
||||
@ -1325,6 +1441,8 @@ static const char *LibcallRoutineNames[] = {
|
||||
// all linker scripts have already been parsed.
|
||||
template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
|
||||
Target = getTarget();
|
||||
InX<ELFT>::VerSym = nullptr;
|
||||
InX<ELFT>::VerNeed = nullptr;
|
||||
|
||||
Config->MaxPageSize = getMaxPageSize(Args);
|
||||
Config->ImageBase = getImageBase(Args);
|
||||
@ -1380,8 +1498,8 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
|
||||
|
||||
// Some symbols (such as __ehdr_start) are defined lazily only when there
|
||||
// are undefined symbols for them, so we add these to trigger that logic.
|
||||
for (StringRef Sym : Script->ReferencedSymbols)
|
||||
Symtab->addUndefined<ELFT>(Sym);
|
||||
for (StringRef Name : Script->ReferencedSymbols)
|
||||
addUndefined<ELFT>(Name);
|
||||
|
||||
// Handle the `--undefined <sym>` options.
|
||||
for (StringRef S : Config->Undefined)
|
||||
@ -1396,11 +1514,20 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
|
||||
// in a bitcode file in an archive member, we need to arrange to use LTO to
|
||||
// compile those archive members by adding them to the link beforehand.
|
||||
//
|
||||
// With this the symbol table should be complete. After this, no new names
|
||||
// except a few linker-synthesized ones will be added to the symbol table.
|
||||
// However, adding all libcall symbols to the link can have undesired
|
||||
// consequences. For example, the libgcc implementation of
|
||||
// __sync_val_compare_and_swap_8 on 32-bit ARM pulls in an .init_array entry
|
||||
// that aborts the program if the Linux kernel does not support 64-bit
|
||||
// atomics, which would prevent the program from running even if it does not
|
||||
// use 64-bit atomics.
|
||||
//
|
||||
// Therefore, we only add libcall symbols to the link before LTO if we have
|
||||
// to, i.e. if the symbol's definition is in bitcode. Any other required
|
||||
// libcall symbols will be added to the link after LTO when we add the LTO
|
||||
// object file to the link.
|
||||
if (!BitcodeFiles.empty())
|
||||
for (const char *S : LibcallRoutineNames)
|
||||
handleUndefined<ELFT>(S);
|
||||
handleLibcall<ELFT>(S);
|
||||
|
||||
// Return if there were name resolution errors.
|
||||
if (errorCount())
|
||||
@ -1424,6 +1551,9 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
|
||||
Out::ElfHeader = make<OutputSection>("", 0, SHF_ALLOC);
|
||||
Out::ElfHeader->Size = sizeof(typename ELFT::Ehdr);
|
||||
|
||||
// Create wrapped symbols for -wrap option.
|
||||
std::vector<WrappedSymbol> Wrapped = addWrappedSymbols<ELFT>(Args);
|
||||
|
||||
// We need to create some reserved symbols such as _end. Create them.
|
||||
if (!Config->Relocatable)
|
||||
addReservedSymbols();
|
||||
@ -1436,12 +1566,11 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
|
||||
if (!Config->Relocatable)
|
||||
Symtab->scanVersionScript();
|
||||
|
||||
// Create wrapped symbols for -wrap option.
|
||||
for (auto *Arg : Args.filtered(OPT_wrap))
|
||||
Symtab->addSymbolWrap<ELFT>(Arg->getValue());
|
||||
|
||||
// Do link-time optimization if given files are LLVM bitcode files.
|
||||
// This compiles bitcode files into real object files.
|
||||
//
|
||||
// With this the symbol table should be complete. After this, no new names
|
||||
// except a few linker-synthesized ones will be added to the symbol table.
|
||||
Symtab->addCombinedLTOObject<ELFT>();
|
||||
if (errorCount())
|
||||
return;
|
||||
@ -1452,8 +1581,15 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
|
||||
if (Config->ThinLTOIndexOnly)
|
||||
return;
|
||||
|
||||
// Likewise, --plugin-opt=emit-llvm is an option to make LTO create
|
||||
// an output file in bitcode and exit, so that you can just get a
|
||||
// combined bitcode file.
|
||||
if (Config->EmitLLVM)
|
||||
return;
|
||||
|
||||
// Apply symbol renames for -wrap.
|
||||
Symtab->applySymbolWrap();
|
||||
if (!Wrapped.empty())
|
||||
wrapSymbols<ELFT>(Wrapped);
|
||||
|
||||
// Now that we have a complete list of input files.
|
||||
// Beyond this point, no new files are added.
|
||||
@ -1481,27 +1617,19 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
|
||||
// supports them.
|
||||
if (Config->ARMHasBlx == false)
|
||||
warn("lld uses blx instruction, no object with architecture supporting "
|
||||
"feature detected.");
|
||||
if (Config->ARMJ1J2BranchEncoding == false)
|
||||
warn("lld uses extended branch encoding, no object with architecture "
|
||||
"supporting feature detected.");
|
||||
if (Config->ARMHasMovtMovw == false)
|
||||
warn("lld may use movt/movw, no object with architecture supporting "
|
||||
"feature detected.");
|
||||
"feature detected");
|
||||
}
|
||||
|
||||
// This adds a .comment section containing a version string. We have to add it
|
||||
// before decompressAndMergeSections because the .comment section is a
|
||||
// mergeable section.
|
||||
// before mergeSections because the .comment section is a mergeable section.
|
||||
if (!Config->Relocatable)
|
||||
InputSections.push_back(createCommentSection());
|
||||
|
||||
// Do size optimizations: garbage collection, merging of SHF_MERGE sections
|
||||
// and identical code folding.
|
||||
decompressSections();
|
||||
splitSections<ELFT>();
|
||||
markLive<ELFT>();
|
||||
demoteSymbols<ELFT>();
|
||||
demoteSharedSymbols<ELFT>();
|
||||
mergeSections();
|
||||
if (Config->ICF != ICFLevel::None) {
|
||||
findKeepUniqueSections<ELFT>(Args);
|
||||
@ -1509,9 +1637,12 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
|
||||
}
|
||||
|
||||
// Read the callgraph now that we know what was gced or icfed
|
||||
if (auto *Arg = Args.getLastArg(OPT_call_graph_ordering_file))
|
||||
if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
|
||||
readCallGraph(*Buffer);
|
||||
if (Config->CallGraphProfileSort) {
|
||||
if (auto *Arg = Args.getLastArg(OPT_call_graph_ordering_file))
|
||||
if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
|
||||
readCallGraph(*Buffer);
|
||||
readCallGraphsFromObjectFiles<ELFT>();
|
||||
}
|
||||
|
||||
// Write the result to the file.
|
||||
writeResult<ELFT>();
|
||||
|
||||
3
deps/lld/ELF/Driver.h
vendored
3
deps/lld/ELF/Driver.h
vendored
@ -42,9 +42,6 @@ private:
|
||||
// True if we are in --start-lib and --end-lib.
|
||||
bool InLib = false;
|
||||
|
||||
// True if we are in -format=binary and -format=elf.
|
||||
bool InBinary = false;
|
||||
|
||||
std::vector<InputFile *> Files;
|
||||
};
|
||||
|
||||
|
||||
5
deps/lld/ELF/DriverUtils.cpp
vendored
5
deps/lld/ELF/DriverUtils.cpp
vendored
@ -139,8 +139,9 @@ opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> Argv) {
|
||||
}
|
||||
|
||||
void elf::printHelp() {
|
||||
ELFOptTable().PrintHelp(outs(), Config->ProgName.data(), "lld",
|
||||
false /*ShowHidden*/, true /*ShowAllAliases*/);
|
||||
ELFOptTable().PrintHelp(
|
||||
outs(), (Config->ProgName + " [options] file...").str().c_str(), "lld",
|
||||
false /*ShowHidden*/, true /*ShowAllAliases*/);
|
||||
outs() << "\n";
|
||||
|
||||
// Scripts generated by Libtool versions up to at least 2.4.6 (the most
|
||||
|
||||
4
deps/lld/ELF/EhFrame.cpp
vendored
4
deps/lld/ELF/EhFrame.cpp
vendored
@ -44,7 +44,7 @@ public:
|
||||
private:
|
||||
template <class P> void failOn(const P *Loc, const Twine &Msg) {
|
||||
fatal("corrupted .eh_frame: " + Msg + "\n>>> defined in " +
|
||||
IS->getObjMsg((const uint8_t *)Loc - IS->Data.data()));
|
||||
IS->getObjMsg((const uint8_t *)Loc - IS->data().data()));
|
||||
}
|
||||
|
||||
uint8_t readByte();
|
||||
@ -59,7 +59,7 @@ private:
|
||||
}
|
||||
|
||||
size_t elf::readEhRecordSize(InputSectionBase *S, size_t Off) {
|
||||
return EhReader(S, S->Data.slice(Off)).readEhRecordSize();
|
||||
return EhReader(S, S->data().slice(Off)).readEhRecordSize();
|
||||
}
|
||||
|
||||
// .eh_frame section is a sequence of records. Each record starts with
|
||||
|
||||
35
deps/lld/ELF/ICF.cpp
vendored
35
deps/lld/ELF/ICF.cpp
vendored
@ -252,7 +252,10 @@ bool ICF<ELFT>::constantEq(const InputSection *SecA, ArrayRef<RelTy> RA,
|
||||
|
||||
auto *DA = dyn_cast<Defined>(&SA);
|
||||
auto *DB = dyn_cast<Defined>(&SB);
|
||||
if (!DA || !DB)
|
||||
|
||||
// Placeholder symbols generated by linker scripts look the same now but
|
||||
// may have different values later.
|
||||
if (!DA || !DB || DA->ScriptDefined || DB->ScriptDefined)
|
||||
return false;
|
||||
|
||||
// Relocations referring to absolute symbols are constant-equal if their
|
||||
@ -298,7 +301,7 @@ bool ICF<ELFT>::constantEq(const InputSection *SecA, ArrayRef<RelTy> RA,
|
||||
template <class ELFT>
|
||||
bool ICF<ELFT>::equalsConstant(const InputSection *A, const InputSection *B) {
|
||||
if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags ||
|
||||
A->getSize() != B->getSize() || A->Data != B->Data)
|
||||
A->getSize() != B->getSize() || A->data() != B->data())
|
||||
return false;
|
||||
|
||||
// If two sections have different output sections, we cannot merge them.
|
||||
@ -420,6 +423,22 @@ void ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> Fn) {
|
||||
++Cnt;
|
||||
}
|
||||
|
||||
// Combine the hashes of the sections referenced by the given section into its
|
||||
// hash.
|
||||
template <class ELFT, class RelTy>
|
||||
static void combineRelocHashes(unsigned Cnt, InputSection *IS,
|
||||
ArrayRef<RelTy> Rels) {
|
||||
uint32_t Hash = IS->Class[Cnt % 2];
|
||||
for (RelTy Rel : Rels) {
|
||||
Symbol &S = IS->template getFile<ELFT>()->getRelocTargetSym(Rel);
|
||||
if (auto *D = dyn_cast<Defined>(&S))
|
||||
if (auto *RelSec = dyn_cast_or_null<InputSection>(D->Section))
|
||||
Hash += RelSec->Class[Cnt % 2];
|
||||
}
|
||||
// Set MSB to 1 to avoid collisions with non-hash IDs.
|
||||
IS->Class[(Cnt + 1) % 2] = Hash | (1U << 31);
|
||||
}
|
||||
|
||||
static void print(const Twine &S) {
|
||||
if (Config->PrintIcfSections)
|
||||
message(S);
|
||||
@ -435,10 +454,18 @@ template <class ELFT> void ICF<ELFT>::run() {
|
||||
|
||||
// Initially, we use hash values to partition sections.
|
||||
parallelForEach(Sections, [&](InputSection *S) {
|
||||
// Set MSB to 1 to avoid collisions with non-hash IDs.
|
||||
S->Class[0] = xxHash64(S->Data) | (1U << 31);
|
||||
S->Class[0] = xxHash64(S->data());
|
||||
});
|
||||
|
||||
for (unsigned Cnt = 0; Cnt != 2; ++Cnt) {
|
||||
parallelForEach(Sections, [&](InputSection *S) {
|
||||
if (S->AreRelocsRela)
|
||||
combineRelocHashes<ELFT>(Cnt, S, S->template relas<ELFT>());
|
||||
else
|
||||
combineRelocHashes<ELFT>(Cnt, S, S->template rels<ELFT>());
|
||||
});
|
||||
}
|
||||
|
||||
// From now on, sections in Sections vector are ordered so that sections
|
||||
// in the same equivalence class are consecutive in the vector.
|
||||
std::stable_sort(Sections.begin(), Sections.end(),
|
||||
|
||||
149
deps/lld/ELF/InputFiles.cpp
vendored
149
deps/lld/ELF/InputFiles.cpp
vendored
@ -46,7 +46,7 @@ std::vector<LazyObjFile *> elf::LazyObjFiles;
|
||||
std::vector<InputFile *> elf::ObjectFiles;
|
||||
std::vector<InputFile *> elf::SharedFiles;
|
||||
|
||||
TarWriter *elf::Tar;
|
||||
std::unique_ptr<TarWriter> elf::Tar;
|
||||
|
||||
InputFile::InputFile(Kind K, MemoryBufferRef M)
|
||||
: MB(M), GroupId(NextGroupId), FileKind(K) {
|
||||
@ -125,11 +125,7 @@ std::string InputFile::getSrcMsg(const Symbol &Sym, InputSectionBase &Sec,
|
||||
|
||||
template <class ELFT> void ObjFile<ELFT>::initializeDwarf() {
|
||||
Dwarf = llvm::make_unique<DWARFContext>(make_unique<LLDDwarfObj<ELFT>>(this));
|
||||
const DWARFObject &Obj = Dwarf->getDWARFObj();
|
||||
DWARFDataExtractor LineData(Obj, Obj.getLineSection(), Config->IsLE,
|
||||
Config->Wordsize);
|
||||
|
||||
for (std::unique_ptr<DWARFCompileUnit> &CU : Dwarf->compile_units()) {
|
||||
for (std::unique_ptr<DWARFUnit> &CU : Dwarf->compile_units()) {
|
||||
auto Report = [](Error Err) {
|
||||
handleAllErrors(std::move(Err),
|
||||
[](ErrorInfoBase &Info) { warn(Info.message()); });
|
||||
@ -324,17 +320,6 @@ StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
|
||||
return Signature;
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
ArrayRef<typename ObjFile<ELFT>::Elf_Word>
|
||||
ObjFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) {
|
||||
const ELFFile<ELFT> &Obj = this->getObj();
|
||||
ArrayRef<Elf_Word> Entries =
|
||||
CHECK(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), this);
|
||||
if (Entries.empty() || Entries[0] != GRP_COMDAT)
|
||||
fatal(toString(this) + ": unsupported SHT_GROUP format");
|
||||
return Entries.slice(1);
|
||||
}
|
||||
|
||||
template <class ELFT> bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) {
|
||||
// On a regular link we don't merge sections if -O0 (default is -O1). This
|
||||
// sometimes makes the linker significantly faster, although the output will
|
||||
@ -416,6 +401,11 @@ void ObjFile<ELFT>::initializeSections(
|
||||
continue;
|
||||
const Elf_Shdr &Sec = ObjSections[I];
|
||||
|
||||
if (Sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE)
|
||||
CGProfile = check(
|
||||
this->getObj().template getSectionContentsAsArray<Elf_CGProfile>(
|
||||
&Sec));
|
||||
|
||||
// SHF_EXCLUDE'ed sections are discarded by the linker. However,
|
||||
// if -r is given, we'll let the final link discard such sections.
|
||||
// This is compatible with GNU.
|
||||
@ -439,22 +429,34 @@ void ObjFile<ELFT>::initializeSections(
|
||||
case SHT_GROUP: {
|
||||
// De-duplicate section groups by their signatures.
|
||||
StringRef Signature = getShtGroupSignature(ObjSections, Sec);
|
||||
bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second;
|
||||
this->Sections[I] = &InputSection::Discarded;
|
||||
|
||||
// If it is a new section group, we want to keep group members.
|
||||
// Group leader sections, which contain indices of group members, are
|
||||
// discarded because they are useless beyond this point. The only
|
||||
// exception is the -r option because in order to produce re-linkable
|
||||
// object files, we want to pass through basically everything.
|
||||
|
||||
ArrayRef<Elf_Word> Entries =
|
||||
CHECK(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), this);
|
||||
if (Entries.empty())
|
||||
fatal(toString(this) + ": empty SHT_GROUP");
|
||||
|
||||
// The first word of a SHT_GROUP section contains flags. Currently,
|
||||
// the standard defines only "GRP_COMDAT" flag for the COMDAT group.
|
||||
// A group with the empty flag doesn't define anything; such sections
|
||||
// are just skipped.
|
||||
if (Entries[0] == 0)
|
||||
continue;
|
||||
|
||||
if (Entries[0] != GRP_COMDAT)
|
||||
fatal(toString(this) + ": unsupported SHT_GROUP format");
|
||||
|
||||
bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second;
|
||||
if (IsNew) {
|
||||
if (Config->Relocatable)
|
||||
this->Sections[I] = createInputSection(Sec);
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Otherwise, discard group members.
|
||||
for (uint32_t SecIndex : getShtGroupEntries(Sec)) {
|
||||
for (uint32_t SecIndex : Entries.slice(1)) {
|
||||
if (SecIndex >= Size)
|
||||
fatal(toString(this) +
|
||||
": invalid section index in group: " + Twine(SecIndex));
|
||||
@ -478,11 +480,13 @@ void ObjFile<ELFT>::initializeSections(
|
||||
// .ARM.exidx sections have a reverse dependency on the InputSection they
|
||||
// have a SHF_LINK_ORDER dependency, this is identified by the sh_link.
|
||||
if (Sec.sh_flags & SHF_LINK_ORDER) {
|
||||
if (Sec.sh_link >= this->Sections.size())
|
||||
InputSectionBase *LinkSec = nullptr;
|
||||
if (Sec.sh_link < this->Sections.size())
|
||||
LinkSec = this->Sections[Sec.sh_link];
|
||||
if (!LinkSec)
|
||||
fatal(toString(this) +
|
||||
": invalid sh_link index: " + Twine(Sec.sh_link));
|
||||
|
||||
InputSectionBase *LinkSec = this->Sections[Sec.sh_link];
|
||||
InputSection *IS = cast<InputSection>(this->Sections[I]);
|
||||
LinkSec->DependentSections.push_back(IS);
|
||||
if (!isa<InputSection>(LinkSec))
|
||||
@ -598,7 +602,7 @@ InputSectionBase *ObjFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) {
|
||||
// as a given section.
|
||||
static InputSection *toRegularSection(MergeInputSection *Sec) {
|
||||
return make<InputSection>(Sec->File, Sec->Flags, Sec->Type, Sec->Alignment,
|
||||
Sec->Data, Sec->Name);
|
||||
Sec->data(), Sec->Name);
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
@ -618,9 +622,9 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
|
||||
// FIXME: Retain the first attribute section we see. The eglibc ARM
|
||||
// dynamic loaders require the presence of an attribute section for dlopen
|
||||
// to work. In a full implementation we would merge all attribute sections.
|
||||
if (InX::ARMAttributes == nullptr) {
|
||||
InX::ARMAttributes = make<InputSection>(*this, Sec, Name);
|
||||
return InX::ARMAttributes;
|
||||
if (In.ARMAttributes == nullptr) {
|
||||
In.ARMAttributes = make<InputSection>(*this, Sec, Name);
|
||||
return In.ARMAttributes;
|
||||
}
|
||||
return &InputSection::Discarded;
|
||||
}
|
||||
@ -638,8 +642,16 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
|
||||
// This section contains relocation information.
|
||||
// If -r is given, we do not interpret or apply relocation
|
||||
// but just copy relocation sections to output.
|
||||
if (Config->Relocatable)
|
||||
return make<InputSection>(*this, Sec, Name);
|
||||
if (Config->Relocatable) {
|
||||
InputSection *RelocSec = make<InputSection>(*this, Sec, Name);
|
||||
// We want to add a dependency to target, similar to what we do for
|
||||
// -emit-relocs below. This is useful for the case when linker script
|
||||
// contains the "/DISCARD/". It is perhaps uncommon to use a script with
|
||||
// -r, but we faced it in the Linux kernel and have to handle such a case
|
||||
// without crashing.
|
||||
Target->DependentSections.push_back(RelocSec);
|
||||
return RelocSec;
|
||||
}
|
||||
|
||||
if (Target->FirstRelocation)
|
||||
fatal(toString(this) +
|
||||
@ -704,7 +716,7 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
|
||||
// for split stack will include a .note.GNU-split-stack section.
|
||||
if (Name == ".note.GNU-split-stack") {
|
||||
if (Config->Relocatable) {
|
||||
error("Cannot mix split-stack and non-split-stack in a relocatable link");
|
||||
error("cannot mix split-stack and non-split-stack in a relocatable link");
|
||||
return &InputSection::Discarded;
|
||||
}
|
||||
this->SplitStack = true;
|
||||
@ -724,7 +736,8 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
|
||||
// sections. Drop those sections to avoid duplicate symbol errors.
|
||||
// FIXME: This is glibc PR20543, we should remove this hack once that has been
|
||||
// fixed for a while.
|
||||
if (Name.startswith(".gnu.linkonce."))
|
||||
if (Name == ".gnu.linkonce.t.__x86.get_pc_thunk.bx" ||
|
||||
Name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx")
|
||||
return &InputSection::Discarded;
|
||||
|
||||
// If we are creating a new .build-id section, strip existing .build-id
|
||||
@ -806,7 +819,7 @@ template <class ELFT> Symbol *ObjFile<ELFT>::createSymbol(const Elf_Sym *Sym) {
|
||||
if (Sec == &InputSection::Discarded)
|
||||
return Symtab->addUndefined<ELFT>(Name, Binding, StOther, Type,
|
||||
/*CanOmitFromDynSym=*/false, this);
|
||||
return Symtab->addRegular(Name, StOther, Type, Value, Size, Binding, Sec,
|
||||
return Symtab->addDefined(Name, StOther, Type, Value, Size, Binding, Sec,
|
||||
this);
|
||||
}
|
||||
}
|
||||
@ -940,8 +953,7 @@ std::vector<const typename ELFT::Verdef *> SharedFile<ELFT>::parseVerdefs() {
|
||||
auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef);
|
||||
Verdef += CurVerdef->vd_next;
|
||||
unsigned VerdefIndex = CurVerdef->vd_ndx;
|
||||
if (Verdefs.size() <= VerdefIndex)
|
||||
Verdefs.resize(VerdefIndex + 1);
|
||||
Verdefs.resize(VerdefIndex + 1);
|
||||
Verdefs[VerdefIndex] = CurVerdef;
|
||||
}
|
||||
|
||||
@ -993,7 +1005,17 @@ template <class ELFT> void SharedFile<ELFT>::parseRest() {
|
||||
for (size_t I = 0; I < Syms.size(); ++I) {
|
||||
const Elf_Sym &Sym = Syms[I];
|
||||
|
||||
// ELF spec requires that all local symbols precede weak or global
|
||||
// symbols in each symbol table, and the index of first non-local symbol
|
||||
// is stored to sh_info. If a local symbol appears after some non-local
|
||||
// symbol, that's a violation of the spec.
|
||||
StringRef Name = CHECK(Sym.getName(this->StringTable), this);
|
||||
if (Sym.getBinding() == STB_LOCAL) {
|
||||
warn("found local symbol '" + Name +
|
||||
"' in global part of symbol table in file " + toString(this));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (Sym.isUndefined()) {
|
||||
Symbol *S = Symtab->addUndefined<ELFT>(Name, Sym.getBinding(),
|
||||
Sym.st_other, Sym.getType(),
|
||||
@ -1002,16 +1024,6 @@ template <class ELFT> void SharedFile<ELFT>::parseRest() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// ELF spec requires that all local symbols precede weak or global
|
||||
// symbols in each symbol table, and the index of first non-local symbol
|
||||
// is stored to sh_info. If a local symbol appears after some non-local
|
||||
// symbol, that's a violation of the spec.
|
||||
if (Sym.getBinding() == STB_LOCAL) {
|
||||
warn("found local symbol '" + Name +
|
||||
"' in global part of symbol table in file " + toString(this));
|
||||
continue;
|
||||
}
|
||||
|
||||
// MIPS BFD linker puts _gp_disp symbol into DSO files and incorrectly
|
||||
// assigns VER_NDX_LOCAL to this section global symbol. Here is a
|
||||
// workaround for this bug.
|
||||
@ -1054,6 +1066,9 @@ static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) {
|
||||
switch (T.getArch()) {
|
||||
case Triple::aarch64:
|
||||
return EM_AARCH64;
|
||||
case Triple::amdgcn:
|
||||
case Triple::r600:
|
||||
return EM_AMDGPU;
|
||||
case Triple::arm:
|
||||
case Triple::thumb:
|
||||
return EM_ARM;
|
||||
@ -1064,9 +1079,12 @@ static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) {
|
||||
case Triple::mips64:
|
||||
case Triple::mips64el:
|
||||
return EM_MIPS;
|
||||
case Triple::msp430:
|
||||
return EM_MSP430;
|
||||
case Triple::ppc:
|
||||
return EM_PPC;
|
||||
case Triple::ppc64:
|
||||
case Triple::ppc64le:
|
||||
return EM_PPC64;
|
||||
case Triple::x86:
|
||||
return T.isOSIAMCU() ? EM_IAMCU : EM_386;
|
||||
@ -1178,7 +1196,7 @@ static ELFKind getELFKind(MemoryBufferRef MB) {
|
||||
}
|
||||
|
||||
void BinaryFile::parse() {
|
||||
ArrayRef<uint8_t> Data = toArrayRef(MB.getBuffer());
|
||||
ArrayRef<uint8_t> Data = arrayRefFromStringRef(MB.getBuffer());
|
||||
auto *Section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
|
||||
8, Data, ".data");
|
||||
Sections.push_back(Section);
|
||||
@ -1192,11 +1210,11 @@ void BinaryFile::parse() {
|
||||
if (!isAlnum(S[I]))
|
||||
S[I] = '_';
|
||||
|
||||
Symtab->addRegular(Saver.save(S + "_start"), STV_DEFAULT, STT_OBJECT, 0, 0,
|
||||
Symtab->addDefined(Saver.save(S + "_start"), STV_DEFAULT, STT_OBJECT, 0, 0,
|
||||
STB_GLOBAL, Section, nullptr);
|
||||
Symtab->addRegular(Saver.save(S + "_end"), STV_DEFAULT, STT_OBJECT,
|
||||
Symtab->addDefined(Saver.save(S + "_end"), STV_DEFAULT, STT_OBJECT,
|
||||
Data.size(), 0, STB_GLOBAL, Section, nullptr);
|
||||
Symtab->addRegular(Saver.save(S + "_size"), STV_DEFAULT, STT_OBJECT,
|
||||
Symtab->addDefined(Saver.save(S + "_size"), STV_DEFAULT, STT_OBJECT,
|
||||
Data.size(), 0, STB_GLOBAL, nullptr, nullptr);
|
||||
}
|
||||
|
||||
@ -1262,25 +1280,11 @@ template <class ELFT> void LazyObjFile::parse() {
|
||||
return;
|
||||
}
|
||||
|
||||
switch (getELFKind(this->MB)) {
|
||||
case ELF32LEKind:
|
||||
addElfSymbols<ELF32LE>();
|
||||
if (getELFKind(this->MB) != Config->EKind) {
|
||||
error("incompatible file: " + this->MB.getBufferIdentifier());
|
||||
return;
|
||||
case ELF32BEKind:
|
||||
addElfSymbols<ELF32BE>();
|
||||
return;
|
||||
case ELF64LEKind:
|
||||
addElfSymbols<ELF64LE>();
|
||||
return;
|
||||
case ELF64BEKind:
|
||||
addElfSymbols<ELF64BE>();
|
||||
return;
|
||||
default:
|
||||
llvm_unreachable("getELFKind");
|
||||
}
|
||||
}
|
||||
|
||||
template <class ELFT> void LazyObjFile::addElfSymbols() {
|
||||
ELFFile<ELFT> Obj = check(ELFFile<ELFT>::create(MB.getBuffer()));
|
||||
ArrayRef<typename ELFT::Shdr> Sections = CHECK(Obj.sections(), this);
|
||||
|
||||
@ -1305,12 +1309,9 @@ std::string elf::replaceThinLTOSuffix(StringRef Path) {
|
||||
StringRef Suffix = Config->ThinLTOObjectSuffixReplace.first;
|
||||
StringRef Repl = Config->ThinLTOObjectSuffixReplace.second;
|
||||
|
||||
if (!Path.endswith(Suffix)) {
|
||||
error("-thinlto-object-suffix-replace=" + Suffix + ";" + Repl +
|
||||
" was given, but " + Path + " does not end with the suffix");
|
||||
return "";
|
||||
}
|
||||
return (Path.drop_back(Suffix.size()) + Repl).str();
|
||||
if (Path.consume_back(Suffix))
|
||||
return (Path + Repl).str();
|
||||
return Path;
|
||||
}
|
||||
|
||||
template void ArchiveFile::parse<ELF32LE>();
|
||||
|
||||
13
deps/lld/ELF/InputFiles.h
vendored
13
deps/lld/ELF/InputFiles.h
vendored
@ -50,7 +50,7 @@ class Symbol;
|
||||
|
||||
// If -reproduce option is given, all input files are written
|
||||
// to this tar archive.
|
||||
extern llvm::TarWriter *Tar;
|
||||
extern std::unique_ptr<llvm::TarWriter> Tar;
|
||||
|
||||
// Opens a given file.
|
||||
llvm::Optional<MemoryBufferRef> readFile(StringRef Path);
|
||||
@ -86,7 +86,9 @@ public:
|
||||
|
||||
// Returns object file symbols. It is a runtime error to call this
|
||||
// function on files of other types.
|
||||
ArrayRef<Symbol *> getSymbols() {
|
||||
ArrayRef<Symbol *> getSymbols() { return getMutableSymbols(); }
|
||||
|
||||
std::vector<Symbol *> &getMutableSymbols() {
|
||||
assert(FileKind == BinaryKind || FileKind == ObjKind ||
|
||||
FileKind == BitcodeKind);
|
||||
return Symbols;
|
||||
@ -169,10 +171,10 @@ template <class ELFT> class ObjFile : public ELFFileBase<ELFT> {
|
||||
typedef typename ELFT::Sym Elf_Sym;
|
||||
typedef typename ELFT::Shdr Elf_Shdr;
|
||||
typedef typename ELFT::Word Elf_Word;
|
||||
typedef typename ELFT::CGProfile Elf_CGProfile;
|
||||
|
||||
StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
|
||||
const Elf_Shdr &Sec);
|
||||
ArrayRef<Elf_Word> getShtGroupEntries(const Elf_Shdr &Sec);
|
||||
|
||||
public:
|
||||
static bool classof(const InputFile *F) { return F->kind() == Base::ObjKind; }
|
||||
@ -218,6 +220,9 @@ public:
|
||||
// Pointer to this input file's .llvm_addrsig section, if it has one.
|
||||
const Elf_Shdr *AddrsigSec = nullptr;
|
||||
|
||||
// SHT_LLVM_CALL_GRAPH_PROFILE table
|
||||
ArrayRef<Elf_CGProfile> CGProfile;
|
||||
|
||||
private:
|
||||
void
|
||||
initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups);
|
||||
@ -272,8 +277,6 @@ public:
|
||||
bool AddedToLink = false;
|
||||
|
||||
private:
|
||||
template <class ELFT> void addElfSymbols();
|
||||
|
||||
uint64_t OffsetInArchive;
|
||||
};
|
||||
|
||||
|
||||
461
deps/lld/ELF/InputSection.cpp
vendored
461
deps/lld/ELF/InputSection.cpp
vendored
@ -21,7 +21,6 @@
|
||||
#include "Thunks.h"
|
||||
#include "lld/Common/ErrorHandler.h"
|
||||
#include "lld/Common/Memory.h"
|
||||
#include "llvm/Object/Decompressor.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/Compression.h"
|
||||
#include "llvm/Support/Endian.h"
|
||||
@ -64,11 +63,11 @@ InputSectionBase::InputSectionBase(InputFile *File, uint64_t Flags,
|
||||
StringRef Name, Kind SectionKind)
|
||||
: SectionBase(SectionKind, Name, Flags, Entsize, Alignment, Type, Info,
|
||||
Link),
|
||||
File(File), Data(Data) {
|
||||
File(File), RawData(Data) {
|
||||
// In order to reduce memory allocation, we assume that mergeable
|
||||
// sections are smaller than 4 GiB, which is not an unreasonable
|
||||
// assumption as of 2017.
|
||||
if (SectionKind == SectionBase::Merge && Data.size() > UINT32_MAX)
|
||||
if (SectionKind == SectionBase::Merge && RawData.size() > UINT32_MAX)
|
||||
error(toString(this) + ": section too large");
|
||||
|
||||
NumRelocations = 0;
|
||||
@ -80,6 +79,17 @@ InputSectionBase::InputSectionBase(InputFile *File, uint64_t Flags,
|
||||
if (!isPowerOf2_64(V))
|
||||
fatal(toString(File) + ": section sh_addralign is not a power of 2");
|
||||
this->Alignment = V;
|
||||
|
||||
// In ELF, each section can be compressed by zlib, and if compressed,
|
||||
// section name may be mangled by appending "z" (e.g. ".zdebug_info").
|
||||
// If that's the case, demangle section name so that we can handle a
|
||||
// section as if it weren't compressed.
|
||||
if ((Flags & SHF_COMPRESSED) || Name.startswith(".zdebug")) {
|
||||
if (!zlib::isAvailable())
|
||||
error(toString(File) + ": contains a compressed section, " +
|
||||
"but zlib is not available");
|
||||
parseCompressedHeader();
|
||||
}
|
||||
}
|
||||
|
||||
// Drop SHF_GROUP bit unless we are producing a re-linkable object file.
|
||||
@ -128,13 +138,25 @@ InputSectionBase::InputSectionBase(ObjFile<ELFT> &File,
|
||||
size_t InputSectionBase::getSize() const {
|
||||
if (auto *S = dyn_cast<SyntheticSection>(this))
|
||||
return S->getSize();
|
||||
if (UncompressedSize >= 0)
|
||||
return UncompressedSize;
|
||||
return RawData.size();
|
||||
}
|
||||
|
||||
return Data.size();
|
||||
void InputSectionBase::uncompress() const {
|
||||
size_t Size = UncompressedSize;
|
||||
UncompressedBuf.reset(new char[Size]);
|
||||
|
||||
if (Error E =
|
||||
zlib::uncompress(toStringRef(RawData), UncompressedBuf.get(), Size))
|
||||
fatal(toString(this) +
|
||||
": uncompress failed: " + llvm::toString(std::move(E)));
|
||||
RawData = makeArrayRef((uint8_t *)UncompressedBuf.get(), Size);
|
||||
}
|
||||
|
||||
uint64_t InputSectionBase::getOffsetInFile() const {
|
||||
const uint8_t *FileStart = (const uint8_t *)File->MB.getBufferStart();
|
||||
const uint8_t *SecStart = Data.begin();
|
||||
const uint8_t *SecStart = data().begin();
|
||||
return SecStart - FileStart;
|
||||
}
|
||||
|
||||
@ -180,34 +202,70 @@ OutputSection *SectionBase::getOutputSection() {
|
||||
return Sec ? Sec->getParent() : nullptr;
|
||||
}
|
||||
|
||||
// Decompress section contents if required. Note that this function
|
||||
// is called from parallelForEach, so it must be thread-safe.
|
||||
void InputSectionBase::maybeDecompress() {
|
||||
if (DecompressBuf)
|
||||
return;
|
||||
if (!(Flags & SHF_COMPRESSED) && !Name.startswith(".zdebug"))
|
||||
// When a section is compressed, `RawData` consists of a header followed
|
||||
// by zlib-compressed data. This function parses a header to initialize
|
||||
// `UncompressedSize` member and remove the header from `RawData`.
|
||||
void InputSectionBase::parseCompressedHeader() {
|
||||
typedef typename ELF64LE::Chdr Chdr64;
|
||||
typedef typename ELF32LE::Chdr Chdr32;
|
||||
|
||||
// Old-style header
|
||||
if (Name.startswith(".zdebug")) {
|
||||
if (!toStringRef(RawData).startswith("ZLIB")) {
|
||||
error(toString(this) + ": corrupted compressed section header");
|
||||
return;
|
||||
}
|
||||
RawData = RawData.slice(4);
|
||||
|
||||
if (RawData.size() < 8) {
|
||||
error(toString(this) + ": corrupted compressed section header");
|
||||
return;
|
||||
}
|
||||
|
||||
UncompressedSize = read64be(RawData.data());
|
||||
RawData = RawData.slice(8);
|
||||
|
||||
// Restore the original section name.
|
||||
// (e.g. ".zdebug_info" -> ".debug_info")
|
||||
Name = Saver.save("." + Name.substr(2));
|
||||
return;
|
||||
}
|
||||
|
||||
// Decompress a section.
|
||||
Decompressor Dec = check(Decompressor::create(Name, toStringRef(Data),
|
||||
Config->IsLE, Config->Is64));
|
||||
|
||||
size_t Size = Dec.getDecompressedSize();
|
||||
DecompressBuf.reset(new char[Size + Name.size()]());
|
||||
if (Error E = Dec.decompress({DecompressBuf.get(), Size}))
|
||||
fatal(toString(this) +
|
||||
": decompress failed: " + llvm::toString(std::move(E)));
|
||||
|
||||
Data = makeArrayRef((uint8_t *)DecompressBuf.get(), Size);
|
||||
assert(Flags & SHF_COMPRESSED);
|
||||
Flags &= ~(uint64_t)SHF_COMPRESSED;
|
||||
|
||||
// A section name may have been altered if compressed. If that's
|
||||
// the case, restore the original name. (i.e. ".zdebug_" -> ".debug_")
|
||||
if (Name.startswith(".zdebug")) {
|
||||
DecompressBuf[Size] = '.';
|
||||
memcpy(&DecompressBuf[Size + 1], Name.data() + 2, Name.size() - 2);
|
||||
Name = StringRef(&DecompressBuf[Size], Name.size() - 1);
|
||||
// New-style 64-bit header
|
||||
if (Config->Is64) {
|
||||
if (RawData.size() < sizeof(Chdr64)) {
|
||||
error(toString(this) + ": corrupted compressed section");
|
||||
return;
|
||||
}
|
||||
|
||||
auto *Hdr = reinterpret_cast<const Chdr64 *>(RawData.data());
|
||||
if (Hdr->ch_type != ELFCOMPRESS_ZLIB) {
|
||||
error(toString(this) + ": unsupported compression type");
|
||||
return;
|
||||
}
|
||||
|
||||
UncompressedSize = Hdr->ch_size;
|
||||
RawData = RawData.slice(sizeof(*Hdr));
|
||||
return;
|
||||
}
|
||||
|
||||
// New-style 32-bit header
|
||||
if (RawData.size() < sizeof(Chdr32)) {
|
||||
error(toString(this) + ": corrupted compressed section");
|
||||
return;
|
||||
}
|
||||
|
||||
auto *Hdr = reinterpret_cast<const Chdr32 *>(RawData.data());
|
||||
if (Hdr->ch_type != ELFCOMPRESS_ZLIB) {
|
||||
error(toString(this) + ": unsupported compression type");
|
||||
return;
|
||||
}
|
||||
|
||||
UncompressedSize = Hdr->ch_size;
|
||||
RawData = RawData.slice(sizeof(*Hdr));
|
||||
}
|
||||
|
||||
InputSection *InputSectionBase::getLinkOrderDep() const {
|
||||
@ -230,14 +288,17 @@ Defined *InputSectionBase::getEnclosingFunction(uint64_t Offset) {
|
||||
// Returns a source location string. Used to construct an error message.
|
||||
template <class ELFT>
|
||||
std::string InputSectionBase::getLocation(uint64_t Offset) {
|
||||
std::string SecAndOffset = (Name + "+0x" + utohexstr(Offset)).str();
|
||||
|
||||
// We don't have file for synthetic sections.
|
||||
if (getFile<ELFT>() == nullptr)
|
||||
return (Config->OutputFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")")
|
||||
return (Config->OutputFile + ":(" + SecAndOffset + ")")
|
||||
.str();
|
||||
|
||||
// First check if we can get desired values from debugging information.
|
||||
if (Optional<DILineInfo> Info = getFile<ELFT>()->getDILineInfo(this, Offset))
|
||||
return Info->FileName + ":" + std::to_string(Info->Line);
|
||||
return Info->FileName + ":" + std::to_string(Info->Line) + ":(" +
|
||||
SecAndOffset + ")";
|
||||
|
||||
// File->SourceFile contains STT_FILE symbol that contains a
|
||||
// source file name. If it's missing, we use an object file name.
|
||||
@ -246,10 +307,10 @@ std::string InputSectionBase::getLocation(uint64_t Offset) {
|
||||
SrcFile = toString(File);
|
||||
|
||||
if (Defined *D = getEnclosingFunction<ELFT>(Offset))
|
||||
return SrcFile + ":(function " + toString(*D) + ")";
|
||||
return SrcFile + ":(function " + toString(*D) + ": " + SecAndOffset + ")";
|
||||
|
||||
// If there's no symbol, print out the offset in the section.
|
||||
return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str();
|
||||
return (SrcFile + ":(" + SecAndOffset + ")");
|
||||
}
|
||||
|
||||
// This function is intended to be used for constructing an error message.
|
||||
@ -259,9 +320,6 @@ std::string InputSectionBase::getLocation(uint64_t Offset) {
|
||||
//
|
||||
// Returns an empty string if there's no way to get line info.
|
||||
std::string InputSectionBase::getSrcMsg(const Symbol &Sym, uint64_t Offset) {
|
||||
// Synthetic sections don't have input files.
|
||||
if (!File)
|
||||
return "";
|
||||
return File->getSrcMsg(Sym, *this, Offset);
|
||||
}
|
||||
|
||||
@ -275,9 +333,6 @@ std::string InputSectionBase::getSrcMsg(const Symbol &Sym, uint64_t Offset) {
|
||||
//
|
||||
// path/to/foo.o:(function bar) in archive path/to/bar.a
|
||||
std::string InputSectionBase::getObjMsg(uint64_t Off) {
|
||||
// Synthetic sections don't have input files.
|
||||
if (!File)
|
||||
return ("<internal>:(" + Name + "+0x" + utohexstr(Off) + ")").str();
|
||||
std::string Filename = File->getName();
|
||||
|
||||
std::string Archive;
|
||||
@ -362,7 +417,7 @@ void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
|
||||
// Output section VA is zero for -r, so r_offset is an offset within the
|
||||
// section, but for --emit-relocs it is a virtual address.
|
||||
P->r_offset = Sec->getVA(Rel.r_offset);
|
||||
P->setSymbolAndType(InX::SymTab->getSymbolIndex(&Sym), Type,
|
||||
P->setSymbolAndType(In.SymTab->getSymbolIndex(&Sym), Type,
|
||||
Config->IsMips64EL);
|
||||
|
||||
if (Sym.Type == STT_SECTION) {
|
||||
@ -380,14 +435,14 @@ void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
|
||||
error("STT_SECTION symbol should be defined");
|
||||
continue;
|
||||
}
|
||||
SectionBase *Section = D->Section;
|
||||
if (Section == &InputSection::Discarded) {
|
||||
SectionBase *Section = D->Section->Repl;
|
||||
if (!Section->Live) {
|
||||
P->setSymbolAndType(0, 0, false);
|
||||
continue;
|
||||
}
|
||||
|
||||
int64_t Addend = getAddend<ELFT>(Rel);
|
||||
const uint8_t *BufLoc = Sec->Data.begin() + Rel.r_offset;
|
||||
const uint8_t *BufLoc = Sec->data().begin() + Rel.r_offset;
|
||||
if (!RelTy::IsRela)
|
||||
Addend = Target->getImplicitAddend(BufLoc, Type);
|
||||
|
||||
@ -487,6 +542,62 @@ static uint64_t getARMStaticBase(const Symbol &Sym) {
|
||||
return OS->PtLoad->FirstSec->Addr;
|
||||
}
|
||||
|
||||
// For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}), the symbol actually
|
||||
// points to the corresponding R_RISCV_PCREL_HI20 relocation, and the target VA
|
||||
// is calculated using PCREL_HI20's symbol.
|
||||
//
|
||||
// This function returns the R_RISCV_PCREL_HI20 relocation from
|
||||
// R_RISCV_PCREL_LO12's symbol and addend.
|
||||
static Relocation *getRISCVPCRelHi20(const Symbol *Sym, uint64_t Addend) {
|
||||
const Defined *D = cast<Defined>(Sym);
|
||||
InputSection *IS = cast<InputSection>(D->Section);
|
||||
|
||||
if (Addend != 0)
|
||||
warn("Non-zero addend in R_RISCV_PCREL_LO12 relocation to " +
|
||||
IS->getObjMsg(D->Value) + " is ignored");
|
||||
|
||||
// Relocations are sorted by offset, so we can use std::equal_range to do
|
||||
// binary search.
|
||||
auto Range = std::equal_range(IS->Relocations.begin(), IS->Relocations.end(),
|
||||
D->Value, RelocationOffsetComparator{});
|
||||
for (auto It = std::get<0>(Range); It != std::get<1>(Range); ++It)
|
||||
if (isRelExprOneOf<R_PC>(It->Expr))
|
||||
return &*It;
|
||||
|
||||
error("R_RISCV_PCREL_LO12 relocation points to " + IS->getObjMsg(D->Value) +
|
||||
" without an associated R_RISCV_PCREL_HI20 relocation");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// A TLS symbol's virtual address is relative to the TLS segment. Add a
|
||||
// target-specific adjustment to produce a thread-pointer-relative offset.
|
||||
static int64_t getTlsTpOffset() {
|
||||
switch (Config->EMachine) {
|
||||
case EM_ARM:
|
||||
case EM_AARCH64:
|
||||
// Variant 1. The thread pointer points to a TCB with a fixed 2-word size,
|
||||
// followed by a variable amount of alignment padding, followed by the TLS
|
||||
// segment.
|
||||
//
|
||||
// NB: While the ARM/AArch64 ABI formally has a 2-word TCB size, lld
|
||||
// effectively increases the TCB size to 8 words for Android compatibility.
|
||||
// It accomplishes this by increasing the segment's alignment.
|
||||
return alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align);
|
||||
case EM_386:
|
||||
case EM_X86_64:
|
||||
// Variant 2. The TLS segment is located just before the thread pointer.
|
||||
return -Out::TlsPhdr->p_memsz;
|
||||
case EM_PPC64:
|
||||
// The thread pointer points to a fixed offset from the start of the
|
||||
// executable's TLS segment. An offset of 0x7000 allows a signed 16-bit
|
||||
// offset to reach 0x1000 of TCB/thread-library data and 0xf000 of the
|
||||
// program's TLS segment.
|
||||
return -0x7000;
|
||||
default:
|
||||
llvm_unreachable("unhandled Config->EMachine");
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
|
||||
uint64_t P, const Symbol &Sym, RelExpr Expr) {
|
||||
switch (Expr) {
|
||||
@ -501,38 +612,37 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
|
||||
case R_ARM_SBREL:
|
||||
return Sym.getVA(A) - getARMStaticBase(Sym);
|
||||
case R_GOT:
|
||||
case R_GOT_PLT:
|
||||
case R_RELAX_TLS_GD_TO_IE_ABS:
|
||||
return Sym.getGotVA() + A;
|
||||
case R_GOTONLY_PC:
|
||||
return InX::Got->getVA() + A - P;
|
||||
return In.Got->getVA() + A - P;
|
||||
case R_GOTONLY_PC_FROM_END:
|
||||
return InX::Got->getVA() + A - P + InX::Got->getSize();
|
||||
return In.Got->getVA() + A - P + In.Got->getSize();
|
||||
case R_GOTREL:
|
||||
return Sym.getVA(A) - InX::Got->getVA();
|
||||
return Sym.getVA(A) - In.Got->getVA();
|
||||
case R_GOTREL_FROM_END:
|
||||
return Sym.getVA(A) - InX::Got->getVA() - InX::Got->getSize();
|
||||
return Sym.getVA(A) - In.Got->getVA() - In.Got->getSize();
|
||||
case R_GOT_FROM_END:
|
||||
case R_RELAX_TLS_GD_TO_IE_END:
|
||||
return Sym.getGotOffset() + A - InX::Got->getSize();
|
||||
return Sym.getGotOffset() + A - In.Got->getSize();
|
||||
case R_TLSLD_GOT_OFF:
|
||||
case R_GOT_OFF:
|
||||
case R_RELAX_TLS_GD_TO_IE_GOT_OFF:
|
||||
return Sym.getGotOffset() + A;
|
||||
case R_GOT_PAGE_PC:
|
||||
case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
|
||||
case R_AARCH64_GOT_PAGE_PC:
|
||||
case R_AARCH64_GOT_PAGE_PC_PLT:
|
||||
case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC:
|
||||
return getAArch64Page(Sym.getGotVA() + A) - getAArch64Page(P);
|
||||
case R_GOT_PC:
|
||||
case R_RELAX_TLS_GD_TO_IE:
|
||||
return Sym.getGotVA() + A - P;
|
||||
case R_HINT:
|
||||
case R_NONE:
|
||||
case R_TLSDESC_CALL:
|
||||
case R_TLSLD_HINT:
|
||||
llvm_unreachable("cannot relocate hint relocs");
|
||||
case R_HEXAGON_GOT:
|
||||
return Sym.getGotVA() - In.GotPlt->getVA();
|
||||
case R_MIPS_GOTREL:
|
||||
return Sym.getVA(A) - InX::MipsGot->getGp(File);
|
||||
return Sym.getVA(A) - In.MipsGot->getGp(File);
|
||||
case R_MIPS_GOT_GP:
|
||||
return InX::MipsGot->getGp(File) + A;
|
||||
return In.MipsGot->getGp(File) + A;
|
||||
case R_MIPS_GOT_GP_PC: {
|
||||
// R_MIPS_LO16 expression has R_MIPS_GOT_GP_PC type iff the target
|
||||
// is _gp_disp symbol. In that case we should use the following
|
||||
@ -541,7 +651,7 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
|
||||
// microMIPS variants of these relocations use slightly different
|
||||
// expressions: AHL + GP - P + 3 for %lo() and AHL + GP - P - 1 for %hi()
|
||||
// to correctly handle the least-significant bit of the microMIPS symbol.
|
||||
uint64_t V = InX::MipsGot->getGp(File) + A - P;
|
||||
uint64_t V = In.MipsGot->getGp(File) + A - P;
|
||||
if (Type == R_MIPS_LO16 || Type == R_MICROMIPS_LO16)
|
||||
V += 4;
|
||||
if (Type == R_MICROMIPS_LO16 || Type == R_MICROMIPS_HI16)
|
||||
@ -552,31 +662,34 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
|
||||
// If relocation against MIPS local symbol requires GOT entry, this entry
|
||||
// should be initialized by 'page address'. This address is high 16-bits
|
||||
// of the sum of the symbol's value and the addend.
|
||||
return InX::MipsGot->getVA() +
|
||||
InX::MipsGot->getPageEntryOffset(File, Sym, A) -
|
||||
InX::MipsGot->getGp(File);
|
||||
return In.MipsGot->getVA() + In.MipsGot->getPageEntryOffset(File, Sym, A) -
|
||||
In.MipsGot->getGp(File);
|
||||
case R_MIPS_GOT_OFF:
|
||||
case R_MIPS_GOT_OFF32:
|
||||
// In case of MIPS if a GOT relocation has non-zero addend this addend
|
||||
// should be applied to the GOT entry content not to the GOT entry offset.
|
||||
// That is why we use separate expression type.
|
||||
return InX::MipsGot->getVA() +
|
||||
InX::MipsGot->getSymEntryOffset(File, Sym, A) -
|
||||
InX::MipsGot->getGp(File);
|
||||
return In.MipsGot->getVA() + In.MipsGot->getSymEntryOffset(File, Sym, A) -
|
||||
In.MipsGot->getGp(File);
|
||||
case R_MIPS_TLSGD:
|
||||
return InX::MipsGot->getVA() + InX::MipsGot->getGlobalDynOffset(File, Sym) -
|
||||
InX::MipsGot->getGp(File);
|
||||
return In.MipsGot->getVA() + In.MipsGot->getGlobalDynOffset(File, Sym) -
|
||||
In.MipsGot->getGp(File);
|
||||
case R_MIPS_TLSLD:
|
||||
return InX::MipsGot->getVA() + InX::MipsGot->getTlsIndexOffset(File) -
|
||||
InX::MipsGot->getGp(File);
|
||||
case R_PAGE_PC:
|
||||
case R_PLT_PAGE_PC: {
|
||||
uint64_t Dest;
|
||||
if (Sym.isUndefWeak())
|
||||
Dest = getAArch64Page(A);
|
||||
else
|
||||
Dest = getAArch64Page(Sym.getVA(A));
|
||||
return Dest - getAArch64Page(P);
|
||||
return In.MipsGot->getVA() + In.MipsGot->getTlsIndexOffset(File) -
|
||||
In.MipsGot->getGp(File);
|
||||
case R_AARCH64_PAGE_PC: {
|
||||
uint64_t Val = Sym.isUndefWeak() ? P + A : Sym.getVA(A);
|
||||
return getAArch64Page(Val) - getAArch64Page(P);
|
||||
}
|
||||
case R_AARCH64_PLT_PAGE_PC: {
|
||||
uint64_t Val = Sym.isUndefWeak() ? P + A : Sym.getPltVA() + A;
|
||||
return getAArch64Page(Val) - getAArch64Page(P);
|
||||
}
|
||||
case R_RISCV_PC_INDIRECT: {
|
||||
if (const Relocation *HiRel = getRISCVPCRelHi20(&Sym, A))
|
||||
return getRelocTargetVA(File, HiRel->Type, HiRel->Addend, Sym.getVA(),
|
||||
*HiRel->Sym, HiRel->Expr);
|
||||
return 0;
|
||||
}
|
||||
case R_PC: {
|
||||
uint64_t Dest;
|
||||
@ -608,16 +721,12 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
|
||||
return 0;
|
||||
|
||||
// PPC64 V2 ABI describes two entry points to a function. The global entry
|
||||
// point sets up the TOC base pointer. When calling a local function, the
|
||||
// call should branch to the local entry point rather than the global entry
|
||||
// point. Section 3.4.1 describes using the 3 most significant bits of the
|
||||
// st_other field to find out how many instructions there are between the
|
||||
// local and global entry point.
|
||||
uint8_t StOther = (Sym.StOther >> 5) & 7;
|
||||
if (StOther == 0 || StOther == 1)
|
||||
return SymVA - P;
|
||||
|
||||
return SymVA - P + (1LL << StOther);
|
||||
// point is used for calls where the caller and callee (may) have different
|
||||
// TOC base pointers and r2 needs to be modified to hold the TOC base for
|
||||
// the callee. For local calls the caller and callee share the same
|
||||
// TOC base and so the TOC pointer initialization code should be skipped by
|
||||
// branching to the local entry point.
|
||||
return SymVA - P + getPPC64GlobalEntryToLocalEntryOffset(Sym.StOther);
|
||||
}
|
||||
case R_PPC_TOC:
|
||||
return getPPC64TocBase() + A;
|
||||
@ -634,48 +743,32 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
|
||||
// statically to zero.
|
||||
if (Sym.isTls() && Sym.isUndefWeak())
|
||||
return 0;
|
||||
|
||||
// For TLS variant 1 the TCB is a fixed size, whereas for TLS variant 2 the
|
||||
// TCB is on unspecified size and content. Targets that implement variant 1
|
||||
// should set TcbSize.
|
||||
if (Target->TcbSize) {
|
||||
// PPC64 V2 ABI has the thread pointer offset into the middle of the TLS
|
||||
// storage area by TlsTpOffset for efficient addressing TCB and up to
|
||||
// 4KB – 8 B of other thread library information (placed before the TCB).
|
||||
// Subtracting this offset will get the address of the first TLS block.
|
||||
if (Target->TlsTpOffset)
|
||||
return Sym.getVA(A) - Target->TlsTpOffset;
|
||||
|
||||
// If thread pointer is not offset into the middle, the first thing in the
|
||||
// TLS storage area is the TCB. Add the TcbSize to get the address of the
|
||||
// first TLS block.
|
||||
return Sym.getVA(A) + alignTo(Target->TcbSize, Out::TlsPhdr->p_align);
|
||||
}
|
||||
return Sym.getVA(A) - Out::TlsPhdr->p_memsz;
|
||||
return Sym.getVA(A) + getTlsTpOffset();
|
||||
case R_RELAX_TLS_GD_TO_LE_NEG:
|
||||
case R_NEG_TLS:
|
||||
return Out::TlsPhdr->p_memsz - Sym.getVA(A);
|
||||
case R_SIZE:
|
||||
return Sym.getSize() + A;
|
||||
case R_TLSDESC:
|
||||
return InX::Got->getGlobalDynAddr(Sym) + A;
|
||||
case R_TLSDESC_PAGE:
|
||||
return getAArch64Page(InX::Got->getGlobalDynAddr(Sym) + A) -
|
||||
return In.Got->getGlobalDynAddr(Sym) + A;
|
||||
case R_AARCH64_TLSDESC_PAGE:
|
||||
return getAArch64Page(In.Got->getGlobalDynAddr(Sym) + A) -
|
||||
getAArch64Page(P);
|
||||
case R_TLSGD_GOT:
|
||||
return InX::Got->getGlobalDynOffset(Sym) + A;
|
||||
return In.Got->getGlobalDynOffset(Sym) + A;
|
||||
case R_TLSGD_GOT_FROM_END:
|
||||
return InX::Got->getGlobalDynOffset(Sym) + A - InX::Got->getSize();
|
||||
return In.Got->getGlobalDynOffset(Sym) + A - In.Got->getSize();
|
||||
case R_TLSGD_PC:
|
||||
return InX::Got->getGlobalDynAddr(Sym) + A - P;
|
||||
return In.Got->getGlobalDynAddr(Sym) + A - P;
|
||||
case R_TLSLD_GOT_FROM_END:
|
||||
return InX::Got->getTlsIndexOff() + A - InX::Got->getSize();
|
||||
return In.Got->getTlsIndexOff() + A - In.Got->getSize();
|
||||
case R_TLSLD_GOT:
|
||||
return InX::Got->getTlsIndexOff() + A;
|
||||
return In.Got->getTlsIndexOff() + A;
|
||||
case R_TLSLD_PC:
|
||||
return InX::Got->getTlsIndexVA() + A - P;
|
||||
return In.Got->getTlsIndexVA() + A - P;
|
||||
default:
|
||||
llvm_unreachable("invalid expression");
|
||||
}
|
||||
llvm_unreachable("Invalid expression");
|
||||
}
|
||||
|
||||
// This function applies relocations to sections without SHF_ALLOC bit.
|
||||
@ -808,10 +901,10 @@ void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) {
|
||||
case R_RELAX_TLS_GD_TO_LE_NEG:
|
||||
Target->relaxTlsGdToLe(BufLoc, Type, TargetVA);
|
||||
break;
|
||||
case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC:
|
||||
case R_RELAX_TLS_GD_TO_IE:
|
||||
case R_RELAX_TLS_GD_TO_IE_ABS:
|
||||
case R_RELAX_TLS_GD_TO_IE_GOT_OFF:
|
||||
case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
|
||||
case R_RELAX_TLS_GD_TO_IE_END:
|
||||
Target->relaxTlsGdToIe(BufLoc, Type, TargetVA);
|
||||
break;
|
||||
@ -848,16 +941,20 @@ static void switchMorestackCallsToMorestackNonSplit(
|
||||
// __morestack inside that function should be switched to
|
||||
// __morestack_non_split.
|
||||
Symbol *MoreStackNonSplit = Symtab->find("__morestack_non_split");
|
||||
if (!MoreStackNonSplit) {
|
||||
error("Mixing split-stack objects requires a definition of "
|
||||
"__morestack_non_split");
|
||||
return;
|
||||
}
|
||||
|
||||
// Sort both collections to compare addresses efficiently.
|
||||
llvm::sort(MorestackCalls.begin(), MorestackCalls.end(),
|
||||
[](const Relocation *L, const Relocation *R) {
|
||||
return L->Offset < R->Offset;
|
||||
});
|
||||
llvm::sort(MorestackCalls, [](const Relocation *L, const Relocation *R) {
|
||||
return L->Offset < R->Offset;
|
||||
});
|
||||
std::vector<Defined *> Functions(Prologues.begin(), Prologues.end());
|
||||
llvm::sort(
|
||||
Functions.begin(), Functions.end(),
|
||||
[](const Defined *L, const Defined *R) { return L->Value < R->Value; });
|
||||
llvm::sort(Functions, [](const Defined *L, const Defined *R) {
|
||||
return L->Value < R->Value;
|
||||
});
|
||||
|
||||
auto It = MorestackCalls.begin();
|
||||
for (Defined *F : Functions) {
|
||||
@ -872,8 +969,8 @@ static void switchMorestackCallsToMorestackNonSplit(
|
||||
}
|
||||
}
|
||||
|
||||
static bool enclosingPrologueAdjusted(uint64_t Offset,
|
||||
const DenseSet<Defined *> &Prologues) {
|
||||
static bool enclosingPrologueAttempted(uint64_t Offset,
|
||||
const DenseSet<Defined *> &Prologues) {
|
||||
for (Defined *F : Prologues)
|
||||
if (F->Value <= Offset && Offset < F->Value + F->Size)
|
||||
return true;
|
||||
@ -889,7 +986,7 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf,
|
||||
uint8_t *End) {
|
||||
if (!getFile<ELFT>()->SplitStack)
|
||||
return;
|
||||
DenseSet<Defined *> AdjustedPrologues;
|
||||
DenseSet<Defined *> Prologues;
|
||||
std::vector<Relocation *> MorestackCalls;
|
||||
|
||||
for (Relocation &Rel : Relocations) {
|
||||
@ -898,15 +995,9 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf,
|
||||
if (Rel.Sym->isLocal())
|
||||
continue;
|
||||
|
||||
Defined *D = dyn_cast<Defined>(Rel.Sym);
|
||||
// A reference to an undefined symbol was an error, and should not
|
||||
// have gotten to this point.
|
||||
if (!D)
|
||||
continue;
|
||||
|
||||
// Ignore calls into the split-stack api.
|
||||
if (D->getName().startswith("__morestack")) {
|
||||
if (D->getName().equals("__morestack"))
|
||||
if (Rel.Sym->getName().startswith("__morestack")) {
|
||||
if (Rel.Sym->getName().equals("__morestack"))
|
||||
MorestackCalls.push_back(&Rel);
|
||||
continue;
|
||||
}
|
||||
@ -914,24 +1005,36 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf,
|
||||
// A relocation to non-function isn't relevant. Sometimes
|
||||
// __morestack is not marked as a function, so this check comes
|
||||
// after the name check.
|
||||
if (D->Type != STT_FUNC)
|
||||
if (Rel.Sym->Type != STT_FUNC)
|
||||
continue;
|
||||
|
||||
if (enclosingPrologueAdjusted(Rel.Offset, AdjustedPrologues))
|
||||
// If the callee's file was compiled with split stack, nothing to do. In
|
||||
// this context, a "Defined" symbol is one "defined by the binary currently
|
||||
// being produced". So an "undefined" symbol might be provided by a shared
|
||||
// library. It is not possible to tell how such symbols were compiled, so be
|
||||
// conservative.
|
||||
if (Defined *D = dyn_cast<Defined>(Rel.Sym))
|
||||
if (InputSection *IS = cast_or_null<InputSection>(D->Section))
|
||||
if (!IS || !IS->getFile<ELFT>() || IS->getFile<ELFT>()->SplitStack)
|
||||
continue;
|
||||
|
||||
if (enclosingPrologueAttempted(Rel.Offset, Prologues))
|
||||
continue;
|
||||
|
||||
if (Defined *F = getEnclosingFunction<ELFT>(Rel.Offset)) {
|
||||
if (Target->adjustPrologueForCrossSplitStack(Buf + F->Value, End)) {
|
||||
AdjustedPrologues.insert(F);
|
||||
Prologues.insert(F);
|
||||
if (Target->adjustPrologueForCrossSplitStack(Buf + getOffset(F->Value),
|
||||
End, F->StOther))
|
||||
continue;
|
||||
}
|
||||
if (!getFile<ELFT>()->SomeNoSplitStack)
|
||||
error(lld::toString(this) + ": " + F->getName() +
|
||||
" (with -fsplit-stack) calls " + Rel.Sym->getName() +
|
||||
" (without -fsplit-stack), but couldn't adjust its prologue");
|
||||
}
|
||||
if (!getFile<ELFT>()->SomeNoSplitStack)
|
||||
error("function call at " + getErrorLocation(Buf + Rel.Offset) +
|
||||
"crosses a split-stack boundary, but unable " +
|
||||
"to adjust the enclosing function's prologue");
|
||||
}
|
||||
switchMorestackCallsToMorestackNonSplit(AdjustedPrologues, MorestackCalls);
|
||||
|
||||
if (Target->NeedsMoreStackNonSplit)
|
||||
switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls);
|
||||
}
|
||||
|
||||
template <class ELFT> void InputSection::writeTo(uint8_t *Buf) {
|
||||
@ -960,10 +1063,23 @@ template <class ELFT> void InputSection::writeTo(uint8_t *Buf) {
|
||||
return;
|
||||
}
|
||||
|
||||
// If this is a compressed section, uncompress section contents directly
|
||||
// to the buffer.
|
||||
if (UncompressedSize >= 0 && !UncompressedBuf) {
|
||||
size_t Size = UncompressedSize;
|
||||
if (Error E = zlib::uncompress(toStringRef(RawData),
|
||||
(char *)(Buf + OutSecOff), Size))
|
||||
fatal(toString(this) +
|
||||
": uncompress failed: " + llvm::toString(std::move(E)));
|
||||
uint8_t *BufEnd = Buf + OutSecOff + Size;
|
||||
relocate<ELFT>(Buf, BufEnd);
|
||||
return;
|
||||
}
|
||||
|
||||
// Copy section contents from source object file to output file
|
||||
// and then apply relocations.
|
||||
memcpy(Buf + OutSecOff, Data.data(), Data.size());
|
||||
uint8_t *BufEnd = Buf + OutSecOff + Data.size();
|
||||
memcpy(Buf + OutSecOff, data().data(), data().size());
|
||||
uint8_t *BufEnd = Buf + OutSecOff + data().size();
|
||||
relocate<ELFT>(Buf, BufEnd);
|
||||
}
|
||||
|
||||
@ -1014,7 +1130,7 @@ template <class ELFT> void EhInputSection::split() {
|
||||
template <class ELFT, class RelTy>
|
||||
void EhInputSection::split(ArrayRef<RelTy> Rels) {
|
||||
unsigned RelI = 0;
|
||||
for (size_t Off = 0, End = Data.size(); Off != End;) {
|
||||
for (size_t Off = 0, End = data().size(); Off != End;) {
|
||||
size_t Size = readEhRecordSize(this, Off);
|
||||
Pieces.emplace_back(Off, this, Size, getReloc(Off, Size, Rels, RelI));
|
||||
// The empty record is the end marker.
|
||||
@ -1094,65 +1210,32 @@ void MergeInputSection::splitIntoPieces() {
|
||||
assert(Pieces.empty());
|
||||
|
||||
if (Flags & SHF_STRINGS)
|
||||
splitStrings(Data, Entsize);
|
||||
splitStrings(data(), Entsize);
|
||||
else
|
||||
splitNonStrings(Data, Entsize);
|
||||
|
||||
OffsetMap.reserve(Pieces.size());
|
||||
for (size_t I = 0, E = Pieces.size(); I != E; ++I)
|
||||
OffsetMap[Pieces[I].InputOff] = I;
|
||||
}
|
||||
|
||||
template <class It, class T, class Compare>
|
||||
static It fastUpperBound(It First, It Last, const T &Value, Compare Comp) {
|
||||
size_t Size = std::distance(First, Last);
|
||||
assert(Size != 0);
|
||||
while (Size != 1) {
|
||||
size_t H = Size / 2;
|
||||
const It MI = First + H;
|
||||
Size -= H;
|
||||
First = Comp(Value, *MI) ? First : First + H;
|
||||
}
|
||||
return Comp(Value, *First) ? First : First + 1;
|
||||
}
|
||||
|
||||
// Do binary search to get a section piece at a given input offset.
|
||||
static SectionPiece *findSectionPiece(MergeInputSection *Sec, uint64_t Offset) {
|
||||
if (Sec->Data.size() <= Offset)
|
||||
fatal(toString(Sec) + ": entry is past the end of the section");
|
||||
|
||||
// Find the element this offset points to.
|
||||
auto I = fastUpperBound(
|
||||
Sec->Pieces.begin(), Sec->Pieces.end(), Offset,
|
||||
[](const uint64_t &A, const SectionPiece &B) { return A < B.InputOff; });
|
||||
--I;
|
||||
return &*I;
|
||||
splitNonStrings(data(), Entsize);
|
||||
}
|
||||
|
||||
SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) {
|
||||
// Find a piece starting at a given offset.
|
||||
auto It = OffsetMap.find(Offset);
|
||||
if (It != OffsetMap.end())
|
||||
return &Pieces[It->second];
|
||||
if (this->data().size() <= Offset)
|
||||
fatal(toString(this) + ": offset is outside the section");
|
||||
|
||||
// If Offset is not at beginning of a section piece, it is not in the map.
|
||||
// In that case we need to search from the original section piece vector.
|
||||
return findSectionPiece(this, Offset);
|
||||
// In that case we need to do a binary search of the original section piece vector.
|
||||
auto It2 =
|
||||
llvm::upper_bound(Pieces, Offset, [](uint64_t Offset, SectionPiece P) {
|
||||
return Offset < P.InputOff;
|
||||
});
|
||||
return &It2[-1];
|
||||
}
|
||||
|
||||
// Returns the offset in an output section for a given input offset.
|
||||
// Because the contents of a mergeable section are not contiguous in output,
|
||||
// it is not just an addition to a base output offset.
|
||||
uint64_t MergeInputSection::getParentOffset(uint64_t Offset) const {
|
||||
// Find a string starting at a given offset.
|
||||
auto It = OffsetMap.find(Offset);
|
||||
if (It != OffsetMap.end())
|
||||
return Pieces[It->second].OutputOff;
|
||||
|
||||
// If Offset is not at beginning of a section piece, it is not in the map.
|
||||
// In that case we need to search from the original section piece vector.
|
||||
const SectionPiece &Piece =
|
||||
*findSectionPiece(const_cast<MergeInputSection *>(this), Offset);
|
||||
*(const_cast<MergeInputSection *>(this)->getSectionPiece (Offset));
|
||||
uint64_t Addend = Offset - Piece.InputOff;
|
||||
return Piece.OutputOff + Addend;
|
||||
}
|
||||
|
||||
38
deps/lld/ELF/InputSection.h
vendored
38
deps/lld/ELF/InputSection.h
vendored
@ -115,7 +115,12 @@ public:
|
||||
return cast_or_null<ObjFile<ELFT>>(File);
|
||||
}
|
||||
|
||||
ArrayRef<uint8_t> Data;
|
||||
ArrayRef<uint8_t> data() const {
|
||||
if (UncompressedSize >= 0 && !UncompressedBuf)
|
||||
uncompress();
|
||||
return RawData;
|
||||
}
|
||||
|
||||
uint64_t getOffsetInFile() const;
|
||||
|
||||
// True if this section has already been placed to a linker script
|
||||
@ -169,11 +174,6 @@ public:
|
||||
template <class ELFT>
|
||||
Defined *getEnclosingFunction(uint64_t Offset);
|
||||
|
||||
// Compilers emit zlib-compressed debug sections if the -gz option
|
||||
// is given. This function checks if this section is compressed, and
|
||||
// if so, decompress in memory.
|
||||
void maybeDecompress();
|
||||
|
||||
// Returns a source location string. Used to construct an error message.
|
||||
template <class ELFT> std::string getLocation(uint64_t Offset);
|
||||
std::string getSrcMsg(const Symbol &Sym, uint64_t Offset);
|
||||
@ -200,15 +200,21 @@ public:
|
||||
|
||||
|
||||
template <typename T> llvm::ArrayRef<T> getDataAs() const {
|
||||
size_t S = Data.size();
|
||||
size_t S = data().size();
|
||||
assert(S % sizeof(T) == 0);
|
||||
return llvm::makeArrayRef<T>((const T *)Data.data(), S / sizeof(T));
|
||||
return llvm::makeArrayRef<T>((const T *)data().data(), S / sizeof(T));
|
||||
}
|
||||
|
||||
private:
|
||||
// A pointer that owns decompressed data if a section is compressed by zlib.
|
||||
protected:
|
||||
void parseCompressedHeader();
|
||||
void uncompress() const;
|
||||
|
||||
mutable ArrayRef<uint8_t> RawData;
|
||||
|
||||
// A pointer that owns uncompressed data if a section is compressed by zlib.
|
||||
// Since the feature is not used often, this is usually a nullptr.
|
||||
std::unique_ptr<char[]> DecompressBuf;
|
||||
mutable std::unique_ptr<char[]> UncompressedBuf;
|
||||
int64_t UncompressedSize = -1;
|
||||
};
|
||||
|
||||
// SectionPiece represents a piece of splittable section contents.
|
||||
@ -247,7 +253,6 @@ public:
|
||||
// Splittable sections are handled as a sequence of data
|
||||
// rather than a single large blob of data.
|
||||
std::vector<SectionPiece> Pieces;
|
||||
llvm::DenseMap<uint32_t, uint32_t> OffsetMap;
|
||||
|
||||
// Returns I'th piece's data. This function is very hot when
|
||||
// string merging is enabled, so we want to inline.
|
||||
@ -255,8 +260,8 @@ public:
|
||||
llvm::CachedHashStringRef getData(size_t I) const {
|
||||
size_t Begin = Pieces[I].InputOff;
|
||||
size_t End =
|
||||
(Pieces.size() - 1 == I) ? Data.size() : Pieces[I + 1].InputOff;
|
||||
return {toStringRef(Data.slice(Begin, End - Begin)), Pieces[I].Hash};
|
||||
(Pieces.size() - 1 == I) ? data().size() : Pieces[I + 1].InputOff;
|
||||
return {toStringRef(data().slice(Begin, End - Begin)), Pieces[I].Hash};
|
||||
}
|
||||
|
||||
// Returns the SectionPiece at a given input section offset.
|
||||
@ -277,7 +282,9 @@ struct EhSectionPiece {
|
||||
unsigned FirstRelocation)
|
||||
: InputOff(Off), Sec(Sec), Size(Size), FirstRelocation(FirstRelocation) {}
|
||||
|
||||
ArrayRef<uint8_t> data() { return {Sec->Data.data() + this->InputOff, Size}; }
|
||||
ArrayRef<uint8_t> data() {
|
||||
return {Sec->data().data() + this->InputOff, Size};
|
||||
}
|
||||
|
||||
size_t InputOff;
|
||||
ssize_t OutputOff = -1;
|
||||
@ -353,6 +360,7 @@ private:
|
||||
|
||||
// The list of all input sections.
|
||||
extern std::vector<InputSectionBase *> InputSections;
|
||||
|
||||
} // namespace elf
|
||||
|
||||
std::string toString(const elf::InputSectionBase *);
|
||||
|
||||
48
deps/lld/ELF/LTO.cpp
vendored
48
deps/lld/ELF/LTO.cpp
vendored
@ -67,9 +67,10 @@ static std::string getThinLTOOutputFile(StringRef ModulePath) {
|
||||
static lto::Config createConfig() {
|
||||
lto::Config C;
|
||||
|
||||
// LLD supports the new relocations.
|
||||
// LLD supports the new relocations and address-significance tables.
|
||||
C.Options = InitTargetOptionsFromCodeGenFlags();
|
||||
C.Options.RelaxELFRelocations = true;
|
||||
C.Options.EmitAddrsig = true;
|
||||
|
||||
// Always emit a section per function/datum with LTO.
|
||||
C.Options.FunctionSections = true;
|
||||
@ -87,6 +88,7 @@ static lto::Config createConfig() {
|
||||
C.DiagHandler = diagnosticHandler;
|
||||
C.OptLevel = Config->LTOO;
|
||||
C.CPU = GetCPUStr();
|
||||
C.MAttrs = GetMAttrs();
|
||||
|
||||
// Set up a custom pipeline if we've been asked to.
|
||||
C.OptPipeline = Config->LTONewPmPasses;
|
||||
@ -101,6 +103,14 @@ static lto::Config createConfig() {
|
||||
C.DebugPassManager = Config->LTODebugPassManager;
|
||||
C.DwoDir = Config->DwoDir;
|
||||
|
||||
if (Config->EmitLLVM) {
|
||||
C.PostInternalizeModuleHook = [](size_t Task, const Module &M) {
|
||||
if (std::unique_ptr<raw_fd_ostream> OS = openFile(Config->OutputFile))
|
||||
WriteBitcodeToFile(M, *OS, false);
|
||||
return false;
|
||||
};
|
||||
}
|
||||
|
||||
if (Config->SaveTemps)
|
||||
checkError(C.addSaveTemps(Config->OutputFile.str() + ".",
|
||||
/*UseInputModulePath*/ true));
|
||||
@ -108,18 +118,14 @@ static lto::Config createConfig() {
|
||||
}
|
||||
|
||||
BitcodeCompiler::BitcodeCompiler() {
|
||||
// Initialize IndexFile.
|
||||
if (!Config->ThinLTOIndexOnlyArg.empty())
|
||||
IndexFile = openFile(Config->ThinLTOIndexOnlyArg);
|
||||
|
||||
// Initialize LTOObj.
|
||||
lto::ThinBackend Backend;
|
||||
|
||||
if (Config->ThinLTOIndexOnly) {
|
||||
StringRef Path = Config->ThinLTOIndexOnlyArg;
|
||||
if (!Path.empty())
|
||||
IndexFile = openFile(Path);
|
||||
|
||||
auto OnIndexWrite = [&](const std::string &Identifier) {
|
||||
ObjectToIndexFileState[Identifier] = true;
|
||||
};
|
||||
|
||||
auto OnIndexWrite = [&](StringRef S) { ThinIndices.erase(S); };
|
||||
Backend = lto::createWriteIndexesThinBackend(
|
||||
Config->ThinLTOPrefixReplace.first, Config->ThinLTOPrefixReplace.second,
|
||||
Config->ThinLTOEmitImportsFiles, IndexFile.get(), OnIndexWrite);
|
||||
@ -132,10 +138,10 @@ BitcodeCompiler::BitcodeCompiler() {
|
||||
|
||||
// Initialize UsedStartStop.
|
||||
for (Symbol *Sym : Symtab->getSymbols()) {
|
||||
StringRef Name = Sym->getName();
|
||||
StringRef S = Sym->getName();
|
||||
for (StringRef Prefix : {"__start_", "__stop_"})
|
||||
if (Name.startswith(Prefix))
|
||||
UsedStartStop.insert(Name.substr(Prefix.size()));
|
||||
if (S.startswith(Prefix))
|
||||
UsedStartStop.insert(S.substr(Prefix.size()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -151,7 +157,7 @@ void BitcodeCompiler::add(BitcodeFile &F) {
|
||||
bool IsExec = !Config->Shared && !Config->Relocatable;
|
||||
|
||||
if (Config->ThinLTOIndexOnly)
|
||||
ObjectToIndexFileState.insert({Obj.getName(), false});
|
||||
ThinIndices.insert(Obj.getName());
|
||||
|
||||
ArrayRef<Symbol *> Syms = F.getSymbols();
|
||||
ArrayRef<lto::InputFile::Symbol> ObjSyms = Obj.symbols();
|
||||
@ -240,15 +246,11 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
|
||||
Cache));
|
||||
|
||||
// Emit empty index files for non-indexed files
|
||||
if (Config->ThinLTOIndexOnly) {
|
||||
for (auto &Identifier : ObjectToIndexFileState)
|
||||
if (!Identifier.getValue()) {
|
||||
std::string Path = getThinLTOOutputFile(Identifier.getKey());
|
||||
openFile(Path + ".thinlto.bc");
|
||||
|
||||
if (Config->ThinLTOEmitImportsFiles)
|
||||
openFile(Path + ".imports");
|
||||
}
|
||||
for (StringRef S : ThinIndices) {
|
||||
std::string Path = getThinLTOOutputFile(S);
|
||||
openFile(Path + ".thinlto.bc");
|
||||
if (Config->ThinLTOEmitImportsFiles)
|
||||
openFile(Path + ".imports");
|
||||
}
|
||||
|
||||
// If LazyObjFile has not been added to link, emit empty index files.
|
||||
|
||||
2
deps/lld/ELF/LTO.h
vendored
2
deps/lld/ELF/LTO.h
vendored
@ -55,7 +55,7 @@ private:
|
||||
std::vector<std::unique_ptr<MemoryBuffer>> Files;
|
||||
llvm::DenseSet<StringRef> UsedStartStop;
|
||||
std::unique_ptr<llvm::raw_fd_ostream> IndexFile;
|
||||
llvm::StringMap<bool> ObjectToIndexFileState;
|
||||
llvm::DenseSet<StringRef> ThinIndices;
|
||||
};
|
||||
} // namespace elf
|
||||
} // namespace lld
|
||||
|
||||
33
deps/lld/ELF/LinkerScript.cpp
vendored
33
deps/lld/ELF/LinkerScript.cpp
vendored
@ -169,7 +169,7 @@ void LinkerScript::addSymbol(SymbolAssignment *Cmd) {
|
||||
// Define a symbol.
|
||||
Symbol *Sym;
|
||||
uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
|
||||
std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, /*Type*/ 0, Visibility,
|
||||
std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, Visibility,
|
||||
/*CanOmitFromDynSym*/ false,
|
||||
/*File*/ nullptr);
|
||||
ExprValue Value = Cmd->Expression();
|
||||
@ -202,13 +202,14 @@ static void declareSymbol(SymbolAssignment *Cmd) {
|
||||
// We can't calculate final value right now.
|
||||
Symbol *Sym;
|
||||
uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
|
||||
std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, /*Type*/ 0, Visibility,
|
||||
std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, Visibility,
|
||||
/*CanOmitFromDynSym*/ false,
|
||||
/*File*/ nullptr);
|
||||
replaceSymbol<Defined>(Sym, nullptr, Cmd->Name, STB_GLOBAL, Visibility,
|
||||
STT_NOTYPE, 0, 0, nullptr);
|
||||
Cmd->Sym = cast<Defined>(Sym);
|
||||
Cmd->Provide = false;
|
||||
Sym->ScriptDefined = true;
|
||||
}
|
||||
|
||||
// This method is used to handle INSERT AFTER statement. Here we rebuild
|
||||
@ -414,18 +415,16 @@ LinkerScript::computeInputSections(const InputSectionDescription *Cmd) {
|
||||
|
||||
void LinkerScript::discard(ArrayRef<InputSection *> V) {
|
||||
for (InputSection *S : V) {
|
||||
if (S == InX::ShStrTab || S == InX::Dynamic || S == InX::DynSymTab ||
|
||||
S == InX::DynStrTab || S == InX::RelaPlt || S == InX::RelaDyn ||
|
||||
S == InX::RelrDyn)
|
||||
if (S == In.ShStrTab || S == In.RelaDyn || S == In.RelrDyn)
|
||||
error("discarding " + S->Name + " section is not allowed");
|
||||
|
||||
// You can discard .hash and .gnu.hash sections by linker scripts. Since
|
||||
// they are synthesized sections, we need to handle them differently than
|
||||
// other regular sections.
|
||||
if (S == InX::GnuHashTab)
|
||||
InX::GnuHashTab = nullptr;
|
||||
if (S == InX::HashTab)
|
||||
InX::HashTab = nullptr;
|
||||
if (S == In.GnuHashTab)
|
||||
In.GnuHashTab = nullptr;
|
||||
if (S == In.HashTab)
|
||||
In.HashTab = nullptr;
|
||||
|
||||
S->Assigned = false;
|
||||
S->Live = false;
|
||||
@ -701,6 +700,7 @@ uint64_t LinkerScript::advance(uint64_t Size, unsigned Alignment) {
|
||||
}
|
||||
|
||||
void LinkerScript::output(InputSection *S) {
|
||||
assert(Ctx->OutSec == S->getParent());
|
||||
uint64_t Before = advance(0, 1);
|
||||
uint64_t Pos = advance(S->getSize(), S->Alignment);
|
||||
S->OutSecOff = Pos - S->getSize() - Ctx->OutSec->Addr;
|
||||
@ -816,21 +816,8 @@ void LinkerScript::assignOffsets(OutputSection *Sec) {
|
||||
// Handle a single input section description command.
|
||||
// It calculates and assigns the offsets for each section and also
|
||||
// updates the output section size.
|
||||
auto *Cmd = cast<InputSectionDescription>(Base);
|
||||
for (InputSection *Sec : Cmd->Sections) {
|
||||
// We tentatively added all synthetic sections at the beginning and
|
||||
// removed empty ones afterwards (because there is no way to know
|
||||
// whether they were going be empty or not other than actually running
|
||||
// linker scripts.) We need to ignore remains of empty sections.
|
||||
if (auto *S = dyn_cast<SyntheticSection>(Sec))
|
||||
if (S->empty())
|
||||
continue;
|
||||
|
||||
if (!Sec->Live)
|
||||
continue;
|
||||
assert(Ctx->OutSec == Sec->getParent());
|
||||
for (InputSection *Sec : cast<InputSectionDescription>(Base)->Sections)
|
||||
output(Sec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user