Merge remote-tracking branch 'origin/llvm8'

Andrew Kelley 2019-03-20 13:34:07 -04:00
commit 3c7555cb67
GPG Key ID: 7C5F548F728501A9
773 changed files with 25680 additions and 9971 deletions


@ -1,43 +1,11 @@
image: freebsd/latest
packages:
- cmake
- ninja
- llvm70
- py27-s3cmd
- wget
secrets:
- 6c60aaee-92e7-4e7d-812c-114817689b4d
sources:
- https://github.com/ziglang/zig
tasks:
- build: |
cd zig && mkdir build && cd build
cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release
ninja install
- test: |
cd zig/build
bin/zig test ../test/stage1/behavior.zig
bin/zig test ../std/special/compiler_rt.zig
bin/zig test ../test/stage1/behavior.zig --library c
bin/zig test ../std/special/compiler_rt.zig --library c
bin/zig test ../test/stage1/behavior.zig --release-fast
bin/zig test ../std/special/compiler_rt.zig --release-fast
bin/zig test ../test/stage1/behavior.zig --release-fast --library c
bin/zig test ../std/special/compiler_rt.zig --release-fast --library c
bin/zig test ../test/stage1/behavior.zig --release-small --library c
bin/zig test ../std/special/compiler_rt.zig --release-small --library c
bin/zig test ../test/stage1/behavior.zig --release-small
bin/zig test ../std/special/compiler_rt.zig --release-small
bin/zig test ../test/stage1/behavior.zig --release-safe
bin/zig test ../std/special/compiler_rt.zig --release-safe
bin/zig test ../test/stage1/behavior.zig --release-safe --library c
bin/zig test ../std/special/compiler_rt.zig --release-safe --library c
# TODO enable all tests
#bin/zig build --build-file ../build.zig test
# TODO integrate with the download page updater and make a
# static build available to download for FreeBSD.
# This will require setting up a cache of LLVM/Clang built
# statically.
- build: cd zig && ./ci/srht/freebsd_script

CMakeLists.txt

@ -56,10 +56,8 @@ endif()
if(APPLE AND ZIG_STATIC)
list(REMOVE_ITEM LLVM_LIBRARIES "-lz")
list(REMOVE_ITEM LLVM_LIBRARIES "-lcurses")
find_library(ZLIB NAMES z zlib libz)
find_library(LIBNCURSES NAMES libncurses.a)
list(APPEND LLVM_LIBRARIES "${LIBNCURSES}" "${ZLIB}")
list(APPEND LLVM_LIBRARIES "${ZLIB}")
endif()
set(ZIG_CPP_LIB_DIR "${CMAKE_BINARY_DIR}/zig_cpp")
@ -117,6 +115,7 @@ else()
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/MachO/WriterMachO.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp"
)
set(EMBEDDED_LLD_ELF_SOURCES
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/AArch64ErrataFix.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/AArch64.cpp"
@ -124,19 +123,21 @@ else()
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/ARM.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/AVR.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/Hexagon.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/MSP430.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/Mips.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/MipsArchTree.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/PPC.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/PPC64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/RISCV.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/SPARCV9.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/X86.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Arch/X86_64.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/CallGraphSort.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/DWARF.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Driver.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/DriverUtils.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/EhFrame.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/Filesystem.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/GdbIndex.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/ICF.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/InputFiles.cpp"
"${CMAKE_SOURCE_DIR}/deps/lld/ELF/InputSection.cpp"
@ -2800,13 +2801,6 @@ set(ZIG_LIBC_FILES
"glibc/sysdeps/mips/mips64/n64/crtn.S"
"glibc/sysdeps/mips/nptl/bits/pthreadtypes-arch.h"
"glibc/sysdeps/mips/start.S"
"glibc/sysdeps/nios2/bits/endian.h"
"glibc/sysdeps/nios2/crti.S"
"glibc/sysdeps/nios2/crtn.S"
"glibc/sysdeps/nios2/dl-sysdep.h"
"glibc/sysdeps/nios2/nptl/bits/pthreadtypes-arch.h"
"glibc/sysdeps/nios2/start.S"
"glibc/sysdeps/nios2/sysdep.h"
"glibc/sysdeps/nptl/bits/pthreadtypes.h"
"glibc/sysdeps/nptl/bits/thread-shared-types.h"
"glibc/sysdeps/nptl/libc-lock.h"
@ -2909,7 +2903,6 @@ set(ZIG_LIBC_FILES
"glibc/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h"
"glibc/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h"
"glibc/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h"
"glibc/sysdeps/unix/sysv/linux/nios2/sysdep.h"
"glibc/sysdeps/unix/sysv/linux/powerpc/bits/stat.h"
"glibc/sysdeps/unix/sysv/linux/powerpc/kernel-features.h"
"glibc/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h"

README.md

@ -101,7 +101,6 @@ clarity.
|i386 | Tier 2 | Tier 2 | Tier 4 | Tier 2 | Tier 3 | Tier 3 | Tier 3 | Tier 3 |
|arm | Tier 2 | Tier 3 | Tier 3 | Tier 3 | Tier 3 | Tier 3 | Tier 3 | Tier 3 |
|arm64 | Tier 2 | Tier 2 | Tier 3 | Tier 3 | Tier 3 | Tier 3 | Tier 3 | Tier 3 |
|avr | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|bpf | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|hexagon | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|mips | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
@ -110,8 +109,9 @@ clarity.
|sparc | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|s390x | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|lanai | Tier 3 | Tier 3 | N/A | N/A | Tier 3 | Tier 3 | N/A | Tier 3 |
|wasm32 | Tier 4 | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
|wasm64 | Tier 4 | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
|wasm32 | Tier 3 | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
|wasm64 | Tier 3 | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
|avr | Tier 4 | Tier 4 | N/A | N/A | Tier 4 | Tier 4 | N/A | Tier 4 |
|riscv32 | Tier 4 | Tier 4 | N/A | N/A | Tier 4 | Tier 4 | Tier 4 | Tier 4 |
|riscv64 | Tier 4 | Tier 4 | N/A | N/A | Tier 4 | Tier 4 | Tier 4 | Tier 4 |
|xcore | Tier 4 | Tier 4 | N/A | N/A | Tier 4 | Tier 4 | N/A | Tier 4 |
@ -149,13 +149,13 @@ Note that you can
* cmake >= 2.8.5
* gcc >= 5.0.0 or clang >= 3.6.0
* LLVM, Clang, LLD development libraries == 7.x, compiled with the same gcc or clang version above
* LLVM, Clang, LLD development libraries == 8.x, compiled with the same gcc or clang version above
##### Windows
* cmake >= 2.8.5
* Microsoft Visual Studio 2017
* LLVM, Clang, LLD development libraries == 7.x, compiled with the same MSVC version above
* Microsoft Visual Studio 2017 (version 15.8)
* LLVM, Clang, LLD development libraries == 8.x, compiled with the same MSVC version above
#### Instructions
@ -173,11 +173,11 @@ bin/zig build --build-file ../build.zig test
##### MacOS
```
brew install cmake llvm@7
brew outdated llvm@7 || brew upgrade llvm@7
brew install cmake llvm@8
brew outdated llvm@8 || brew upgrade llvm@8
mkdir build
cd build
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@7/
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@8/
make install
bin/zig build --build-file ../build.zig test
```

c_headers/__clang_cuda_runtime_wrapper.h vendored

@ -62,10 +62,15 @@
#include "cuda.h"
#if !defined(CUDA_VERSION)
#error "cuda.h did not define CUDA_VERSION"
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9020
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10000
#error "Unsupported CUDA version!"
#endif
#pragma push_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__")
#if CUDA_VERSION >= 10000
#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
#endif
// Make largest subset of device functions available during host
// compilation -- SM_35 for the time being.
#ifndef __CUDA_ARCH__
@ -419,6 +424,7 @@ __device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
#pragma pop_macro("dim3")
#pragma pop_macro("uint3")
#pragma pop_macro("__USE_FAST_MATH__")
#pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__")
#endif // __CUDA__
#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__
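
The hunks above widen the accepted CUDA range to 10.0 and define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ only between a push_macro/pop_macro pair, so the wrapper cannot leak the macro into user code. A minimal sketch of that scoping idiom, using a hypothetical macro name rather than the wrapper's real one:

```
/* Sketch of the push_macro/pop_macro scoping idiom: the macro is
 * redefined only for the duration of the "wrapper" region, and the
 * caller's prior definition is restored afterwards.
 * INTERNAL_FLAG is an illustrative name, not the real macro. */
#include <stdio.h>

#define INTERNAL_FLAG 0  /* pretend the user already defined it */

#pragma push_macro("INTERNAL_FLAG")
#undef INTERNAL_FLAG
#define INTERNAL_FLAG 1  /* wrapper-local definition */

static int flag_inside_wrapper(void) { return INTERNAL_FLAG; }

#pragma pop_macro("INTERNAL_FLAG")  /* user's definition restored */

int main(void) {
    /* prints "inside wrapper: 1, after pop: 0" */
    printf("inside wrapper: %d, after pop: %d\n",
           flag_inside_wrapper(), INTERNAL_FLAG);
    return 0;
}
```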

c_headers/adxintrin.h vendored

@ -53,7 +53,7 @@ static __inline unsigned char __DEFAULT_FN_ATTRS
_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
unsigned int *__p)
{
return __builtin_ia32_addcarry_u32(__cf, __x, __y, __p);
return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
}
#ifdef __x86_64__
@ -61,7 +61,7 @@ static __inline unsigned char __DEFAULT_FN_ATTRS
_addcarry_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p)
{
return __builtin_ia32_addcarry_u64(__cf, __x, __y, __p);
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
#endif
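
These hunks only rename the underlying builtin (LLVM 8 folded __builtin_ia32_addcarry_u32/u64 into the addcarryx variants); the user-facing _addcarry_u32/_addcarry_u64 intrinsics still consume and produce a carry flag, which is what makes them chainable for multiword arithmetic. A small usage sketch, assuming an x86-64 compiler and <immintrin.h>:

```
/* Sketch: 128-bit addition from two 64-bit limbs using the carry-chain
 * intrinsic whose underlying builtin this hunk renames. x86-64 only. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned long long lo, hi;
    unsigned char c = _addcarry_u64(0, 0xFFFFFFFFFFFFFFFFULL, 1, &lo); /* lo = 0, carry out = 1 */
    c = _addcarry_u64(c, 2, 3, &hi);                                   /* hi = 2 + 3 + 1 = 6 */
    printf("hi=%llu lo=%llu final carry=%u\n", hi, lo, c);
    return 0;
}
```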

c_headers/altivec.h vendored

@ -9492,49 +9492,51 @@ vec_splat_u32(signed char __a) {
/* vec_sr */
static __inline__ vector signed char __ATTRS_o_ai
vec_sr(vector signed char __a, vector unsigned char __b) {
vector unsigned char __res = (vector unsigned char)__a >> __b;
return (vector signed char)__res;
}
// vec_sr does modulo arithmetic on __b first, so __b is allowed to be more
// than the length of __a.
static __inline__ vector unsigned char __ATTRS_o_ai
vec_sr(vector unsigned char __a, vector unsigned char __b) {
return __a >> __b;
return __a >>
(__b % (vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__));
}
static __inline__ vector signed short __ATTRS_o_ai
vec_sr(vector signed short __a, vector unsigned short __b) {
vector unsigned short __res = (vector unsigned short)__a >> __b;
return (vector signed short)__res;
static __inline__ vector signed char __ATTRS_o_ai
vec_sr(vector signed char __a, vector unsigned char __b) {
return (vector signed char)vec_sr((vector unsigned char)__a, __b);
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_sr(vector unsigned short __a, vector unsigned short __b) {
return __a >> __b;
return __a >>
(__b % (vector unsigned short)(sizeof(unsigned short) * __CHAR_BIT__));
}
static __inline__ vector signed int __ATTRS_o_ai
vec_sr(vector signed int __a, vector unsigned int __b) {
vector unsigned int __res = (vector unsigned int)__a >> __b;
return (vector signed int)__res;
static __inline__ vector short __ATTRS_o_ai vec_sr(vector short __a,
vector unsigned short __b) {
return (vector short)vec_sr((vector unsigned short)__a, __b);
}
static __inline__ vector unsigned int __ATTRS_o_ai
vec_sr(vector unsigned int __a, vector unsigned int __b) {
return __a >> __b;
return __a >>
(__b % (vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__));
}
static __inline__ vector int __ATTRS_o_ai vec_sr(vector int __a,
vector unsigned int __b) {
return (vector int)vec_sr((vector unsigned int)__a, __b);
}
#ifdef __POWER8_VECTOR__
static __inline__ vector signed long long __ATTRS_o_ai
vec_sr(vector signed long long __a, vector unsigned long long __b) {
vector unsigned long long __res = (vector unsigned long long)__a >> __b;
return (vector signed long long)__res;
}
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
return __a >> __b;
return __a >> (__b % (vector unsigned long long)(sizeof(unsigned long long) *
__CHAR_BIT__));
}
static __inline__ vector long long __ATTRS_o_ai
vec_sr(vector long long __a, vector unsigned long long __b) {
return (vector long long)vec_sr((vector unsigned long long)__a, __b);
}
#endif
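
The rewritten vec_sr overloads mask the shift count modulo the element width before shifting, matching the modulo semantics noted in the comment above, and the signed overloads now forward to the unsigned ones. A sketch of the observable behavior, assuming a PowerPC target compiled with -maltivec:

```
/* Sketch of the semantics the new vec_sr bodies implement: shift counts
 * are taken modulo the element width (8 bits for char elements), so a
 * count of 9 behaves like a count of 1. Requires -maltivec on PowerPC. */
#include <altivec.h>
#include <stdio.h>

int main(void) {
    vector unsigned char v = vec_splats((unsigned char)0x80);
    vector unsigned char big_shift = vec_splats((unsigned char)9); /* 9 % 8 == 1 */
    vector unsigned char r = vec_sr(v, big_shift);
    printf("0x%02x\n", r[0]); /* 0x40, same as shifting by 1 */
    return 0;
}
```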
@ -9544,12 +9546,12 @@ vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
static __inline__ vector signed char __ATTRS_o_ai
vec_vsrb(vector signed char __a, vector unsigned char __b) {
return __a >> (vector signed char)__b;
return vec_sr(__a, __b);
}
static __inline__ vector unsigned char __ATTRS_o_ai
vec_vsrb(vector unsigned char __a, vector unsigned char __b) {
return __a >> __b;
return vec_sr(__a, __b);
}
/* vec_vsrh */
@ -9558,12 +9560,12 @@ vec_vsrb(vector unsigned char __a, vector unsigned char __b) {
static __inline__ vector short __ATTRS_o_ai
vec_vsrh(vector short __a, vector unsigned short __b) {
return __a >> (vector short)__b;
return vec_sr(__a, __b);
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_vsrh(vector unsigned short __a, vector unsigned short __b) {
return __a >> __b;
return vec_sr(__a, __b);
}
/* vec_vsrw */
@ -9572,12 +9574,12 @@ vec_vsrh(vector unsigned short __a, vector unsigned short __b) {
static __inline__ vector int __ATTRS_o_ai vec_vsrw(vector int __a,
vector unsigned int __b) {
return __a >> (vector int)__b;
return vec_sr(__a, __b);
}
static __inline__ vector unsigned int __ATTRS_o_ai
vec_vsrw(vector unsigned int __a, vector unsigned int __b) {
return __a >> __b;
return vec_sr(__a, __b);
}
/* vec_sra */
@ -16353,67 +16355,82 @@ vec_revb(vector unsigned __int128 __a) {
/* vec_xl */
typedef vector signed char unaligned_vec_schar __attribute__((aligned(1)));
typedef vector unsigned char unaligned_vec_uchar __attribute__((aligned(1)));
typedef vector signed short unaligned_vec_sshort __attribute__((aligned(1)));
typedef vector unsigned short unaligned_vec_ushort __attribute__((aligned(1)));
typedef vector signed int unaligned_vec_sint __attribute__((aligned(1)));
typedef vector unsigned int unaligned_vec_uint __attribute__((aligned(1)));
typedef vector float unaligned_vec_float __attribute__((aligned(1)));
static inline __ATTRS_o_ai vector signed char vec_xl(signed long long __offset,
signed char *__ptr) {
return *(vector signed char *)(__ptr + __offset);
return *(unaligned_vec_schar *)(__ptr + __offset);
}
static inline __ATTRS_o_ai vector unsigned char
vec_xl(signed long long __offset, unsigned char *__ptr) {
return *(vector unsigned char *)(__ptr + __offset);
return *(unaligned_vec_uchar*)(__ptr + __offset);
}
static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset,
signed short *__ptr) {
return *(vector signed short *)(__ptr + __offset);
return *(unaligned_vec_sshort *)(__ptr + __offset);
}
static inline __ATTRS_o_ai vector unsigned short
vec_xl(signed long long __offset, unsigned short *__ptr) {
return *(vector unsigned short *)(__ptr + __offset);
return *(unaligned_vec_ushort *)(__ptr + __offset);
}
static inline __ATTRS_o_ai vector signed int vec_xl(signed long long __offset,
signed int *__ptr) {
return *(vector signed int *)(__ptr + __offset);
return *(unaligned_vec_sint *)(__ptr + __offset);
}
static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset,
unsigned int *__ptr) {
return *(vector unsigned int *)(__ptr + __offset);
return *(unaligned_vec_uint *)(__ptr + __offset);
}
static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset,
float *__ptr) {
return *(vector float *)(__ptr + __offset);
return *(unaligned_vec_float *)(__ptr + __offset);
}
#ifdef __VSX__
typedef vector signed long long unaligned_vec_sll __attribute__((aligned(1)));
typedef vector unsigned long long unaligned_vec_ull __attribute__((aligned(1)));
typedef vector double unaligned_vec_double __attribute__((aligned(1)));
static inline __ATTRS_o_ai vector signed long long
vec_xl(signed long long __offset, signed long long *__ptr) {
return *(vector signed long long *)(__ptr + __offset);
return *(unaligned_vec_sll *)(__ptr + __offset);
}
static inline __ATTRS_o_ai vector unsigned long long
vec_xl(signed long long __offset, unsigned long long *__ptr) {
return *(vector unsigned long long *)(__ptr + __offset);
return *(unaligned_vec_ull *)(__ptr + __offset);
}
static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset,
double *__ptr) {
return *(vector double *)(__ptr + __offset);
return *(unaligned_vec_double *)(__ptr + __offset);
}
#endif
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
typedef vector signed __int128 unaligned_vec_si128 __attribute__((aligned(1)));
typedef vector unsigned __int128 unaligned_vec_ui128
__attribute__((aligned(1)));
static inline __ATTRS_o_ai vector signed __int128
vec_xl(signed long long __offset, signed __int128 *__ptr) {
return *(vector signed __int128 *)(__ptr + __offset);
return *(unaligned_vec_si128 *)(__ptr + __offset);
}
static inline __ATTRS_o_ai vector unsigned __int128
vec_xl(signed long long __offset, unsigned __int128 *__ptr) {
return *(vector unsigned __int128 *)(__ptr + __offset);
return *(unaligned_vec_ui128 *)(__ptr + __offset);
}
#endif
@ -16498,62 +16515,62 @@ vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) {
static inline __ATTRS_o_ai void vec_xst(vector signed char __vec,
signed long long __offset,
signed char *__ptr) {
*(vector signed char *)(__ptr + __offset) = __vec;
*(unaligned_vec_schar *)(__ptr + __offset) = __vec;
}
static inline __ATTRS_o_ai void vec_xst(vector unsigned char __vec,
signed long long __offset,
unsigned char *__ptr) {
*(vector unsigned char *)(__ptr + __offset) = __vec;
*(unaligned_vec_uchar *)(__ptr + __offset) = __vec;
}
static inline __ATTRS_o_ai void vec_xst(vector signed short __vec,
signed long long __offset,
signed short *__ptr) {
*(vector signed short *)(__ptr + __offset) = __vec;
*(unaligned_vec_sshort *)(__ptr + __offset) = __vec;
}
static inline __ATTRS_o_ai void vec_xst(vector unsigned short __vec,
signed long long __offset,
unsigned short *__ptr) {
*(vector unsigned short *)(__ptr + __offset) = __vec;
*(unaligned_vec_ushort *)(__ptr + __offset) = __vec;
}
static inline __ATTRS_o_ai void vec_xst(vector signed int __vec,
signed long long __offset,
signed int *__ptr) {
*(vector signed int *)(__ptr + __offset) = __vec;
*(unaligned_vec_sint *)(__ptr + __offset) = __vec;
}
static inline __ATTRS_o_ai void vec_xst(vector unsigned int __vec,
signed long long __offset,
unsigned int *__ptr) {
*(vector unsigned int *)(__ptr + __offset) = __vec;
*(unaligned_vec_uint *)(__ptr + __offset) = __vec;
}
static inline __ATTRS_o_ai void vec_xst(vector float __vec,
signed long long __offset,
float *__ptr) {
*(vector float *)(__ptr + __offset) = __vec;
*(unaligned_vec_float *)(__ptr + __offset) = __vec;
}
#ifdef __VSX__
static inline __ATTRS_o_ai void vec_xst(vector signed long long __vec,
signed long long __offset,
signed long long *__ptr) {
*(vector signed long long *)(__ptr + __offset) = __vec;
*(unaligned_vec_sll *)(__ptr + __offset) = __vec;
}
static inline __ATTRS_o_ai void vec_xst(vector unsigned long long __vec,
signed long long __offset,
unsigned long long *__ptr) {
*(vector unsigned long long *)(__ptr + __offset) = __vec;
*(unaligned_vec_ull *)(__ptr + __offset) = __vec;
}
static inline __ATTRS_o_ai void vec_xst(vector double __vec,
signed long long __offset,
double *__ptr) {
*(vector double *)(__ptr + __offset) = __vec;
*(unaligned_vec_double *)(__ptr + __offset) = __vec;
}
#endif
@ -16561,13 +16578,13 @@ static inline __ATTRS_o_ai void vec_xst(vector double __vec,
static inline __ATTRS_o_ai void vec_xst(vector signed __int128 __vec,
signed long long __offset,
signed __int128 *__ptr) {
*(vector signed __int128 *)(__ptr + __offset) = __vec;
*(unaligned_vec_si128 *)(__ptr + __offset) = __vec;
}
static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec,
signed long long __offset,
unsigned __int128 *__ptr) {
*(vector unsigned __int128 *)(__ptr + __offset) = __vec;
*(unaligned_vec_ui128 *)(__ptr + __offset) = __vec;
}
#endif
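
All of the vec_xl/vec_xst bodies above now cast through vector typedefs declared with aligned(1), which tells the compiler the pointer may be misaligned, so it emits unaligned load/store sequences instead of assuming 16-byte alignment. The same idiom in isolation, with illustrative type names (plain GCC/Clang vector extensions, no AltiVec required):

```
/* Sketch of the aligned(1) typedef idiom vec_xl/vec_xst now use:
 * dereferencing through a 1-byte-aligned vector type makes the access
 * legal at any alignment. Type names here are illustrative. */
#include <stdio.h>

typedef int v4si __attribute__((vector_size(16)));
typedef v4si unaligned_v4si __attribute__((aligned(1)));

static v4si load_unaligned(const void *p) {
    return *(const unaligned_v4si *)p;  /* safe at any alignment */
}

int main(void) {
    int buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    /* Deliberately misaligned by one int relative to 16 bytes. */
    v4si v = load_unaligned(buf + 1);
    printf("%d %d %d %d\n", v[0], v[1], v[2], v[3]); /* 2 3 4 5 */
    return 0;
}
```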

c_headers/arm_fp16.h vendored

@ -27,7 +27,7 @@
#include <stdint.h>
typedef __fp16 float16_t;
#define __ai static inline __attribute__((__always_inline__, __nodebug__))
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)
#ifdef __LITTLE_ENDIAN__

c_headers/arm_neon.h vendored

File diff suppressed because it is too large

c_headers/avx512dqintrin.h vendored

@ -29,180 +29,309 @@
#define __AVX512DQINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline __mmask8 __DEFAULT_FN_ATTRS
_knot_mask8(__mmask8 __M)
{
return __builtin_ia32_knotqi(__M);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kand_mask8(__mmask8 __A, __mmask8 __B)
{
return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kandn_mask8(__mmask8 __A, __mmask8 __B)
{
return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kor_mask8(__mmask8 __A, __mmask8 __B)
{
return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxnor_mask8(__mmask8 __A, __mmask8 __B)
{
return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxor_mask8(__mmask8 __A, __mmask8 __B)
{
return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
{
return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
{
return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kadd_mask8(__mmask8 __A, __mmask8 __B)
{
return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_kadd_mask16(__mmask16 __A, __mmask16 __B)
{
return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B);
}
#define _kshiftli_mask8(A, I) \
(__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I))
#define _kshiftri_mask8(A, I) \
(__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I))
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask8_u32(__mmask8 __A) {
return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_cvtu32_mask8(unsigned int __A) {
return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_load_mask8(__mmask8 *__A) {
return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A);
}
static __inline__ void __DEFAULT_FN_ATTRS
_store_mask8(__mmask8 *__A, __mmask8 __B) {
*(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
return (__m512i) ((__v8du) __A * (__v8du) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_mullo_epi64(__A, __B),
(__v8di)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_mullo_epi64(__A, __B),
(__v8di)_mm512_setzero_si512());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_xor_pd(__m512d __A, __m512d __B) {
return (__m512d)((__v8du)__A ^ (__v8du)__B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_xor_pd(__A, __B),
(__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_xor_pd(__A, __B),
(__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_xor_ps (__m512 __A, __m512 __B) {
return (__m512)((__v16su)__A ^ (__v16su)__B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_xor_ps(__A, __B),
(__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_xor_ps(__A, __B),
(__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_or_pd(__m512d __A, __m512d __B) {
return (__m512d)((__v8du)__A | (__v8du)__B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_or_pd(__A, __B),
(__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_or_pd(__A, __B),
(__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_or_ps(__m512 __A, __m512 __B) {
return (__m512)((__v16su)__A | (__v16su)__B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_or_ps(__A, __B),
(__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_or_ps(__A, __B),
(__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_and_pd(__m512d __A, __m512d __B) {
return (__m512d)((__v8du)__A & (__v8du)__B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_and_pd(__A, __B),
(__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_and_pd(__A, __B),
(__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_and_ps(__m512 __A, __m512 __B) {
return (__m512)((__v16su)__A & (__v16su)__B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_and_ps(__A, __B),
(__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_and_ps(__A, __B),
(__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_andnot_pd(__m512d __A, __m512d __B) {
return (__m512d)(~(__v8du)__A & (__v8du)__B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_andnot_pd(__A, __B),
(__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_andnot_pd(__A, __B),
(__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_andnot_ps(__m512 __A, __m512 __B) {
return (__m512)(~(__v16su)__A & (__v16su)__B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_andnot_ps(__A, __B),
(__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_andnot_ps(__A, __B),
(__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epi64 (__m512d __A) {
return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
(__v8di) _mm512_setzero_si512(),
@ -210,7 +339,7 @@ _mm512_cvtpd_epi64 (__m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
(__v8di) __W,
@ -218,7 +347,7 @@ _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
(__v8di) _mm512_setzero_si512(),
@ -241,7 +370,7 @@ _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
(__v8di)_mm512_setzero_si512(), \
(__mmask8)(U), (int)(R))
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epu64 (__m512d __A) {
return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
(__v8di) _mm512_setzero_si512(),
@ -249,7 +378,7 @@ _mm512_cvtpd_epu64 (__m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
(__v8di) __W,
@ -257,7 +386,7 @@ _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
(__v8di) _mm512_setzero_si512(),
@ -280,7 +409,7 @@ _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
(__v8di)_mm512_setzero_si512(), \
(__mmask8)(U), (int)(R))
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epi64 (__m256 __A) {
return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
(__v8di) _mm512_setzero_si512(),
@ -288,7 +417,7 @@ _mm512_cvtps_epi64 (__m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
(__v8di) __W,
@ -296,7 +425,7 @@ _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
(__v8di) _mm512_setzero_si512(),
@ -319,7 +448,7 @@ _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
(__v8di)_mm512_setzero_si512(), \
(__mmask8)(U), (int)(R))
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epu64 (__m256 __A) {
return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
(__v8di) _mm512_setzero_si512(),
@ -327,7 +456,7 @@ _mm512_cvtps_epu64 (__m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
(__v8di) __W,
@ -335,7 +464,7 @@ _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
(__v8di) _mm512_setzero_si512(),
@ -359,19 +488,19 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
(__mmask8)(U), (int)(R))
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_pd (__m512i __A) {
return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_cvtepi64_pd(__A),
(__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_cvtepi64_pd(__A),
@ -393,7 +522,7 @@ _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
(__v8df)_mm512_setzero_pd(), \
(__mmask8)(U), (int)(R))
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_ps (__m512i __A) {
return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
(__v8sf) _mm256_setzero_ps(),
@ -401,7 +530,7 @@ _mm512_cvtepi64_ps (__m512i __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
(__v8sf) __W,
@ -409,7 +538,7 @@ _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
(__v8sf) _mm256_setzero_ps(),
@ -433,7 +562,7 @@ _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
(__mmask8)(U), (int)(R))
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epi64 (__m512d __A) {
return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
(__v8di) _mm512_setzero_si512(),
@ -441,7 +570,7 @@ _mm512_cvttpd_epi64 (__m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
(__v8di) __W,
@ -449,7 +578,7 @@ _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
(__v8di) _mm512_setzero_si512(),
@ -472,7 +601,7 @@ _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
(__v8di)_mm512_setzero_si512(), \
(__mmask8)(U), (int)(R))
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epu64 (__m512d __A) {
return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
(__v8di) _mm512_setzero_si512(),
@ -480,7 +609,7 @@ _mm512_cvttpd_epu64 (__m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
(__v8di) __W,
@ -488,7 +617,7 @@ _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
(__v8di) _mm512_setzero_si512(),
@ -511,7 +640,7 @@ _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
(__v8di)_mm512_setzero_si512(), \
(__mmask8)(U), (int)(R))
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epi64 (__m256 __A) {
return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
(__v8di) _mm512_setzero_si512(),
@ -519,7 +648,7 @@ _mm512_cvttps_epi64 (__m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
(__v8di) __W,
@ -527,7 +656,7 @@ _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
(__v8di) _mm512_setzero_si512(),
@ -550,7 +679,7 @@ _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
(__v8di)_mm512_setzero_si512(), \
(__mmask8)(U), (int)(R))
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epu64 (__m256 __A) {
return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
(__v8di) _mm512_setzero_si512(),
@ -558,7 +687,7 @@ _mm512_cvttps_epu64 (__m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
(__v8di) __W,
@ -566,7 +695,7 @@ _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
(__v8di) _mm512_setzero_si512(),
@ -589,19 +718,19 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
(__v8di)_mm512_setzero_si512(), \
(__mmask8)(U), (int)(R))
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_pd (__m512i __A) {
return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_cvtepu64_pd(__A),
(__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_cvtepu64_pd(__A),
@ -625,7 +754,7 @@ _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
(__mmask8)(U), (int)(R))
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_ps (__m512i __A) {
return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
(__v8sf) _mm256_setzero_ps(),
@ -633,7 +762,7 @@ _mm512_cvtepu64_ps (__m512i __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
(__v8sf) __W,
@ -641,7 +770,7 @@ _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
_MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
(__v8sf) _mm256_setzero_ps(),
@ -935,32 +1064,32 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U), (int)(C), (int)(R))
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_movepi32_mask (__m512i __A)
{
return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi32 (__mmask16 __A)
{
return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi64 (__mmask8 __A)
{
return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
}
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_movepi64_mask (__m512i __A)
{
return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x2 (__m128 __A)
{
return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
@ -968,7 +1097,7 @@ _mm512_broadcast_f32x2 (__m128 __A)
0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
{
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@ -976,7 +1105,7 @@ _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
(__v16sf)__O);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
{
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@ -984,7 +1113,7 @@ _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
(__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x8(__m256 __A)
{
return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
@ -992,7 +1121,7 @@ _mm512_broadcast_f32x8(__m256 __A)
0, 1, 2, 3, 4, 5, 6, 7);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
{
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@ -1000,7 +1129,7 @@ _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
(__v16sf)__O);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
{
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@ -1008,14 +1137,14 @@ _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
(__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_broadcast_f64x2(__m128d __A)
{
return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
{
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
@ -1023,7 +1152,7 @@ _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
(__v8df)__O);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
{
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
@ -1031,7 +1160,7 @@ _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
(__v8df)_mm512_setzero_pd());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x2 (__m128i __A)
{
return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
@ -1039,7 +1168,7 @@ _mm512_broadcast_i32x2 (__m128i __A)
0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
{
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@ -1047,7 +1176,7 @@ _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
(__v16si)__O);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
{
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@ -1055,7 +1184,7 @@ _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
(__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x8(__m256i __A)
{
return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
@ -1063,7 +1192,7 @@ _mm512_broadcast_i32x8(__m256i __A)
0, 1, 2, 3, 4, 5, 6, 7);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
{
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@ -1071,7 +1200,7 @@ _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
(__v16si)__O);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
{
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@ -1079,14 +1208,14 @@ _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
(__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i64x2(__m128i __A)
{
return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@ -1094,7 +1223,7 @@ _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
(__v8di)__O);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@ -1256,6 +1385,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
(__mmask8)(U))
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS
#endif
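
The rewrite above splits the old single attribute macro in two: __DEFAULT_FN_ATTRS512 keeps __min_vector_width__(512) for intrinsics that actually operate on 512-bit vectors, while the new k-register mask operations (_knot_mask8, _kand_mask8, and friends) use a plain __DEFAULT_FN_ATTRS, since they never touch zmm registers. A compile-only sketch of the pattern, with illustrative function names (Clang-specific attributes):

```
/* Sketch of the attribute split: vector intrinsics declare a 512-bit
 * minimum vector width, mask-register helpers do not. Function names
 * are illustrative, not part of the header. */
#include <immintrin.h>

#define ATTRS512 __attribute__((__always_inline__, __nodebug__, \
                                __target__("avx512dq"), __min_vector_width__(512)))
#define ATTRS    __attribute__((__always_inline__, __nodebug__, \
                                __target__("avx512dq")))

/* Touches zmm registers: carries the min-vector-width hint. */
static __inline__ __m512i ATTRS512 xor_epi64(__m512i a, __m512i b) {
    return (__m512i)((__v8du)a ^ (__v8du)b);
}

/* Pure k-register arithmetic: no 512-bit vectors involved. */
static __inline__ __mmask8 ATTRS not_mask8(__mmask8 m) {
    return _knot_mask8(m);
}
```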

c_headers/avx512fintrin.h vendored

@ -175,6 +175,7 @@ typedef enum
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
/* Create vectors with repeated elements */
@ -508,13 +509,13 @@ _mm512_castsi512_si256 (__m512i __A)
return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_int2mask(int __a)
{
return (__mmask16)__a;
}
static __inline__ int __DEFAULT_FN_ATTRS512
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask2int(__mmask16 __a)
{
return (int)__a;
@ -4328,6 +4329,15 @@ _mm512_loadu_si512 (void const *__P)
return ((struct __loadu_si512*)__P)->__v;
}
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi32 (void const *__P)
{
struct __loadu_epi32 {
__m512i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi32*)__P)->__v;
}
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
@ -4346,6 +4356,15 @@ _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
(__mmask16) __U);
}
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi64 (void const *__P)
{
struct __loadu_epi64 {
__m512i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi64*)__P)->__v;
}
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
@ -4481,6 +4500,15 @@ _mm512_load_epi64 (void const *__P)
/* SIMD store ops */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
struct __storeu_epi64 {
__m512i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi64*)__P)->__v = __A;
}
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
@ -4497,6 +4525,15 @@ _mm512_storeu_si512 (void *__P, __m512i __A)
((struct __storeu_si512*)__P)->__v = __A;
}
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
struct __storeu_epi32 {
__m512i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi32*)__P)->__v = __A;
}
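
The new _mm512_loadu_epi32/_mm512_storeu_epi32 bodies wrap the vector in a struct marked __packed__ and __may_alias__: packed drops the 64-byte alignment requirement, and may_alias exempts the access from strict-aliasing analysis. The idiom in isolation, with an illustrative wrapper name (requires -mavx512f to compile and AVX-512 hardware to run):

```
/* Sketch of the packed/may_alias struct idiom used by the new
 * unaligned load/store bodies above. "wrapper" and "loadu_512" are
 * illustrative names. */
#include <immintrin.h>
#include <string.h>
#include <stdio.h>

static __m512i loadu_512(const void *p) {
    struct wrapper { __m512i v; } __attribute__((__packed__, __may_alias__));
    return ((const struct wrapper *)p)->v;  /* no alignment assumed */
}

int main(void) {
    int buf[17];
    for (int i = 0; i < 17; i++) buf[i] = i;
    __m512i v = loadu_512(buf + 1);  /* misaligned source is fine */
    int out[16];
    memcpy(out, &v, sizeof out);
    printf("%d %d ... %d\n", out[0], out[1], out[15]); /* 1 2 ... 16 */
    return 0;
}
```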
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
@ -4580,7 +4617,7 @@ _mm512_store_epi64 (void *__P, __m512i __A)
/* Mask ops */
static __inline __mmask16 __DEFAULT_FN_ATTRS512
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_knot(__mmask16 __M)
{
return __builtin_ia32_knothi(__M);
@ -5622,7 +5659,7 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R))
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kmov (__mmask16 __A)
{
return __A;
@ -7593,177 +7630,177 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
#define _mm512_i64gather_ps(index, addr, scale) \
(__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
(float const *)(addr), \
(void const *)(addr), \
(__v8di)(__m512i)(index), (__mmask8)-1, \
(int)(scale))
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
(__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
(float const *)(addr), \
(void const *)(addr), \
(__v8di)(__m512i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm512_i64gather_epi32(index, addr, scale) \
(__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
(int const *)(addr), \
(void const *)(addr), \
(__v8di)(__m512i)(index), \
(__mmask8)-1, (int)(scale))
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
(__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
(int const *)(addr), \
(void const *)(addr), \
(__v8di)(__m512i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm512_i64gather_pd(index, addr, scale) \
(__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
(double const *)(addr), \
(void const *)(addr), \
(__v8di)(__m512i)(index), (__mmask8)-1, \
(int)(scale))
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
(__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
(double const *)(addr), \
(void const *)(addr), \
(__v8di)(__m512i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm512_i64gather_epi64(index, addr, scale) \
(__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
(long long const *)(addr), \
(void const *)(addr), \
(__v8di)(__m512i)(index), (__mmask8)-1, \
(int)(scale))
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
(__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
(long long const *)(addr), \
(void const *)(addr), \
(__v8di)(__m512i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm512_i32gather_ps(index, addr, scale) \
(__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
(float const *)(addr), \
(void const *)(addr), \
(__v16sf)(__m512)(index), \
(__mmask16)-1, (int)(scale))
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
(__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
(float const *)(addr), \
(void const *)(addr), \
(__v16sf)(__m512)(index), \
(__mmask16)(mask), (int)(scale))
#define _mm512_i32gather_epi32(index, addr, scale) \
(__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
(int const *)(addr), \
(void const *)(addr), \
(__v16si)(__m512i)(index), \
(__mmask16)-1, (int)(scale))
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
(__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
(int const *)(addr), \
(void const *)(addr), \
(__v16si)(__m512i)(index), \
(__mmask16)(mask), (int)(scale))
#define _mm512_i32gather_pd(index, addr, scale) \
(__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
(double const *)(addr), \
(void const *)(addr), \
(__v8si)(__m256i)(index), (__mmask8)-1, \
(int)(scale))
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
(__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
(double const *)(addr), \
(void const *)(addr), \
(__v8si)(__m256i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm512_i32gather_epi64(index, addr, scale) \
(__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
(long long const *)(addr), \
(void const *)(addr), \
(__v8si)(__m256i)(index), (__mmask8)-1, \
(int)(scale))
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
(__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
(long long const *)(addr), \
(void const *)(addr), \
(__v8si)(__m256i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
(__v8di)(__m512i)(index), \
(__v8sf)(__m256)(v1), (int)(scale))
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
(__v8di)(__m512i)(index), \
(__v8sf)(__m256)(v1), (int)(scale))
#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
__builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
(__v8di)(__m512i)(index), \
(__v8si)(__m256i)(v1), (int)(scale))
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
(__v8di)(__m512i)(index), \
(__v8si)(__m256i)(v1), (int)(scale))
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
__builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
(__v8di)(__m512i)(index), \
(__v8df)(__m512d)(v1), (int)(scale))
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
(__v8di)(__m512i)(index), \
(__v8df)(__m512d)(v1), (int)(scale))
#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
__builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
(__v8di)(__m512i)(index), \
(__v8di)(__m512i)(v1), (int)(scale))
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
(__v8di)(__m512i)(index), \
(__v8di)(__m512i)(v1), (int)(scale))
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
__builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
(__v16si)(__m512i)(index), \
(__v16sf)(__m512)(v1), (int)(scale))
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
__builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
(__v16si)(__m512i)(index), \
(__v16sf)(__m512)(v1), (int)(scale))
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
__builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
__builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
(__v16si)(__m512i)(index), \
(__v16si)(__m512i)(v1), (int)(scale))
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
__builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
(__v16si)(__m512i)(index), \
(__v16si)(__m512i)(v1), (int)(scale))
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
__builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
(__v8si)(__m256i)(index), \
(__v8df)(__m512d)(v1), (int)(scale))
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
(__v8si)(__m256i)(index), \
(__v8df)(__m512d)(v1), (int)(scale))
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
__builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
(__v8si)(__m256i)(index), \
(__v8di)(__m512i)(v1), (int)(scale))
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
(__v8si)(__m256i)(index), \
(__v8di)(__m512i)(v1), (int)(scale))
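/*
 * Illustrative sketch (editor's addition, not part of the upstream header,
 * assuming AVX-512F via <immintrin.h>): the gather/scatter macros above
 * take a run-time base pointer, a vector of indices, and a compile-time
 * scale of 1, 2, 4, or 8.  The buffer names below are hypothetical.
 */
#include <immintrin.h>
static void gather_scatter_sketch(const float *src, float *dst,
                                  const int *idx) {
    __m512i vindex = _mm512_loadu_si512((const void *)idx); /* 16 indices */
    __m512 v = _mm512_i32gather_ps(vindex, src, 4);  /* v[i] = src[idx[i]] */
    _mm512_i32scatter_ps(dst, vindex, v, 4);         /* dst[idx[i]] = v[i] */
}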
@ -8320,54 +8357,105 @@ _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}
static __inline__ int __DEFAULT_FN_ATTRS512
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
}
static __inline__ int __DEFAULT_FN_ATTRS512
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
{
return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
{
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor
#define _kshiftli_mask16(A, I) \
(__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))
#define _kshiftri_mask16(A, I) \
(__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask16_u32(__mmask16 __A) {
return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_cvtu32_mask16(unsigned int __A) {
return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_load_mask16(__mmask16 *__A) {
return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
}
static __inline__ void __DEFAULT_FN_ATTRS
_store_mask16(__mmask16 *__A, __mmask16 __B) {
*(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
}
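/*
 * Illustrative sketch (editor's addition, not part of the upstream header,
 * assuming AVX-512F): the new _cvtu32/_kshiftli/_kortestz mask utilities
 * above compose like plain scalar bit arithmetic.  Values are hypothetical.
 */
#include <immintrin.h>
static int mask_utils_sketch(void) {
    __mmask16 m = _cvtu32_mask16(0x00FFu);  /* low eight lanes enabled */
    m = _kshiftli_mask16(m, 4);             /* now 0x0FF0 */
    return _kortestz_mask16_u8(m, m);       /* 0: the OR is non-zero */
}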
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
@ -9594,5 +9682,6 @@ _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS
#endif /* __AVX512FINTRIN_H */

@ -33,78 +33,78 @@
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(long long const *)(addr), (int)(scale), \
(void const *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
(long long const *)(addr), (int)(scale), \
(void const *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdps((__mmask16)(mask), \
(__v16si)(__m512i)(index), (int const *)(addr), \
(__v16si)(__m512i)(index), (void const *)(addr), \
(int)(scale), (int)(hint))
#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
__builtin_ia32_gatherpfdps((__mmask16) -1, \
(__v16si)(__m512i)(index), (int const *)(addr), \
(__v16si)(__m512i)(index), (void const *)(addr), \
(int)(scale), (int)(hint))
#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
(long long const *)(addr), (int)(scale), \
(void const *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
__builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
(long long const *)(addr), (int)(scale), \
(void const *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(int const *)(addr), (int)(scale), (int)(hint))
(void const *)(addr), (int)(scale), (int)(hint))
#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
__builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
(int const *)(addr), (int)(scale), (int)(hint))
(void const *)(addr), (int)(scale), (int)(hint))
#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
__builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
(long long *)(addr), (int)(scale), \
(void *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(long long *)(addr), (int)(scale), \
(void *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
__builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
(int *)(addr), (int)(scale), (int)(hint))
(void *)(addr), (int)(scale), (int)(hint))
#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfdps((__mmask16)(mask), \
(__v16si)(__m512i)(index), (int *)(addr), \
(__v16si)(__m512i)(index), (void *)(addr), \
(int)(scale), (int)(hint))
#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
__builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
(long long *)(addr), (int)(scale), \
(void *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
(long long *)(addr), (int)(scale), \
(void *)(addr), (int)(scale), \
(int)(hint))
#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
__builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
(int *)(addr), (int)(scale), (int)(hint))
(void *)(addr), (int)(scale), (int)(hint))
#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(int *)(addr), (int)(scale), (int)(hint))
(void *)(addr), (int)(scale), (int)(hint))
#undef __DEFAULT_FN_ATTRS
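/*
 * Illustrative sketch (editor's addition, not part of the upstream header):
 * the AVX-512PF prefetch gathers above take the same index/base/scale
 * triple as real gathers, plus a locality hint.  Assumes a Knights
 * Landing class target; 'base' and 'vindex' are hypothetical.
 */
#include <immintrin.h>
static void prefetch_sketch(const float *base, __m512i vindex) {
    _mm512_prefetch_i32gather_ps(vindex, base, 4, _MM_HINT_T0);
}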

@ -227,167 +227,141 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
(__v32hi)_mm512_setzero_si512())
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B,
(__v8di)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
return (__m512i)__builtin_ia32_selectq_512(__U,
(__v8di)_mm512_shldv_epi64(__A, __B, __C),
(__v8di)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
(__mmask8) -1);
return (__m512i)__builtin_ia32_selectq_512(__U,
(__v8di)_mm512_shldv_epi64(__A, __B, __C),
(__v8di)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B,
(__v16si)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_shldv_epi32(__A, __B, __C),
(__v16si)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_shldv_epi32(__A, __B, __C),
(__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B,
(__v32hi)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
(__mmask32) -1);
return (__m512i)__builtin_ia32_selectw_512(__U,
(__v32hi)_mm512_shldv_epi16(__A, __B, __C),
(__v32hi)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
return (__m512i)__builtin_ia32_selectw_512(__U,
(__v32hi)_mm512_shldv_epi16(__A, __B, __C),
(__v32hi)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B,
(__v8di)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
(__mmask8) -1);
return (__m512i)__builtin_ia32_selectq_512(__U,
(__v8di)_mm512_shrdv_epi64(__A, __B, __C),
(__v8di)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
return (__m512i)__builtin_ia32_selectq_512(__U,
(__v8di)_mm512_shrdv_epi64(__A, __B, __C),
(__v8di)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B,
(__v16si)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
return (__m512i) __builtin_ia32_selectd_512(__U,
(__v16si)_mm512_shrdv_epi32(__A, __B, __C),
(__v16si)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
return (__m512i) __builtin_ia32_selectd_512(__U,
(__v16si)_mm512_shrdv_epi32(__A, __B, __C),
(__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
(__mmask32) -1);
return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B,
(__v32hi)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
{
return (__m512i)__builtin_ia32_selectw_512(__U,
(__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
(__v32hi)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
{
return (__m512i)__builtin_ia32_selectw_512(__U,
(__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
(__v32hi)_mm512_setzero_si512());
}
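/*
 * Illustrative sketch (editor's addition, not part of the upstream header):
 * a scalar model of one 64-bit lane of the funnel shifts above, based on
 * the VPSHLDVQ definition -- the pair (a,b) is treated as a 128-bit value
 * with 'a' in the upper half, shifted left by c modulo 64, keeping the
 * upper 64 bits.
 */
static inline unsigned long long shldv64_lane(unsigned long long a,
                                              unsigned long long b,
                                              unsigned long long c) {
    unsigned s = (unsigned)(c & 63);
    return s ? (a << s) | (b >> (64 - s)) : a;  /* upper half of concat */
}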

@ -91,30 +91,26 @@ _mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
(__v64qi) __Y,
(__v64qi) __W,
(__mmask64) __M);
return (__m512i)__builtin_ia32_vpmultishiftqb512((__v64qi)__X, (__v64qi) __Y);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X,
__m512i __Y)
{
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
(__v64qi) __Y,
(__v64qi) _mm512_setzero_si512 (),
(__mmask64) __M);
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
(__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
_mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
(__v64qi) __Y,
(__v64qi) _mm512_undefined_epi32 (),
(__mmask64) -1);
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
(__v64qi)_mm512_setzero_si512());
}
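/*
 * Illustrative sketch (editor's addition, not part of the upstream header):
 * a scalar model of one output byte of the VPMULTISHIFTQB intrinsics
 * above -- each control byte of __X selects an unaligned (wrapping) 8-bit
 * field of the matching 64-bit element of __Y.
 */
static inline unsigned char multishift_byte(unsigned char ctrl,
                                            unsigned long long q) {
    unsigned s = ctrl & 63;                        /* bit offset, mod 64 */
    unsigned long long rot = (q >> s) | (s ? q << (64 - s) : 0);
    return (unsigned char)rot;                     /* low 8 bits */
}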

@ -150,61 +150,49 @@ _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
(__v16qi) __Y,
(__v16qi) __W,
(__mmask16) __M);
return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X,
__m128i __Y)
{
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
(__v16qi) __Y,
(__v16qi)
_mm_setzero_si128 (),
(__mmask16) __M);
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
(__v16qi)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
(__v16qi) __Y,
(__v16qi)
_mm_undefined_si128 (),
(__mmask16) -1);
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
(__v16qi)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
(__v32qi) __Y,
(__v32qi) __W,
(__mmask32) __M);
return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X,
__m256i __Y)
{
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
(__v32qi) __Y,
(__v32qi)
_mm256_setzero_si256 (),
(__mmask32) __M);
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
(__v32qi)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
(__v32qi) __Y,
(__v32qi)
_mm256_undefined_si256 (),
(__mmask32) -1);
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
(__v32qi)_mm256_setzero_si256());
}

@ -2297,6 +2297,15 @@ _mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
(__v32qi) _mm256_setzero_si256());
}
static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi16 (void const *__P)
{
struct __loadu_epi16 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi16*)__P)->__v;
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
{
@ -2314,6 +2323,15 @@ _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
(__mmask8) __U);
}
static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi16 (void const *__P)
{
struct __loadu_epi16 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi16*)__P)->__v;
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
{
@ -2331,6 +2349,15 @@ _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
(__mmask16) __U);
}
static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi8 (void const *__P)
{
struct __loadu_epi8 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi8*)__P)->__v;
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
{
@ -2348,6 +2375,15 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
(__mmask16) __U);
}
static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi8 (void const *__P)
{
struct __loadu_epi8 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi8*)__P)->__v;
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
{
@ -2364,7 +2400,17 @@ _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
_mm256_setzero_si256 (),
(__mmask32) __U);
}
static __inline__ void __DEFAULT_FN_ATTRS256
static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi16 (void *__P, __m128i __A)
{
struct __storeu_epi16 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi16*)__P)->__v = __A;
}
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
{
__builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
@ -2372,6 +2418,15 @@ _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
(__mmask8) __U);
}
static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi16 (void *__P, __m256i __A)
{
struct __storeu_epi16 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi16*)__P)->__v = __A;
}
static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
{
@ -2380,6 +2435,15 @@ _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
(__mmask16) __U);
}
static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi8 (void *__P, __m128i __A)
{
struct __storeu_epi8 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi8*)__P)->__v = __A;
}
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
{
@ -2388,6 +2452,15 @@ _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
(__mmask16) __U);
}
static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi8 (void *__P, __m256i __A)
{
struct __storeu_epi8 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi8*)__P)->__v = __A;
}
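/*
 * Illustrative sketch (editor's addition, not part of the upstream header,
 * assuming AVX-512BW/VL): the new unaligned typed load/store helpers
 * above go through a packed, may_alias wrapper struct, so any byte
 * address is legal.
 */
#include <immintrin.h>
static void copy32_sketch(void *dst, const void *src) {
    __m256i v = _mm256_loadu_epi8(src);  /* unaligned 32-byte load */
    _mm256_storeu_epi8(dst, v);          /* unaligned 32-byte store */
}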
static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
{

@ -461,11 +461,17 @@ _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
(__v4si)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_and_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8su)__a & (__v8su)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_and_si256(__A, __B),
(__v8si)_mm256_and_epi32(__A, __B),
(__v8si)__W);
}
@ -475,11 +481,17 @@ _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_and_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4su)__a & (__v4su)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_and_si128(__A, __B),
(__v4si)_mm_and_epi32(__A, __B),
(__v4si)__W);
}
@ -489,11 +501,17 @@ _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_andnot_epi32(__m256i __A, __m256i __B)
{
return (__m256i)(~(__v8su)__A & (__v8su)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_andnot_si256(__A, __B),
(__v8si)_mm256_andnot_epi32(__A, __B),
(__v8si)__W);
}
@ -504,25 +522,37 @@ _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
__U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_andnot_epi32(__m128i __A, __m128i __B)
{
return (__m128i)(~(__v4su)__A & (__v4su)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_andnot_si128(__A, __B),
(__v4si)_mm_andnot_epi32(__A, __B),
(__v4si)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_or_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8su)__a | (__v8su)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_or_si256(__A, __B),
(__v8si)_mm256_or_epi32(__A, __B),
(__v8si)__W);
}
@ -532,11 +562,17 @@ _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_or_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4su)__a | (__v4su)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_or_si128(__A, __B),
(__v4si)_mm_or_epi32(__A, __B),
(__v4si)__W);
}
@ -546,11 +582,17 @@ _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_xor_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8su)__a ^ (__v8su)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_xor_si256(__A, __B),
(__v8si)_mm256_xor_epi32(__A, __B),
(__v8si)__W);
}
@ -561,11 +603,16 @@ _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
_mm_xor_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4su)__a ^ (__v4su)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_xor_si128(__A, __B),
(__v4si)_mm_xor_epi32(__A, __B),
(__v4si)__W);
}
@ -575,11 +622,17 @@ _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_and_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a & (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_and_si256(__A, __B),
(__v4di)_mm256_and_epi64(__A, __B),
(__v4di)__W);
}
@ -589,11 +642,17 @@ _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_and_epi64(__m128i __a, __m128i __b)
{
return (__m128i)((__v2du)__a & (__v2du)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_and_si128(__A, __B),
(__v2di)_mm_and_epi64(__A, __B),
(__v2di)__W);
}
@ -603,11 +662,17 @@ _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_andnot_epi64(__m256i __A, __m256i __B)
{
return (__m256i)(~(__v4du)__A & (__v4du)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_andnot_si256(__A, __B),
(__v4di)_mm256_andnot_epi64(__A, __B),
(__v4di)__W);
}
@ -618,11 +683,17 @@ _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
__U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_andnot_epi64(__m128i __A, __m128i __B)
{
return (__m128i)(~(__v2du)__A & (__v2du)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_andnot_si128(__A, __B),
(__v2di)_mm_andnot_epi64(__A, __B),
(__v2di)__W);
}
@ -632,11 +703,17 @@ _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_or_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a | (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_or_si256(__A, __B),
(__v4di)_mm256_or_epi64(__A, __B),
(__v4di)__W);
}
@ -646,11 +723,17 @@ _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_or_epi64(__m128i __a, __m128i __b)
{
return (__m128i)((__v2du)__a | (__v2du)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_or_si128(__A, __B),
(__v2di)_mm_or_epi64(__A, __B),
(__v2di)__W);
}
@ -660,11 +743,17 @@ _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_xor_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a ^ (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_xor_si256(__A, __B),
(__v4di)_mm256_xor_epi64(__A, __B),
(__v4di)__W);
}
@ -674,12 +763,18 @@ _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_xor_epi64(__m128i __a, __m128i __b)
{
return (__m128i)((__v2du)__a ^ (__v2du)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_xor_si128(__A, __B),
(__v2di)_mm_xor_epi64(__A, __B),
(__v2di)__W);
}
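/*
 * Illustrative sketch (editor's addition, not part of the upstream header,
 * assuming AVX-512F/VL): after this change every masked logical op is the
 * new unmasked op merged through a lane select, so the two expressions
 * below compute the same vector.
 */
#include <immintrin.h>
static __m256i masked_and_sketch(__m256i w, __mmask8 u,
                                 __m256i a, __m256i b) {
    __m256i r1 = _mm256_mask_and_epi32(w, u, a, b);
    __m256i r2 = _mm256_mask_blend_epi32(u, w, _mm256_and_epi32(a, b));
    return _mm256_xor_si256(r1, r2);  /* all-zero: r1 == r2 */
}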
@ -3389,162 +3484,162 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
}
#define _mm_i64scatter_pd(addr, index, v1, scale) \
__builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale))
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale))
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
__builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale))
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale))
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
__builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), \
(__v4df)(__m256d)(v1), (int)(scale))
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), \
(__v4df)(__m256d)(v1), (int)(scale))
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
__builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), \
(__v4di)(__m256i)(v1), (int)(scale))
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), \
(__v4di)(__m256i)(v1), (int)(scale))
#define _mm_i64scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
__builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
__builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \
__builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
__builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \
__builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))
#define _mm_i32scatter_pd(addr, index, v1, scale) \
__builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale))
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale))
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
__builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale))
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale))
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
__builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v4df)(__m256d)(v1), (int)(scale))
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v4df)(__m256d)(v1), (int)(scale))
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
__builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v4di)(__m256i)(v1), (int)(scale))
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v4di)(__m256i)(v1), (int)(scale))
#define _mm_i32scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
(int)(scale))
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
__builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale))
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
(__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
(int)(scale))
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
(__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
(int)(scale))
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
__builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
__builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
(__v8si)(__m256i)(index), \
(__v8si)(__m256i)(v1), (int)(scale))
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
__builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
__builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
(__v8si)(__m256i)(index), \
(__v8si)(__m256i)(v1), (int)(scale))
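/*
 * Illustrative sketch (editor's addition, not part of the upstream header,
 * assuming AVX-512F/VL): the 128-bit scatters above store each lane to
 * base[idx[i]].  The parameter names are hypothetical.
 */
#include <immintrin.h>
static void scatter4_sketch(int *base, __m128i idx, __m128i vals) {
    _mm_i32scatter_epi32(base, idx, vals, 4);  /* base[idx[i]] = vals[i] */
}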
@ -4989,6 +5084,12 @@ _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
(__v8si) _mm256_setzero_si256 ());
}
static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_load_epi32 (void const *__P)
{
return *(__m128i *) __P;
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
{
@ -5008,6 +5109,12 @@ _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
__U);
}
static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_load_epi32 (void const *__P)
{
return *(__m256i *) __P;
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
{
@ -5027,6 +5134,12 @@ _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
__U);
}
static __inline void __DEFAULT_FN_ATTRS128
_mm_store_epi32 (void *__P, __m128i __A)
{
*(__m128i *) __P = __A;
}
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
{
@ -5035,6 +5148,12 @@ _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
(__mmask8) __U);
}
static __inline void __DEFAULT_FN_ATTRS256
_mm256_store_epi32 (void *__P, __m256i __A)
{
*(__m256i *) __P = __A;
}
static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
{
@ -5075,6 +5194,12 @@ _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
(__v4di) _mm256_setzero_si256 ());
}
static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_load_epi64 (void const *__P)
{
return *(__m128i *) __P;
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
{
@ -5094,6 +5219,12 @@ _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
__U);
}
static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_load_epi64 (void const *__P)
{
return *(__m256i *) __P;
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
{
@ -5113,6 +5244,12 @@ _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
__U);
}
static __inline void __DEFAULT_FN_ATTRS128
_mm_store_epi64 (void *__P, __m128i __A)
{
*(__m128i *) __P = __A;
}
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
{
@ -5121,6 +5258,12 @@ _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
(__mmask8) __U);
}
static __inline void __DEFAULT_FN_ATTRS256
_mm256_store_epi64 (void *__P, __m256i __A)
{
*(__m256i *) __P = __A;
}
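/*
 * Illustrative sketch (editor's addition, not part of the upstream header,
 * assuming AVX-512F/VL): unlike the loadu/storeu forms, the plain
 * load/store helpers above compile to direct dereferences, so the pointer
 * must be 32-byte aligned (16-byte for the __m128i forms).
 */
#include <immintrin.h>
static __m256i sum_aligned_sketch(const int *p, const int *q) {
    /* p and q are assumed 32-byte aligned */
    return _mm256_add_epi32(_mm256_load_epi32(p), _mm256_load_epi32(q));
}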
static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
{
@ -5366,6 +5509,15 @@ _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
(__mmask8) __U);
}
static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi64 (void const *__P)
{
struct __loadu_epi64 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi64*)__P)->__v;
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
{
@ -5383,6 +5535,15 @@ _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
(__mmask8) __U);
}
static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi64 (void const *__P)
{
struct __loadu_epi64 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi64*)__P)->__v;
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
{
@ -5400,6 +5561,15 @@ _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
(__mmask8) __U);
}
static __inline __m128i __DEFAULT_FN_ATTRS128
_mm_loadu_epi32 (void const *__P)
{
struct __loadu_epi32 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi32*)__P)->__v;
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
{
@ -5417,6 +5587,15 @@ _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
(__mmask8) __U);
}
static __inline __m256i __DEFAULT_FN_ATTRS256
_mm256_loadu_epi32 (void const *__P)
{
struct __loadu_epi32 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
return ((struct __loadu_epi32*)__P)->__v;
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
{
@ -5534,6 +5713,15 @@ _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
(__mmask8) __U);
}
static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi64 (void *__P, __m128i __A)
{
struct __storeu_epi64 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi64*)__P)->__v = __A;
}
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
{
@ -5542,6 +5730,15 @@ _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
(__mmask8) __U);
}
static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi64 (void *__P, __m256i __A)
{
struct __storeu_epi64 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi64*)__P)->__v = __A;
}
static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
{
@ -5550,6 +5747,15 @@ _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
(__mmask8) __U);
}
static __inline void __DEFAULT_FN_ATTRS128
_mm_storeu_epi32 (void *__P, __m128i __A)
{
struct __storeu_epi32 {
__m128i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi32*)__P)->__v = __A;
}
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
{
@ -5558,6 +5764,15 @@ _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
(__mmask8) __U);
}
static __inline void __DEFAULT_FN_ATTRS256
_mm256_storeu_epi32 (void *__P, __m256i __A)
{
struct __storeu_epi32 {
__m256i __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_epi32*)__P)->__v = __A;
}
static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
{
@ -7769,97 +7984,97 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
(__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
(double const *)(addr), \
(void const *)(addr), \
(__v2di)(__m128i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
(__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
(long long const *)(addr), \
(void const *)(addr), \
(__v2di)(__m128i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
(__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
(double const *)(addr), \
(void const *)(addr), \
(__v4di)(__m256i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
(__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
(long long const *)(addr), \
(void const *)(addr), \
(__v4di)(__m256i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
(__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
(float const *)(addr), \
(void const *)(addr), \
(__v2di)(__m128i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
(__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
(int const *)(addr), \
(void const *)(addr), \
(__v2di)(__m128i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
(__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
(float const *)(addr), \
(void const *)(addr), \
(__v4di)(__m256i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
(__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
(int const *)(addr), \
(void const *)(addr), \
(__v4di)(__m256i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
(__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
(double const *)(addr), \
(void const *)(addr), \
(__v4si)(__m128i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
(__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
(long long const *)(addr), \
(void const *)(addr), \
(__v4si)(__m128i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
(__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
(double const *)(addr), \
(void const *)(addr), \
(__v4si)(__m128i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
(__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
(long long const *)(addr), \
(void const *)(addr), \
(__v4si)(__m128i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
(__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
(float const *)(addr), \
(void const *)(addr), \
(__v4si)(__m128i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
(__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
(int const *)(addr), \
(void const *)(addr), \
(__v4si)(__m128i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
(__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
(float const *)(addr), \
(void const *)(addr), \
(__v8si)(__m256i)(index), \
(__mmask8)(mask), (int)(scale))
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
(__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
(int const *)(addr), \
(void const *)(addr), \
(__v8si)(__m256i)(index), \
(__mmask8)(mask), (int)(scale))
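/*
 * Illustrative sketch (editor's addition, not part of the upstream header,
 * assuming AVX-512F/VL): the masked gathers above fetch only the lanes
 * enabled in the mask and keep the old vector's value in disabled lanes.
 */
#include <immintrin.h>
static __m128i gather_some_sketch(__m128i old, __mmask8 m,
                                  __m128i idx, const int *base) {
    return _mm_mmask_i32gather_epi32(old, m, idx, base, 4);
}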

@ -421,327 +421,279 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
(__v8hi)_mm_setzero_si128())
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
_mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
(__v4di) __A,
(__v4di) __B,
__U);
return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B,
(__v4di)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S,
(__v4di) __A,
(__v4di) __B,
__U);
return (__m256i)__builtin_ia32_selectq_256(__U,
(__v4di)_mm256_shldv_epi64(__A, __B, __C),
(__v4di)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
(__v4di) __A,
(__v4di) __B,
(__mmask8) -1);
return (__m256i)__builtin_ia32_selectq_256(__U,
(__v4di)_mm256_shldv_epi64(__A, __B, __C),
(__v4di)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
(__v2di) __A,
(__v2di) __B,
__U);
return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B,
(__v2di)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S,
(__v2di) __A,
(__v2di) __B,
__U);
return (__m128i)__builtin_ia32_selectq_128(__U,
(__v2di)_mm_shldv_epi64(__A, __B, __C),
(__v2di)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
(__v2di) __A,
(__v2di) __B,
(__mmask8) -1);
return (__m128i)__builtin_ia32_selectq_128(__U,
(__v2di)_mm_shldv_epi64(__A, __B, __C),
(__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
_mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
__U);
return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B,
(__v8si)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
__U);
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_shldv_epi32(__A, __B, __C),
(__v8si)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) -1);
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_shldv_epi32(__A, __B, __C),
(__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
__U);
return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B,
(__v4si)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
__U);
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_shldv_epi32(__A, __B, __C),
(__v4si)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) -1);
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_shldv_epi32(__A, __B, __C),
(__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
_mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
(__v16hi) __A,
(__v16hi) __B,
__U);
return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B,
(__v16hi)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S,
(__v16hi) __A,
(__v16hi) __B,
__U);
return (__m256i)__builtin_ia32_selectw_256(__U,
(__v16hi)_mm256_shldv_epi16(__A, __B, __C),
(__v16hi)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
(__v16hi) __A,
(__v16hi) __B,
(__mmask16) -1);
return (__m256i)__builtin_ia32_selectw_256(__U,
(__v16hi)_mm256_shldv_epi16(__A, __B, __C),
(__v16hi)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
(__v8hi) __A,
(__v8hi) __B,
__U);
return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B,
(__v8hi)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S,
(__v8hi) __A,
(__v8hi) __B,
__U);
return (__m128i)__builtin_ia32_selectw_128(__U,
(__v8hi)_mm_shldv_epi16(__A, __B, __C),
(__v8hi)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
(__v8hi) __A,
(__v8hi) __B,
(__mmask8) -1);
return (__m128i)__builtin_ia32_selectw_128(__U,
(__v8hi)_mm_shldv_epi16(__A, __B, __C),
(__v8hi)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
_mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
(__v4di) __A,
(__v4di) __B,
__U);
return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B,
(__v4di)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S,
(__v4di) __A,
(__v4di) __B,
__U);
return (__m256i)__builtin_ia32_selectq_256(__U,
(__v4di)_mm256_shrdv_epi64(__A, __B, __C),
(__v4di)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
(__v4di) __A,
(__v4di) __B,
(__mmask8) -1);
return (__m256i)__builtin_ia32_selectq_256(__U,
(__v4di)_mm256_shrdv_epi64(__A, __B, __C),
(__v4di)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
(__v2di) __A,
(__v2di) __B,
__U);
return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B,
(__v2di)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S,
(__v2di) __A,
(__v2di) __B,
__U);
return (__m128i)__builtin_ia32_selectq_128(__U,
(__v2di)_mm_shrdv_epi64(__A, __B, __C),
(__v2di)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
(__v2di) __A,
(__v2di) __B,
(__mmask8) -1);
return (__m128i)__builtin_ia32_selectq_128(__U,
(__v2di)_mm_shrdv_epi64(__A, __B, __C),
(__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
_mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
__U);
return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B,
(__v8si)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
__U);
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_shrdv_epi32(__A, __B, __C),
(__v8si)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
(__v8si) __A,
(__v8si) __B,
(__mmask8) -1);
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_shrdv_epi32(__A, __B, __C),
(__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
__U);
return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B,
(__v4si)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
__U);
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_shrdv_epi32(__A, __B, __C),
(__v4si)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
(__v4si) __A,
(__v4si) __B,
(__mmask8) -1);
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_shrdv_epi32(__A, __B, __C),
(__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
_mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
(__v16hi) __A,
(__v16hi) __B,
__U);
return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B,
(__v16hi)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
_mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S,
(__v16hi) __A,
(__v16hi) __B,
__U);
return (__m256i)__builtin_ia32_selectw_256(__U,
(__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
(__v16hi)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
(__v16hi) __A,
(__v16hi) __B,
(__mmask16) -1);
return (__m256i)__builtin_ia32_selectw_256(__U,
(__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
(__v16hi)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
_mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
(__v8hi) __A,
(__v8hi) __B,
__U);
return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B,
(__v8hi)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
_mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S,
(__v8hi) __A,
(__v8hi) __B,
__U);
return (__m128i)__builtin_ia32_selectw_128(__U,
(__v8hi)_mm_shrdv_epi16(__A, __B, __C),
(__v8hi)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
(__v8hi) __A,
(__v8hi) __B,
(__mmask8) -1);
return (__m128i)__builtin_ia32_selectw_128(__U,
(__v8hi)_mm_shrdv_epi16(__A, __B, __C),
(__v8hi)_mm_setzero_si128());
}
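The rewrite above changes these intrinsics from thin wrappers over masked builtins to an unmasked funnel-shift builtin plus an explicit element select: shldv treats __A as the high half and __B as the low half of a double-width lane, shifts left by __C modulo the element width, and keeps the high half; the _mask variants blend the result with __A under __U, and the _maskz variants blend with zero. A scalar model of one 32-bit shldv lane (a sketch for illustration, following the published VPSHLDVD semantics):

/* One lane of _mm_shldv_epi32: high 32 bits of ((a:b) << (c mod 32)). */
static unsigned int shldv32_lane(unsigned int a, unsigned int b, unsigned int c)
{
    unsigned int s = c & 31;
    if (s == 0)
        return a;                      /* shifting by zero leaves a unchanged */
    return (a << s) | (b >> (32 - s)); /* bits of b fill in from the right */
}

The shrdv forms below are the mirror image: the concatenated lane is shifted right and the low half is kept.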

c_headers/bmiintrin.h vendored

@ -62,7 +62,7 @@
static __inline__ unsigned short __RELAXED_FN_ATTRS
__tzcnt_u16(unsigned short __X)
{
return __X ? __builtin_ctzs(__X) : 16;
return __builtin_ia32_tzcnt_u16(__X);
}
/// Performs a bitwise AND of the second operand with the one's
@ -196,7 +196,7 @@ __blsr_u32(unsigned int __X)
static __inline__ unsigned int __RELAXED_FN_ATTRS
__tzcnt_u32(unsigned int __X)
{
return __X ? __builtin_ctz(__X) : 32;
return __builtin_ia32_tzcnt_u32(__X);
}
/// Counts the number of trailing zero bits in the operand.
@ -212,7 +212,7 @@ __tzcnt_u32(unsigned int __X)
static __inline__ int __RELAXED_FN_ATTRS
_mm_tzcnt_32(unsigned int __X)
{
return __X ? __builtin_ctz(__X) : 32;
return __builtin_ia32_tzcnt_u32(__X);
}
#ifdef __x86_64__
@ -359,7 +359,7 @@ __blsr_u64(unsigned long long __X)
static __inline__ unsigned long long __RELAXED_FN_ATTRS
__tzcnt_u64(unsigned long long __X)
{
return __X ? __builtin_ctzll(__X) : 64;
return __builtin_ia32_tzcnt_u64(__X);
}
/// Counts the number of trailing zero bits in the operand.
@ -375,7 +375,7 @@ __tzcnt_u64(unsigned long long __X)
static __inline__ long long __RELAXED_FN_ATTRS
_mm_tzcnt_64(unsigned long long __X)
{
return __X ? __builtin_ctzll(__X) : 64;
return __builtin_ia32_tzcnt_u64(__X);
}
#endif /* __x86_64__ */
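The change from the __builtin_ctz fallback to __builtin_ia32_tzcnt_u32/_u64 matters at zero: TZCNT is architecturally defined to return the operand width for a zero input, so the builtin carries that guarantee and the ternary becomes unnecessary. Expected behavior, as a small self-check (assuming a BMI-enabled build, e.g. -mbmi):

#include <immintrin.h>
#include <assert.h>

int main(void)
{
    assert(__tzcnt_u32(0) == 32);       /* tzcnt of zero is the bit width */
    assert(__tzcnt_u32(0x8000u) == 15); /* lowest set bit is bit 15 */
    assert(_mm_tzcnt_32(1) == 0);
    return 0;
}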


@ -73,10 +73,12 @@ __device__ inline void operator delete[](void *ptr,
// Sized delete, C++14 only.
#if __cplusplus >= 201402L
__device__ void operator delete(void *ptr, __SIZE_TYPE__ size) CUDA_NOEXCEPT {
__device__ inline void operator delete(void *ptr,
__SIZE_TYPE__ size) CUDA_NOEXCEPT {
::operator delete(ptr);
}
__device__ void operator delete[](void *ptr, __SIZE_TYPE__ size) CUDA_NOEXCEPT {
__device__ inline void operator delete[](void *ptr,
__SIZE_TYPE__ size) CUDA_NOEXCEPT {
::operator delete(ptr);
}
#endif

c_headers/emmintrin.h vendored

@ -1675,7 +1675,49 @@ _mm_loadu_si64(void const *__a)
long long __v;
} __attribute__((__packed__, __may_alias__));
long long __u = ((struct __loadu_si64*)__a)->__v;
return __extension__ (__m128i)(__v2di){__u, 0L};
return __extension__ (__m128i)(__v2di){__u, 0LL};
}
/// Loads a 32-bit integer value to the low element of a 128-bit integer
/// vector and clears the upper elements.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
///
/// \param __a
/// A pointer to a 32-bit memory location. The address of the memory
/// location does not have to be aligned.
/// \returns A 128-bit vector of [4 x i32] containing the loaded value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_loadu_si32(void const *__a)
{
struct __loadu_si32 {
int __v;
} __attribute__((__packed__, __may_alias__));
int __u = ((struct __loadu_si32*)__a)->__v;
return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};
}
/// Loads a 16-bit integer value to the low element of a 128-bit integer
/// vector and clears the upper elements.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic does not correspond to a specific instruction.
///
/// \param __a
/// A pointer to a 16-bit memory location. The address of the memory
/// location does not have to be aligned.
/// \returns A 128-bit vector of [8 x i16] containing the loaded value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_loadu_si16(void const *__a)
{
struct __loadu_si16 {
short __v;
} __attribute__((__packed__, __may_alias__));
short __u = ((struct __loadu_si16*)__a)->__v;
return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
}
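These new helpers give narrow unaligned loads a sanctioned spelling: the __packed__/__may_alias__ struct forces an unaligned access without TBAA assumptions, and the remaining lanes are zeroed. For instance, reading a 32-bit value from an arbitrary byte offset (buffer contents chosen for illustration):

#include <emmintrin.h>

static unsigned char buf[7] = {0, 0, 0x78, 0x56, 0x34, 0x12, 0};

__m128i load_from_offset2(void)
{
    /* Unaligned 4-byte load into lane 0; lanes 1..3 are cleared. */
    return _mm_loadu_si32(buf + 2); /* lane 0 = 0x12345678 on little-endian */
}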
/// Loads a 64-bit double-precision value to the low element of a
@ -3993,6 +4035,69 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
((struct __storeu_si128*)__p)->__v = __b;
}
/// Stores a 64-bit integer value from the low element of a 128-bit integer
/// vector.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
///
/// \param __p
/// A pointer to a 64-bit memory location. The address of the memory
/// location does not have to be aligned.
/// \param __b
/// A 128-bit integer vector containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_si64(void const *__p, __m128i __b)
{
struct __storeu_si64 {
long long __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];
}
/// Stores a 32-bit integer value from the low element of a 128-bit integer
/// vector.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
///
/// \param __p
/// A pointer to a 32-bit memory location. The address of the memory
/// location does not have to be aligned.
/// \param __b
/// A 128-bit integer vector containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_si32(void const *__p, __m128i __b)
{
struct __storeu_si32 {
int __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];
}
/// Stores a 16-bit integer value from the low element of a 128-bit integer
/// vector.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic does not correspond to a specific instruction.
///
/// \param __p
/// A pointer to a 16-bit memory location. The address of the memory
/// location does not have to be aligned.
/// \param __b
/// A 128-bit integer vector containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_si16(void const *__p, __m128i __b)
{
struct __storeu_si16 {
short __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];
}
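The store-side counterparts write only the low element through the same packed-struct idiom (note that this revision declares the destination as void const * even though it is written through; the internal cast discards the qualifier). A minimal use, storing the low 32 bits to an arbitrarily aligned buffer:

#include <emmintrin.h>

void store_low_dword(unsigned char *dst, __m128i v)
{
    _mm_storeu_si32(dst, v); /* writes exactly dst[0..3], any alignment */
}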
/// Moves bytes selected by the mask from the first operand to the
/// specified unaligned memory location. When a mask bit is 1, the
/// corresponding byte is written, otherwise it is not written.

c_headers/float.h vendored

@ -21,8 +21,8 @@
*===-----------------------------------------------------------------------===
*/
#ifndef __FLOAT_H
#define __FLOAT_H
#ifndef __CLANG_FLOAT_H
#define __CLANG_FLOAT_H
/* If we're on MinGW, fall back to the system's float.h, which might have
* additional definitions provided for Windows.
@ -85,6 +85,9 @@
# undef FLT_DECIMAL_DIG
# undef DBL_DECIMAL_DIG
# undef LDBL_DECIMAL_DIG
# undef FLT_HAS_SUBNORM
# undef DBL_HAS_SUBNORM
# undef LDBL_HAS_SUBNORM
# endif
#endif
@ -141,6 +144,9 @@
# define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__
# define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
# define FLT_HAS_SUBNORM __FLT_HAS_DENORM__
# define DBL_HAS_SUBNORM __DBL_HAS_DENORM__
# define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__
#endif
#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
@ -157,4 +163,4 @@
# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__
#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */
#endif /* __FLOAT_H */
#endif /* __CLANG_FLOAT_H */
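Besides the guard rename, this adds the C11 FLT_HAS_SUBNORM family, forwarded from the compiler's __*_HAS_DENORM__ values: 1 means subnormal numbers are supported, 0 means they are absent, -1 means it is indeterminable. A quick probe:

#include <float.h>
#include <stdio.h>

int main(void)
{
    printf("float %d, double %d, long double %d\n",
           FLT_HAS_SUBNORM, DBL_HAS_SUBNORM, LDBL_HAS_SUBNORM);
    return 0;
}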

c_headers/immintrin.h vendored

@ -306,6 +306,65 @@ _writegsbase_u64(unsigned long long __V)
#endif
#endif /* __FSGSBASE__ */
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)
/* The structs used below are to force the load/store to be unaligned. This
* is accomplished with the __packed__ attribute. The __may_alias__ prevents
* tbaa metadata from being generated based on the struct and the type of the
* field inside of it.
*/
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
struct __loadu_i16 {
short __v;
} __attribute__((__packed__, __may_alias__));
return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);
}
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
struct __storeu_i16 {
short __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);
}
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
struct __loadu_i32 {
int __v;
} __attribute__((__packed__, __may_alias__));
return __builtin_bswap32(((struct __loadu_i32*)__P)->__v);
}
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
struct __storeu_i32 {
int __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);
}
#ifdef __x86_64__
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
struct __loadu_i64 {
long long __v;
} __attribute__((__packed__, __may_alias__));
return __builtin_bswap64(((struct __loadu_i64*)__P)->__v);
}
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
struct __storeu_i64 {
long long __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);
}
#endif
#endif /* __MOVBE */
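Each MOVBE wrapper pairs an unaligned access with a byte swap, i.e. a load or store in the opposite byte order from the host. Round-tripping a value through a big-endian buffer (a sketch, assuming a MOVBE-capable build such as -mmovbe):

#include <immintrin.h>
#include <assert.h>

int main(void)
{
    unsigned char buf[4];
    _storebe_i32(buf, 0x11223344);          /* buf = {0x11, 0x22, 0x33, 0x44} */
    assert(_loadbe_i32(buf) == 0x11223344); /* the swap on load restores it */
    return 0;
}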
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
#include <rtmintrin.h>
#include <xtestintrin.h>

c_headers/intrin.h vendored

@ -90,8 +90,6 @@ void __inwordstring(unsigned short, unsigned short *, unsigned long);
void __lidt(void *);
unsigned __int64 __ll_lshift(unsigned __int64, int);
__int64 __ll_rshift(__int64, int);
unsigned int __lzcnt(unsigned int);
unsigned short __lzcnt16(unsigned short);
static __inline__
void __movsb(unsigned char *, unsigned char const *, size_t);
static __inline__
@ -219,7 +217,6 @@ void __incgsbyte(unsigned long);
void __incgsdword(unsigned long);
void __incgsqword(unsigned long);
void __incgsword(unsigned long);
unsigned __int64 __lzcnt64(unsigned __int64);
static __inline__
void __movsq(unsigned long long *, unsigned long long const *, size_t);
static __inline__
@ -329,189 +326,63 @@ __int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);
|* Interlocked Exchange Add
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value) {
return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);
}
char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value);
char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value);
char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value);
short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value);
short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value);
short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value);
long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value);
long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value);
long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value);
__int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value);
__int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value);
__int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Increment
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedIncrement16_acq(short volatile *_Value) {
return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedIncrement16_nf(short volatile *_Value) {
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedIncrement16_rel(short volatile *_Value) {
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedIncrement_acq(long volatile *_Value) {
return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedIncrement_nf(long volatile *_Value) {
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedIncrement_rel(long volatile *_Value) {
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedIncrement64_acq(__int64 volatile *_Value) {
return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedIncrement64_nf(__int64 volatile *_Value) {
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedIncrement64_rel(__int64 volatile *_Value) {
return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);
}
short _InterlockedIncrement16_acq(short volatile *_Value);
short _InterlockedIncrement16_nf(short volatile *_Value);
short _InterlockedIncrement16_rel(short volatile *_Value);
long _InterlockedIncrement_acq(long volatile *_Value);
long _InterlockedIncrement_nf(long volatile *_Value);
long _InterlockedIncrement_rel(long volatile *_Value);
__int64 _InterlockedIncrement64_acq(__int64 volatile *_Value);
__int64 _InterlockedIncrement64_nf(__int64 volatile *_Value);
__int64 _InterlockedIncrement64_rel(__int64 volatile *_Value);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Decrement
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedDecrement16_acq(short volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedDecrement16_nf(short volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedDecrement16_rel(short volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedDecrement_acq(long volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedDecrement_nf(long volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedDecrement_rel(long volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedDecrement64_acq(__int64 volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedDecrement64_nf(__int64 volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedDecrement64_rel(__int64 volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);
}
short _InterlockedDecrement16_acq(short volatile *_Value);
short _InterlockedDecrement16_nf(short volatile *_Value);
short _InterlockedDecrement16_rel(short volatile *_Value);
long _InterlockedDecrement_acq(long volatile *_Value);
long _InterlockedDecrement_nf(long volatile *_Value);
long _InterlockedDecrement_rel(long volatile *_Value);
__int64 _InterlockedDecrement64_acq(__int64 volatile *_Value);
__int64 _InterlockedDecrement64_nf(__int64 volatile *_Value);
__int64 _InterlockedDecrement64_rel(__int64 volatile *_Value);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked And
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedAnd8_acq(char volatile *_Value, char _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedAnd8_nf(char volatile *_Value, char _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedAnd8_rel(char volatile *_Value, char _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedAnd16_acq(short volatile *_Value, short _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedAnd16_nf(short volatile *_Value, short _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedAnd16_rel(short volatile *_Value, short _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedAnd_acq(long volatile *_Value, long _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedAnd_nf(long volatile *_Value, long _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedAnd_rel(long volatile *_Value, long _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);
}
char _InterlockedAnd8_acq(char volatile *_Value, char _Mask);
char _InterlockedAnd8_nf(char volatile *_Value, char _Mask);
char _InterlockedAnd8_rel(char volatile *_Value, char _Mask);
short _InterlockedAnd16_acq(short volatile *_Value, short _Mask);
short _InterlockedAnd16_nf(short volatile *_Value, short _Mask);
short _InterlockedAnd16_rel(short volatile *_Value, short _Mask);
long _InterlockedAnd_acq(long volatile *_Value, long _Mask);
long _InterlockedAnd_nf(long volatile *_Value, long _Mask);
long _InterlockedAnd_rel(long volatile *_Value, long _Mask);
__int64 _InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask);
#endif
/*----------------------------------------------------------------------------*\
|* Bit Counting and Testing
@ -534,261 +405,81 @@ unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,
|* Interlocked Or
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedOr8_acq(char volatile *_Value, char _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedOr8_nf(char volatile *_Value, char _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedOr8_rel(char volatile *_Value, char _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedOr16_acq(short volatile *_Value, short _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedOr16_nf(short volatile *_Value, short _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedOr16_rel(short volatile *_Value, short _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedOr_acq(long volatile *_Value, long _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedOr_nf(long volatile *_Value, long _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedOr_rel(long volatile *_Value, long _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);
}
char _InterlockedOr8_acq(char volatile *_Value, char _Mask);
char _InterlockedOr8_nf(char volatile *_Value, char _Mask);
char _InterlockedOr8_rel(char volatile *_Value, char _Mask);
short _InterlockedOr16_acq(short volatile *_Value, short _Mask);
short _InterlockedOr16_nf(short volatile *_Value, short _Mask);
short _InterlockedOr16_rel(short volatile *_Value, short _Mask);
long _InterlockedOr_acq(long volatile *_Value, long _Mask);
long _InterlockedOr_nf(long volatile *_Value, long _Mask);
long _InterlockedOr_rel(long volatile *_Value, long _Mask);
__int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Xor
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedXor8_acq(char volatile *_Value, char _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedXor8_nf(char volatile *_Value, char _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedXor8_rel(char volatile *_Value, char _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedXor16_acq(short volatile *_Value, short _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedXor16_nf(short volatile *_Value, short _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedXor16_rel(short volatile *_Value, short _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedXor_acq(long volatile *_Value, long _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedXor_nf(long volatile *_Value, long _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedXor_rel(long volatile *_Value, long _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);
}
char _InterlockedXor8_acq(char volatile *_Value, char _Mask);
char _InterlockedXor8_nf(char volatile *_Value, char _Mask);
char _InterlockedXor8_rel(char volatile *_Value, char _Mask);
short _InterlockedXor16_acq(short volatile *_Value, short _Mask);
short _InterlockedXor16_nf(short volatile *_Value, short _Mask);
short _InterlockedXor16_rel(short volatile *_Value, short _Mask);
long _InterlockedXor_acq(long volatile *_Value, long _Mask);
long _InterlockedXor_nf(long volatile *_Value, long _Mask);
long _InterlockedXor_rel(long volatile *_Value, long _Mask);
__int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask);
__int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchange8_acq(char volatile *_Target, char _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
return _Value;
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchange8_nf(char volatile *_Target, char _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
return _Value;
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedExchange8_rel(char volatile *_Target, char _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
return _Value;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchange16_acq(short volatile *_Target, short _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
return _Value;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchange16_nf(short volatile *_Target, short _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
return _Value;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedExchange16_rel(short volatile *_Target, short _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_acq(long volatile *_Target, long _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_nf(long volatile *_Target, long _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_rel(long volatile *_Target, long _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);
return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);
return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value) {
__atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);
return _Value;
}
char _InterlockedExchange8_acq(char volatile *_Target, char _Value);
char _InterlockedExchange8_nf(char volatile *_Target, char _Value);
char _InterlockedExchange8_rel(char volatile *_Target, char _Value);
short _InterlockedExchange16_acq(short volatile *_Target, short _Value);
short _InterlockedExchange16_nf(short volatile *_Target, short _Value);
short _InterlockedExchange16_rel(short volatile *_Target, short _Value);
long _InterlockedExchange_acq(long volatile *_Target, long _Value);
long _InterlockedExchange_nf(long volatile *_Target, long _Value);
long _InterlockedExchange_rel(long volatile *_Target, long _Value);
__int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedCompareExchange8_acq(char volatile *_Destination,
char _Exchange, char _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
return _Comparand;
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedCompareExchange8_nf(char volatile *_Destination,
char _Exchange, char _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
return _Comparand;
}
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedCompareExchange8_rel(char volatile *_Destination,
char _Exchange, char _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
return _Comparand;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedCompareExchange16_acq(short volatile *_Destination,
short _Exchange, short _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
return _Comparand;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedCompareExchange16_nf(short volatile *_Destination,
short _Exchange, short _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
return _Comparand;
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedCompareExchange16_rel(short volatile *_Destination,
short _Exchange, short _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_acq(long volatile *_Destination,
long _Exchange, long _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_nf(long volatile *_Destination,
long _Exchange, long _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_rel(long volatile *_Destination,
long _Exchange, long _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_acq(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
__ATOMIC_SEQ_CST, __ATOMIC_RELEASE);
return _Comparand;
}
char _InterlockedCompareExchange8_acq(char volatile *_Destination,
char _Exchange, char _Comparand);
char _InterlockedCompareExchange8_nf(char volatile *_Destination,
char _Exchange, char _Comparand);
char _InterlockedCompareExchange8_rel(char volatile *_Destination,
char _Exchange, char _Comparand);
short _InterlockedCompareExchange16_acq(short volatile *_Destination,
short _Exchange, short _Comparand);
short _InterlockedCompareExchange16_nf(short volatile *_Destination,
short _Exchange, short _Comparand);
short _InterlockedCompareExchange16_rel(short volatile *_Destination,
short _Exchange, short _Comparand);
long _InterlockedCompareExchange_acq(long volatile *_Destination,
long _Exchange, long _Comparand);
long _InterlockedCompareExchange_nf(long volatile *_Destination,
long _Exchange, long _Comparand);
long _InterlockedCompareExchange_rel(long volatile *_Destination,
long _Exchange, long _Comparand);
__int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand);
__int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand);
__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand);
#endif
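Throughout these Interlocked blocks the ARM paths change from header-inline __atomic_* implementations to plain declarations, leaving the lowering to compiler-known builtins; the _acq/_rel/_nf suffixes still select acquire, release, or relaxed ("no fence") ordering. Call sites are unaffected, e.g. a minimal CAS spin with acquire semantics on an MSVC-style ARM target (sketch):

#include <intrin.h>

/* Spin until *flag goes 0 -> 1; acquire ordering once we own it. */
static void lock_acquire(long volatile *flag)
{
    while (_InterlockedCompareExchange_acq(flag, 1, 0) != 0)
        ; /* returns the previous value; 0 means the exchange succeeded */
}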
/*----------------------------------------------------------------------------*\
@ -841,7 +532,7 @@ __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {
static __inline__ void __DEFAULT_FN_ATTRS
__cpuid(int __info[4], int __level) {
__asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3])
: "a"(__level));
: "a"(__level), "c"(0));
}
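Adding the "c"(0) input pins ECX to zero, since some leaves (leaf 7, for example) are subleaf-indexed and would otherwise read an undefined subleaf. Typical use, fetching the vendor string from leaf 0 (the vendor bytes live in EBX, EDX, ECX, in that order):

#include <intrin.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
    int info[4]; /* {EAX, EBX, ECX, EDX}, per the asm constraints above */
    char vendor[13] = {0};
    __cpuid(info, 0);
    memcpy(vendor + 0, &info[1], 4); /* EBX */
    memcpy(vendor + 4, &info[3], 4); /* EDX */
    memcpy(vendor + 8, &info[2], 4); /* ECX */
    printf("%s\n", vendor);          /* e.g. "GenuineIntel" */
    return 0;
}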
static __inline__ void __DEFAULT_FN_ATTRS
__cpuidex(int __info[4], int __level, int __ecx) {
@ -858,12 +549,35 @@ static __inline__ void __DEFAULT_FN_ATTRS
__halt(void) {
__asm__ volatile ("hlt");
}
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
static __inline__ void __DEFAULT_FN_ATTRS
__nop(void) {
__asm__ volatile ("nop");
}
#endif
/*----------------------------------------------------------------------------*\
|* MS AArch64 specific
\*----------------------------------------------------------------------------*/
#if defined(__aarch64__)
unsigned __int64 __getReg(int);
long _InterlockedAdd(long volatile *Addend, long Value);
__int64 _ReadStatusReg(int);
void _WriteStatusReg(int, __int64);
static inline unsigned short _byteswap_ushort (unsigned short val) {
return __builtin_bswap16(val);
}
static inline unsigned long _byteswap_ulong (unsigned long val) {
return __builtin_bswap32(val);
}
static inline unsigned __int64 _byteswap_uint64 (unsigned __int64 val) {
return __builtin_bswap64(val);
}
#endif
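The new AArch64 block also supplies MSVC's byte-swap helpers as wrappers over the GCC-style bswap builtins; on an AArch64 MSVC-style target:

#include <intrin.h>
#include <assert.h>

int main(void)
{
    assert(_byteswap_ushort(0x1122) == 0x2211);
    assert(_byteswap_ulong(0x11223344UL) == 0x44332211UL);
    return 0;
}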
/*----------------------------------------------------------------------------*\
|* Privileged intrinsics
\*----------------------------------------------------------------------------*/


@ -31,6 +31,7 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
#ifndef _MSC_VER
/// Counts the number of leading zero bits in the operand.
///
/// \headerfile <x86intrin.h>
@ -41,11 +42,8 @@
/// An unsigned 16-bit integer whose leading zeros are to be counted.
/// \returns An unsigned 16-bit integer containing the number of leading zero
/// bits in the operand.
static __inline__ unsigned short __DEFAULT_FN_ATTRS
__lzcnt16(unsigned short __X)
{
return __X ? __builtin_clzs(__X) : 16;
}
#define __lzcnt16(X) __builtin_ia32_lzcnt_u16((unsigned short)(X))
#endif // _MSC_VER
/// Counts the number of leading zero bits in the operand.
///
@ -61,7 +59,7 @@ __lzcnt16(unsigned short __X)
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__lzcnt32(unsigned int __X)
{
return __X ? __builtin_clz(__X) : 32;
return __builtin_ia32_lzcnt_u32(__X);
}
/// Counts the number of leading zero bits in the operand.
@ -78,10 +76,11 @@ __lzcnt32(unsigned int __X)
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_lzcnt_u32(unsigned int __X)
{
return __X ? __builtin_clz(__X) : 32;
return __builtin_ia32_lzcnt_u32(__X);
}
#ifdef __x86_64__
#ifndef _MSC_VER
/// Counts the number of leading zero bits in the operand.
///
/// \headerfile <x86intrin.h>
@ -93,11 +92,8 @@ _lzcnt_u32(unsigned int __X)
/// \returns An unsigned 64-bit integer containing the number of leading zero
/// bits in the operand.
/// \see _lzcnt_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__lzcnt64(unsigned long long __X)
{
return __X ? __builtin_clzll(__X) : 64;
}
#define __lzcnt64(X) __builtin_ia32_lzcnt_u64((unsigned long long)(X))
#endif // _MSC_VER
/// Counts the number of leading zero bits in the operand.
///
@ -113,7 +109,7 @@ __lzcnt64(unsigned long long __X)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_lzcnt_u64(unsigned long long __X)
{
return __X ? __builtin_clzll(__X) : 64;
return __builtin_ia32_lzcnt_u64(__X);
}
#endif
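As with tzcnt in bmiintrin.h, the lzcnt builtins make the zero-input case well defined (LZCNT of 0 yields the operand width), and the 16- and 64-bit variants are now macros outside MSVC mode. Expected results (assuming an LZCNT-enabled build, e.g. -mlzcnt):

#include <immintrin.h>
#include <assert.h>

int main(void)
{
    assert(__lzcnt32(0) == 32);         /* defined: operand width */
    assert(__lzcnt32(1) == 31);
    assert(_lzcnt_u64(0x100ULL) == 55); /* highest set bit is bit 8 */
    return 0;
}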

c_headers/opencl-c.h vendored

@ -22,6 +22,14 @@
#endif //cl_khr_3d_image_writes
#endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
#ifndef cl_intel_planar_yuv
#define cl_intel_planar_yuv
#endif // cl_intel_planar_yuv
#pragma OPENCL EXTENSION cl_intel_planar_yuv : begin
#pragma OPENCL EXTENSION cl_intel_planar_yuv : end
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
#define __ovld __attribute__((overloadable))
#define __conv __attribute__((convergent))
@ -14602,6 +14610,7 @@ int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, f
uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, int4 coord);
uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord);
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);
float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
@ -14609,6 +14618,7 @@ int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_
int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);
uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, int coord);
float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord);
@ -14618,6 +14628,7 @@ int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, f
uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, int coord);
uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord);
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);
float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
@ -14625,6 +14636,7 @@ int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_
int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);
uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
#ifdef cl_khr_depth_images
float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord);
@ -14727,6 +14739,8 @@ uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler,
#endif //cl_khr_mipmap_image
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
/**
* Sampler-less Image Access
*/
@ -14760,24 +14774,31 @@ float4 __purefn __ovld read_imagef(read_only image3d_t image, int4 coord);
int4 __purefn __ovld read_imagei(read_only image3d_t image, int4 coord);
uint4 __purefn __ovld read_imageui(read_only image3d_t image, int4 coord);
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
// Image read functions returning half4 type
#ifdef cl_khr_fp16
half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, int coord);
half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, float coord);
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);
half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, int2 coord);
half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, float2 coord);
half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, int4 coord);
half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, float4 coord);
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);
half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, int4 coord);
half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, float4 coord);
/**
* Sampler-less Image Access
*/
half4 __purefn __ovld read_imageh(read_only image1d_t image, int coord);
half4 __purefn __ovld read_imageh(read_only image2d_t image, int2 coord);
half4 __purefn __ovld read_imageh(read_only image3d_t image, int4 coord);
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, int2 coord);
half4 __purefn __ovld read_imageh(read_only image2d_array_t image, int4 coord);
half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
#endif //cl_khr_fp16
// Image read functions for read_write images
@ -15707,7 +15728,6 @@ double __ovld __conv work_group_scan_inclusive_max(double x);
// OpenCL v2.0 s6.13.16 - Pipe Functions
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define PIPE_RESERVE_ID_VALID_BIT (1U << 30)
#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))
bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@ -16193,6 +16213,637 @@ void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, u
void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
#endif // cl_intel_subgroups_short
#ifdef cl_intel_device_side_avc_motion_estimation
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin
#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0
#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1
#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2
#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3
#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0
#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1
#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2
#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3
#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0
#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
#define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0
#define CLK_AVC_ME_INTRA_8x8_INTEL 0x1
#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2
#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30)
#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1
#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2
#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3
#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
#define CLK_AVC_ME_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0
// MCE built-in functions
uchar __ovld
intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty(
uchar slice_type, uchar qp);
ulong __ovld intel_sub_group_avc_mce_get_default_inter_shape_penalty(
uchar slice_type, uchar qp);
uchar __ovld intel_sub_group_avc_mce_get_default_inter_direction_penalty(
uchar slice_type, uchar qp);
uint __ovld intel_sub_group_avc_mce_get_default_intra_luma_shape_penalty(
uchar slice_type, uchar qp);
uint2 __ovld
intel_sub_group_avc_mce_get_default_inter_motion_vector_cost_table(
uchar slice_type, uchar qp);
uchar __ovld intel_sub_group_avc_mce_get_default_intra_luma_mode_penalty(
uchar slice_type, uchar qp);
uint2 __ovld intel_sub_group_avc_mce_get_default_high_penalty_cost_table();
uint2 __ovld intel_sub_group_avc_mce_get_default_medium_penalty_cost_table();
uint2 __ovld intel_sub_group_avc_mce_get_default_low_penalty_cost_table();
uint __ovld intel_sub_group_avc_mce_get_default_non_dc_luma_intra_penalty();
uchar __ovld
intel_sub_group_avc_mce_get_default_intra_chroma_mode_base_penalty();
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty(
uchar reference_base_penalty, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_inter_shape_penalty(
ulong packed_shape_penalty, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_inter_direction_penalty(
uchar direction_cost, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_motion_vector_cost_function(
ulong packed_cost_center_delta, uint2 packed_cost_table,
uchar cost_precision, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_ac_only_haar(
intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_source_interlaced_field_polarity(
uchar src_field_polarity, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity(
uchar ref_field_polarity, intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities(
uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
intel_sub_group_avc_mce_payload_t payload);
ulong __ovld intel_sub_group_avc_mce_get_motion_vectors(
intel_sub_group_avc_mce_result_t result);
ushort __ovld intel_sub_group_avc_mce_get_inter_distortions(
intel_sub_group_avc_mce_result_t result);
ushort __ovld intel_sub_group_avc_mce_get_best_inter_distortion(
intel_sub_group_avc_mce_result_t result);
uchar __ovld intel_sub_group_avc_mce_get_inter_major_shape(
intel_sub_group_avc_mce_result_t result);
uchar __ovld intel_sub_group_avc_mce_get_inter_minor_shapes(
intel_sub_group_avc_mce_result_t result);
uchar __ovld intel_sub_group_avc_mce_get_inter_directions(
intel_sub_group_avc_mce_result_t result);
uchar __ovld intel_sub_group_avc_mce_get_inter_motion_vector_count(
intel_sub_group_avc_mce_result_t result);
uint __ovld intel_sub_group_avc_mce_get_inter_reference_ids(
intel_sub_group_avc_mce_result_t result);
uchar __ovld
intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities(
uint packed_reference_ids, uint packed_reference_parameter_field_polarities,
intel_sub_group_avc_mce_result_t result);
// IME built-in functions
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_initialize(
ushort2 src_coord, uchar partition_mask, uchar sad_adjustment);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_single_reference(
short2 ref_offset, uchar search_window_config,
intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_dual_reference(
short2 fwd_ref_offset, short2 bwd_ref_offset, uchar search_window_config,
intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_max_motion_vector_count(
uchar max_motion_vector_count, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_unidirectional_mix_disable(
intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_early_search_termination_threshold(
uchar threshold, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_weighted_sad(
uint packed_sad_weights, intel_sub_group_avc_ime_payload_t payload);
__attribute__((deprecated("If you use the latest Intel driver, please use "
"intel_sub_group_avc_ime_ref_window_size instead",
"intel_sub_group_avc_ime_ref_window_size")))
ushort2 __ovld
intel_sub_group_ime_ref_window_size(uchar search_window_config, char dual_ref);
ushort2 __ovld intel_sub_group_avc_ime_ref_window_size(
uchar search_window_config, char dual_ref);
short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset(
short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size,
ushort2 image_size);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_evaluate_with_single_reference(
read_only image2d_t src_image, read_only image2d_t ref_image,
sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_evaluate_with_dual_reference(
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld
intel_sub_group_avc_ime_evaluate_with_single_reference_streamout(
read_only image2d_t src_image, read_only image2d_t ref_image,
sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld
intel_sub_group_avc_ime_evaluate_with_dual_reference_streamout(
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_evaluate_with_single_reference_streamin(
read_only image2d_t src_image, read_only image2d_t ref_image,
sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,
intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_evaluate_with_dual_reference_streamin(
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
intel_sub_group_avc_ime_payload_t payload,
intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);
intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld
intel_sub_group_avc_ime_evaluate_with_single_reference_streaminout(
read_only image2d_t src_image, read_only image2d_t ref_image,
sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,
intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);
intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld
intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout(
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
intel_sub_group_avc_ime_payload_t payload,
intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);
intel_sub_group_avc_ime_single_reference_streamin_t __ovld
intel_sub_group_avc_ime_get_single_reference_streamin(
intel_sub_group_avc_ime_result_single_reference_streamout_t result);
intel_sub_group_avc_ime_dual_reference_streamin_t __ovld
intel_sub_group_avc_ime_get_dual_reference_streamin(
intel_sub_group_avc_ime_result_dual_reference_streamout_t result);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_strip_single_reference_streamout(
intel_sub_group_avc_ime_result_single_reference_streamout_t result);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_ime_strip_dual_reference_streamout(
intel_sub_group_avc_ime_result_dual_reference_streamout_t result);
uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(
intel_sub_group_avc_ime_result_single_reference_streamout_t result,
uchar major_shape);
ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(
intel_sub_group_avc_ime_result_single_reference_streamout_t result,
uchar major_shape);
uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(
intel_sub_group_avc_ime_result_single_reference_streamout_t result,
uchar major_shape);
uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(
intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
uchar major_shape, uchar direction);
ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(
intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
uchar major_shape, uchar direction);
uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(
intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
uchar major_shape, uchar direction);
uchar __ovld intel_sub_group_avc_ime_get_border_reached(
uchar image_select, intel_sub_group_avc_ime_result_t result);
uchar __ovld intel_sub_group_avc_ime_get_truncated_search_indication(
intel_sub_group_avc_ime_result_t result);
uchar __ovld
intel_sub_group_avc_ime_get_unidirectional_early_search_termination(
intel_sub_group_avc_ime_result_t result);
uint __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_motion_vector(
intel_sub_group_avc_ime_result_t result);
ushort __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_distortion(
intel_sub_group_avc_ime_result_t result);
// REF built-in functions
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_fme_initialize(
ushort2 src_coord, ulong motion_vectors, uchar major_shapes,
uchar minor_shapes, uchar directions, uchar pixel_resolution,
uchar sad_adjustment);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_bme_initialize(
ushort2 src_coord, ulong motion_vectors, uchar major_shapes,
uchar minor_shapes, uchar directions, uchar pixel_resolution,
uchar bidirectional_weight, uchar sad_adjustment);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_bidirectional_mix_disable(
intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_bilinear_filter_enable(
intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_ref_result_t __ovld
intel_sub_group_avc_ref_evaluate_with_single_reference(
read_only image2d_t src_image, read_only image2d_t ref_image,
sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_ref_result_t __ovld
intel_sub_group_avc_ref_evaluate_with_dual_reference(
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_ref_result_t __ovld
intel_sub_group_avc_ref_evaluate_with_multi_reference(
read_only image2d_t src_image, uint packed_reference_ids,
sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_ref_result_t __ovld
intel_sub_group_avc_ref_evaluate_with_multi_reference(
read_only image2d_t src_image, uint packed_reference_ids,
uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
intel_sub_group_avc_ref_payload_t payload);
// SIC built-in functions
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_initialize(
ushort2 src_coord);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_configure_skc(
uint skip_block_partition_type, uint skip_motion_vector_mask,
ulong motion_vectors, uchar bidirectional_weight, uchar skip_sad_adjustment,
intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_configure_ipe(
uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,
uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,
uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,
uchar intra_sad_adjustment, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_configure_ipe(
uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,
uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,
uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,
ushort left_edge_chroma_pixels, ushort upper_left_corner_chroma_pixel,
ushort upper_edge_chroma_pixels, uchar intra_sad_adjustment,
intel_sub_group_avc_sic_payload_t payload);
uint __ovld
intel_sub_group_avc_sic_get_motion_vector_mask(
uint skip_block_partition_type, uchar direction);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_intra_luma_shape_penalty(
uint packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_intra_luma_mode_cost_function(
uchar luma_mode_penalty, uint luma_packed_neighbor_modes,
uint luma_packed_non_dc_penalty, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_intra_chroma_mode_cost_function(
uchar chroma_mode_penalty, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_skc_bilinear_filter_enable(
intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_skc_forward_transform_enable(
ulong packed_sad_coefficients, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_block_based_raw_skip_sad(
uchar block_based_skip_type,
intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_sic_evaluate_ipe(
read_only image2d_t src_image, sampler_t vme_media_sampler,
intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_sic_evaluate_with_single_reference(
read_only image2d_t src_image, read_only image2d_t ref_image,
sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_sic_evaluate_with_dual_reference(
read_only image2d_t src_image, read_only image2d_t fwd_ref_image,
read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_sic_evaluate_with_multi_reference(
read_only image2d_t src_image, uint packed_reference_ids,
sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_sic_evaluate_with_multi_reference(
read_only image2d_t src_image, uint packed_reference_ids,
uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
intel_sub_group_avc_sic_payload_t payload);
uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape(
intel_sub_group_avc_sic_result_t result);
ushort __ovld intel_sub_group_avc_sic_get_best_ipe_luma_distortion(
intel_sub_group_avc_sic_result_t result);
ushort __ovld intel_sub_group_avc_sic_get_best_ipe_chroma_distortion(
intel_sub_group_avc_sic_result_t result);
ulong __ovld intel_sub_group_avc_sic_get_packed_ipe_luma_modes(
intel_sub_group_avc_sic_result_t result);
uchar __ovld intel_sub_group_avc_sic_get_ipe_chroma_mode(
intel_sub_group_avc_sic_result_t result);
uint __ovld intel_sub_group_avc_sic_get_packed_skc_luma_count_threshold(
intel_sub_group_avc_sic_result_t result);
ulong __ovld intel_sub_group_avc_sic_get_packed_skc_luma_sum_threshold(
intel_sub_group_avc_sic_result_t result);
ushort __ovld intel_sub_group_avc_sic_get_inter_raw_sads(
intel_sub_group_avc_sic_result_t result);
// Wrappers
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_inter_base_multi_reference_penalty(
uchar reference_base_penalty, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_inter_base_multi_reference_penalty(
uchar reference_base_penalty, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_inter_base_multi_reference_penalty(
uchar reference_base_penalty, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_inter_shape_penalty(
ulong packed_shape_cost, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_inter_shape_penalty(
ulong packed_shape_cost, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_inter_shape_penalty(
ulong packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_inter_direction_penalty(
uchar direction_cost, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_inter_direction_penalty(
uchar direction_cost, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_inter_direction_penalty(
uchar direction_cost, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_motion_vector_cost_function(
ulong packed_cost_center_delta, uint2 packed_cost_table,
uchar cost_precision, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_motion_vector_cost_function(
ulong packed_cost_center_delta, uint2 packed_cost_table,
uchar cost_precision, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_motion_vector_cost_function(
ulong packed_cost_center_delta, uint2 packed_cost_table,
uchar cost_precision, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_source_interlaced_field_polarity(
uchar src_field_polarity, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_source_interlaced_field_polarity(
uchar src_field_polarity, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_source_interlaced_field_polarity(
uchar src_field_polarity, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_single_reference_interlaced_field_polarity(
uchar ref_field_polarity, intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_single_reference_interlaced_field_polarity(
uchar ref_field_polarity, intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_single_reference_interlaced_field_polarity(
uchar ref_field_polarity, intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_dual_reference_interlaced_field_polarities(
uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_dual_reference_interlaced_field_polarities(
uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_dual_reference_interlaced_field_polarities(
uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,
intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_ime_set_ac_only_haar(
intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_ref_set_ac_only_haar(
intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_sic_set_ac_only_haar(
intel_sub_group_avc_sic_payload_t payload);
ulong __ovld intel_sub_group_avc_ime_get_motion_vectors(
intel_sub_group_avc_ime_result_t result);
ulong __ovld intel_sub_group_avc_ref_get_motion_vectors(
intel_sub_group_avc_ref_result_t result);
ushort __ovld intel_sub_group_avc_ime_get_inter_distortions(
intel_sub_group_avc_ime_result_t result);
ushort __ovld intel_sub_group_avc_ref_get_inter_distortions(
intel_sub_group_avc_ref_result_t result);
ushort __ovld intel_sub_group_avc_sic_get_inter_distortions(
intel_sub_group_avc_sic_result_t result);
ushort __ovld intel_sub_group_avc_ime_get_best_inter_distortion(
intel_sub_group_avc_ime_result_t result);
ushort __ovld intel_sub_group_avc_ref_get_best_inter_distortion(
intel_sub_group_avc_ref_result_t result);
uchar __ovld intel_sub_group_avc_ime_get_inter_major_shape(
intel_sub_group_avc_ime_result_t result);
uchar __ovld intel_sub_group_avc_ref_get_inter_major_shape(
intel_sub_group_avc_ref_result_t result);
uchar __ovld intel_sub_group_avc_ime_get_inter_minor_shapes(
intel_sub_group_avc_ime_result_t result);
uchar __ovld intel_sub_group_avc_ref_get_inter_minor_shapes(
intel_sub_group_avc_ref_result_t result);
uchar __ovld intel_sub_group_avc_ime_get_inter_directions(
intel_sub_group_avc_ime_result_t result);
uchar __ovld intel_sub_group_avc_ref_get_inter_directions(
intel_sub_group_avc_ref_result_t result);
uchar __ovld intel_sub_group_avc_ime_get_inter_motion_vector_count(
intel_sub_group_avc_ime_result_t result);
uchar __ovld intel_sub_group_avc_ref_get_inter_motion_vector_count(
intel_sub_group_avc_ref_result_t result);
uint __ovld intel_sub_group_avc_ime_get_inter_reference_ids(
intel_sub_group_avc_ime_result_t result);
uint __ovld intel_sub_group_avc_ref_get_inter_reference_ids(
intel_sub_group_avc_ref_result_t result);
uchar __ovld
intel_sub_group_avc_ime_get_inter_reference_interlaced_field_polarities(
uint packed_reference_ids, uint packed_reference_parameter_field_polarities,
intel_sub_group_avc_ime_result_t result);
uchar __ovld
intel_sub_group_avc_ref_get_inter_reference_interlaced_field_polarities(
uint packed_reference_ids, uint packed_reference_parameter_field_polarities,
intel_sub_group_avc_ref_result_t result);
// Type conversion functions
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_ime_convert_to_mce_payload(
intel_sub_group_avc_ime_payload_t payload);
intel_sub_group_avc_ime_payload_t __ovld
intel_sub_group_avc_mce_convert_to_ime_payload(
intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_ref_convert_to_mce_payload(
intel_sub_group_avc_ref_payload_t payload);
intel_sub_group_avc_ref_payload_t __ovld
intel_sub_group_avc_mce_convert_to_ref_payload(
intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_payload_t __ovld
intel_sub_group_avc_sic_convert_to_mce_payload(
intel_sub_group_avc_sic_payload_t payload);
intel_sub_group_avc_sic_payload_t __ovld
intel_sub_group_avc_mce_convert_to_sic_payload(
intel_sub_group_avc_mce_payload_t payload);
intel_sub_group_avc_mce_result_t __ovld
intel_sub_group_avc_ime_convert_to_mce_result(
intel_sub_group_avc_ime_result_t result);
intel_sub_group_avc_ime_result_t __ovld
intel_sub_group_avc_mce_convert_to_ime_result(
intel_sub_group_avc_mce_result_t result);
intel_sub_group_avc_mce_result_t __ovld
intel_sub_group_avc_ref_convert_to_mce_result(
intel_sub_group_avc_ref_result_t result);
intel_sub_group_avc_ref_result_t __ovld
intel_sub_group_avc_mce_convert_to_ref_result(
intel_sub_group_avc_mce_result_t result);
intel_sub_group_avc_mce_result_t __ovld
intel_sub_group_avc_sic_convert_to_mce_result(
intel_sub_group_avc_sic_result_t result);
intel_sub_group_avc_sic_result_t __ovld
intel_sub_group_avc_mce_convert_to_sic_result(
intel_sub_group_avc_mce_result_t result);
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end
#endif // cl_intel_device_side_avc_motion_estimation
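// Editorial sketch, not part of the header: a minimal kernel chaining the
// IME built-ins declared above. Only names declared in this extension are
// used; the VME sampler is assumed to be created by the host.
#ifdef cl_intel_device_side_avc_motion_estimation
__kernel void ime_search_sketch(read_only image2d_t src,
                                read_only image2d_t ref,
                                sampler_t vme_sampler,
                                __global ulong *mvs_out) {
  // One 16x16 source macroblock per work-group.
  ushort2 src_coord = (ushort2)((ushort)(get_group_id(0) * 16),
                                (ushort)(get_group_id(1) * 16));
  intel_sub_group_avc_ime_payload_t payload =
      intel_sub_group_avc_ime_initialize(src_coord,
                                         CLK_AVC_ME_PARTITION_MASK_ALL_INTEL,
                                         CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL);
  // Search the co-located region of the reference frame exhaustively.
  payload = intel_sub_group_avc_ime_set_single_reference(
      (short2)(0, 0), CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL, payload);
  intel_sub_group_avc_ime_result_t result =
      intel_sub_group_avc_ime_evaluate_with_single_reference(
          src, ref, vme_sampler, payload);
  // Store the packed per-shape motion vectors for this macroblock.
  mvs_out[get_group_id(1) * get_num_groups(0) + get_group_id(0)] =
      intel_sub_group_avc_ime_get_motion_vectors(result);
}
#endif // editorial sketch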
#ifdef cl_amd_media_ops
uint __ovld amd_bitalign(uint a, uint b, uint c);
uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);


@ -381,7 +381,7 @@ vec_insert_and_zero(const unsigned long long *__ptr) {
static inline __ATTRS_o_ai vector float
vec_insert_and_zero(const float *__ptr) {
vector float __vec = (vector float)0;
__vec[0] = *__ptr;
__vec[1] = *__ptr;
return __vec;
}
#endif
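/* Editorial note on the fix above: vec_insert_and_zero(&f) for float now
   yields {0.0f, f, 0.0f, 0.0f}. The scalar lands in element 1, matching the
   element-and-zero load on z systems, which places a 32-bit value in bits
   32..63 of the leftmost doubleword. */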
@ -5942,13 +5942,13 @@ vec_orc(vector unsigned long long __a, vector unsigned long long __b) {
static inline __ATTRS_o_ai vector float
vec_orc(vector float __a, vector float __b) {
return (vector float)((vector unsigned int)__a &
return (vector float)((vector unsigned int)__a |
~(vector unsigned int)__b);
}
static inline __ATTRS_o_ai vector double
vec_orc(vector double __a, vector double __b) {
return (vector double)((vector unsigned long long)__a &
return (vector double)((vector unsigned long long)__a |
~(vector unsigned long long)__b);
}
#endif
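/* Editorial sketch, not from the header: the corrected vec_orc is OR with
 * the complement of the second operand, applied bitwise even for the
 * floating-point overloads. Scalar model for one 32-bit lane: */
static inline unsigned int scalar_orc(unsigned int a, unsigned int b) {
  /* e.g. 0xF0F0F0F0 | ~0x00FF00FF == 0xF0F0F0F0 | 0xFF00FF00 == 0xFFF0FFF0 */
  return a | ~b;
}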


@ -5,14 +5,14 @@ set -e
BUILDDIR="$(pwd)"
sudo sh -c 'echo "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main" >> /etc/apt/sources.list'
sudo sh -c 'echo "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-8 main" >> /etc/apt/sources.list'
wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
sudo apt-get update -q
sudo apt-get remove -y llvm-*
sudo rm -rf /usr/local/*
sudo apt-get install -y libxml2-dev libclang-7-dev llvm-7 llvm-7-dev cmake s3cmd gcc-7 g++-7
sudo apt-get install -y libxml2-dev libclang-8-dev llvm-8 llvm-8-dev cmake s3cmd gcc-7 g++-7
export CC=gcc-7
export CXX=g++-7
@ -25,7 +25,7 @@ make -j2 install
if [ "${BUILD_REASON}" != "PullRequest" ]; then
ARTIFACTSDIR="$BUILDDIR/artifacts"
mkdir "$ARTIFACTSDIR"
docker run -i --mount type=bind,source="$ARTIFACTSDIR",target=/z ziglang/static-base:llvm7-1 -j2 $BUILD_SOURCEVERSION
docker run -i --mount type=bind,source="$ARTIFACTSDIR",target=/z ziglang/static-base:llvm8-1 -j2 $BUILD_SOURCEVERSION
TARBALL="$(ls $ARTIFACTSDIR)"
mv "$DOWNLOADSECUREFILE_SECUREFILEPATH" "$HOME/.s3cfg"
s3cmd put -P "$ARTIFACTSDIR/$TARBALL" s3://ziglang.org/builds/


@ -6,7 +6,7 @@ set -e
brew install s3cmd gcc@8
ZIGDIR="$(pwd)"
CACHE_BASENAME="llvm+clang-7.0.0-macos-x86_64-gcc8-release-static"
CACHE_BASENAME="llvm+clang-8.0.0-macos-x86_64-gcc8-release-static"
PREFIX="$HOME/$CACHE_BASENAME"
TMPDIR="$HOME/tmpz"
JOBS="-j2"
@ -42,25 +42,18 @@ else
rm $PREFIX/lib/libz*dylib
cd $TMPDIR
wget ftp://ftp.invisible-island.net/ncurses/ncurses.tar.gz
tar xf ncurses.tar.gz
cd ncurses-6.1/
./configure --without-shared --prefix=$PREFIX
make $JOBS install
cd $TMPDIR
wget https://releases.llvm.org/7.0.0/llvm-7.0.0.src.tar.xz
tar xf llvm-7.0.0.src.tar.xz
cd llvm-7.0.0.src/
wget https://releases.llvm.org/8.0.0/llvm-8.0.0.src.tar.xz
tar xf llvm-8.0.0.src.tar.xz
cd llvm-8.0.0.src/
mkdir build
cd build
cmake .. -DCMAKE_INSTALL_PREFIX=$PREFIX -DCMAKE_PREFIX_PATH=$PREFIX -DCMAKE_BUILD_TYPE=Release -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="WebAssembly;AVR;RISCV" -DLLVM_ENABLE_LIBXML2=OFF
cmake .. -DCMAKE_INSTALL_PREFIX=$PREFIX -DCMAKE_PREFIX_PATH=$PREFIX -DCMAKE_BUILD_TYPE=Release -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="AVR;RISCV" -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_ENABLE_TERMINFO=OFF
make $JOBS install
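# Editorial note: -DLLVM_ENABLE_TERMINFO=OFF keeps LLVM from linking against
# libncurses/terminfo, which is what made the ncurses download-and-build
# steps above unnecessary for the static macOS cache.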
cd $TMPDIR
wget https://releases.llvm.org/7.0.0/cfe-7.0.0.src.tar.xz
tar xf cfe-7.0.0.src.tar.xz
cd cfe-7.0.0.src/
wget https://releases.llvm.org/8.0.0/cfe-8.0.0.src.tar.xz
tar xf cfe-8.0.0.src.tar.xz
cd cfe-8.0.0.src/
mkdir build
cd build
cmake .. -DCMAKE_INSTALL_PREFIX=$PREFIX -DCMAKE_PREFIX_PATH=$PREFIX -DCMAKE_BUILD_TYPE=Release


@ -6,5 +6,5 @@ set -e
pacman -Su --needed --noconfirm
pacman -S --needed --noconfirm wget p7zip python3-pip
pip install s3cmd
wget -nv "https://ziglang.org/deps/llvm%2bclang-7.0.0-win64-msvc-release.tar.xz"
tar xf llvm+clang-7.0.0-win64-msvc-release.tar.xz
wget -nv "https://ziglang.org/deps/llvm%2bclang-8.0.0-win64-msvc-release.tar.xz"
tar xf llvm+clang-8.0.0-win64-msvc-release.tar.xz


@ -11,7 +11,7 @@ SET "MSYSTEM=%PREVMSYSTEM%"
SET "ZIGBUILDDIR=%SRCROOT%\build"
SET "ZIGINSTALLDIR=%ZIGBUILDDIR%\release"
SET "ZIGPREFIXPATH=%SRCROOT%\llvm+clang-7.0.0-win64-msvc-release"
SET "ZIGPREFIXPATH=%SRCROOT%\llvm+clang-8.0.0-win64-msvc-release"
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64

ci/srht/freebsd_script (new executable file, 53 lines)

@ -0,0 +1,53 @@
#!/bin/sh
set -x
set -e
ZIGDIR="$(pwd)"
CACHE_BASENAME="llvm+clang-8.0.0-freebsd-x86_64-release"
PREFIX="$HOME/$CACHE_BASENAME"
JOBS="-j$(sysctl -n hw.ncpu)"
cd $HOME
wget -nv "https://ziglang.org/builds/$CACHE_BASENAME.tar.xz"
tar xf "$CACHE_BASENAME.tar.xz"
cd $ZIGDIR
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=$PREFIX -DCMAKE_INSTALL_PREFIX=$(pwd)/release -DZIG_STATIC=ON
make $JOBS install
# TODO test everything. right now it's skipping stuff including docs
# because for some reason @cImport is failing on the CI server.
release/bin/zig build --build-file ../build.zig test-behavior -Dskip-release
if [ -f ~/.s3cfg ]; then
mv ../LICENSE release/
# TODO re-enable this
#mv ../zig-cache/langref.html release/
mv release/bin/zig release/
rmdir release/bin
GITBRANCH=$(git rev-parse --abbrev-ref HEAD)
VERSION=$(release/zig version)
DIRNAME="zig-freebsd-x86_64-$VERSION"
TARBALL="$DIRNAME.tar.xz"
mv release "$DIRNAME"
tar cfJ "$TARBALL" "$DIRNAME"
s3cmd put -P "$TARBALL" s3://ziglang.org/builds/
touch empty
s3cmd put -P empty s3://ziglang.org/builds/zig-freebsd-x86_64-$GITBRANCH.tar.xz --add-header="Cache-Control: max-age=0, must-revalidate" --add-header="x-amz-website-redirect-location:/builds/$TARBALL"
SHASUM=$(shasum -a 256 $TARBALL | cut '-d ' -f1)
BYTESIZE=$(wc -c < $TARBALL)
JSONFILE="freebsd-$GITBRANCH.json"
touch $JSONFILE
echo "{\"tarball\": \"$TARBALL\"," >>$JSONFILE
echo "\"shasum\": \"$SHASUM\"," >>$JSONFILE
echo "\"size\": \"$BYTESIZE\"}" >>$JSONFILE
s3cmd put -P "$JSONFILE" s3://ziglang.org/builds/$JSONFILE
fi
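# For reference, the uploaded index built by the echo lines above looks like
# this (values illustrative, not from an actual build):
# {"tarball": "zig-freebsd-x86_64-0.4.0+abc1234.tar.xz",
# "shasum": "<64 hex chars from shasum -a 256>",
# "size": "45678901"}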


@ -38,10 +38,10 @@ if(MSVC)
else()
find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h
PATHS
/usr/lib/llvm/7/include
/usr/lib/llvm-7/include
/usr/lib/llvm-7.0/include
/usr/local/llvm70/include
/usr/lib/llvm/8/include
/usr/lib/llvm-8/include
/usr/lib/llvm-8.0/include
/usr/local/llvm80/include
/mingw64/include)
macro(FIND_AND_ADD_CLANG_LIB _libname_)
@ -49,10 +49,10 @@ else()
find_library(CLANG_${_prettylibname_}_LIB NAMES ${_libname_}
PATHS
${CLANG_LIBDIRS}
/usr/lib/llvm/7/lib
/usr/lib/llvm-7/lib
/usr/lib/llvm-7.0/lib
/usr/local/llvm70/lib
/usr/lib/llvm/8/lib
/usr/lib/llvm-8/lib
/usr/lib/llvm-8.0/lib
/usr/local/llvm80/lib
/mingw64/lib
/c/msys64/mingw64/lib
c:\\msys64\\mingw64\\lib)


@ -8,14 +8,15 @@
find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h
PATHS
/usr/lib/llvm-7.0/include
/usr/local/llvm70/include
/usr/lib/llvm-8.0/include
/usr/local/llvm80/include
/mingw64/include)
find_library(LLD_LIBRARY NAMES lld-7.0 lld70 lld
find_library(LLD_LIBRARY NAMES lld-8.0 lld80 lld
PATHS
/usr/lib/llvm-7.0/lib
/usr/local/llvm70/lib)
/usr/lib/llvm-8.0/lib
/usr/local/llvm80/lib
)
if(EXISTS ${LLD_LIBRARY})
set(LLD_LIBRARIES ${LLD_LIBRARY})
else()
@ -23,8 +24,8 @@ else()
string(TOUPPER ${_libname_} _prettylibname_)
find_library(LLD_${_prettylibname_}_LIB NAMES ${_libname_}
PATHS
/usr/lib/llvm-7.0/lib
/usr/local/llvm70/lib
/usr/lib/llvm-8.0/lib
/usr/local/llvm80/lib
/mingw64/lib
/c/msys64/mingw64/lib
c:/msys64/mingw64/lib)


@ -8,12 +8,16 @@
# LLVM_LIBDIRS
find_program(LLVM_CONFIG_EXE
NAMES llvm-config-7 llvm-config-7.0 llvm-config70 llvm-config
NAMES llvm-config-8 llvm-config-8.0 llvm-config80 llvm-config
PATHS
"/mingw64/bin"
"/c/msys64/mingw64/bin"
"c:/msys64/mingw64/bin"
"C:/Libraries/llvm-7.0.0/bin")
"C:/Libraries/llvm-8.0.0/bin")
if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND")
message(FATAL_ERROR "unable to find llvm-config")
endif()
if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND")
message(FATAL_ERROR "unable to find llvm-config")
@ -24,14 +28,14 @@ execute_process(
OUTPUT_VARIABLE LLVM_CONFIG_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 7)
message(FATAL_ERROR "expected LLVM 7.x but found ${LLVM_CONFIG_VERSION}")
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 8)
message(FATAL_ERROR "expected LLVM 8.x but found ${LLVM_CONFIG_VERSION}")
endif()
if("${LLVM_CONFIG_VERSION}" VERSION_EQUAL 8)
message(FATAL_ERROR "expected LLVM 7.x but found ${LLVM_CONFIG_VERSION}")
if("${LLVM_CONFIG_VERSION}" VERSION_EQUAL 9)
message(FATAL_ERROR "expected LLVM 8.x but found ${LLVM_CONFIG_VERSION}")
endif()
if("${LLVM_CONFIG_VERSION}" VERSION_GREATER 8)
message(FATAL_ERROR "expected LLVM 7.x but found ${LLVM_CONFIG_VERSION}")
if("${LLVM_CONFIG_VERSION}" VERSION_GREATER 9)
message(FATAL_ERROR "expected LLVM 8.x but found ${LLVM_CONFIG_VERSION}")
endif()
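# Editorial note: with the three checks above, `llvm-config --version` must
# report 8.x; for example "8.0.0" configures cleanly, while "7.1.0" or
# "9.0.0" aborts with the FATAL_ERROR messages.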
execute_process(
@ -57,6 +61,7 @@ NEED_TARGET("NVPTX")
NEED_TARGET("PowerPC")
NEED_TARGET("Sparc")
NEED_TARGET("SystemZ")
NEED_TARGET("WebAssembly")
NEED_TARGET("X86")
NEED_TARGET("XCore")
@ -107,7 +112,7 @@ execute_process(
set(LLVM_LIBRARIES ${LLVM_LIBRARIES} ${LLVM_SYSTEM_LIBS})
if(NOT LLVM_LIBRARIES)
find_library(LLVM_LIBRARIES NAMES LLVM LLVM-7 LLVM-7.0)
find_library(LLVM_LIBRARIES NAMES LLVM LLVM-8 LLVM-8.0)
endif()
link_directories("${CMAKE_PREFIX_PATH}/lib")


@ -55,10 +55,8 @@ OPTION(prefix_2, "color-diagnostics=", color_diagnostics_eq, Joined, INVALID, IN
"Use colors in diagnostics; one of 'always', 'never', 'auto'", nullptr, nullptr)
OPTION(prefix_2, "color-diagnostics", color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
"Use colors in diagnostics", nullptr, nullptr)
OPTION(prefix_1, "debug:dwarf", debug_dwarf, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "debug:full", debug_full, Flag, INVALID, debug, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "debug:ghash", debug_ghash, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "debug:symtab", debug_symtab, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "debug:", debug_opt, Joined, INVALID, INVALID, nullptr, 0, 0,
"Embed a symbol table in the image with option", nullptr, nullptr)
OPTION(prefix_1, "debugtype:", debugtype, Joined, INVALID, INVALID, nullptr, 0, 0,
"Debug Info Options", nullptr, nullptr)
OPTION(prefix_1, "debug", debug, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -95,9 +93,12 @@ OPTION(prefix_1, "fixed:no", fixed_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Enable base relocations (default)", nullptr, nullptr)
OPTION(prefix_1, "fixed", fixed, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable base relocations", nullptr, nullptr)
OPTION(prefix_1, "force:unresolved", force_unresolved, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "force", force, Flag, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_1, "force:multiple", force_multiple, Flag, INVALID, INVALID, nullptr, 0, 0,
"Allow multiply defined symbols when creating executables", nullptr, nullptr)
OPTION(prefix_1, "force:unresolved", force_unresolved, Flag, INVALID, INVALID, nullptr, 0, 0,
"Allow undefined symbols when creating executables", nullptr, nullptr)
OPTION(prefix_1, "force", force, Flag, INVALID, INVALID, nullptr, 0, 0,
"Allow undefined and multiply defined symbols when creating executables", nullptr, nullptr)
OPTION(prefix_1, "functionpadmin", functionpadmin, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "guard:", guard, Joined, INVALID, INVALID, nullptr, 0, 0,
"Control flow guard", nullptr, nullptr)
@ -148,30 +149,32 @@ OPTION(prefix_1, "lldsavetemps", lldsavetemps, Flag, INVALID, INVALID, nullptr,
OPTION(prefix_1, "machine:", machine, Joined, INVALID, INVALID, nullptr, 0, 0,
"Specify target platform", nullptr, nullptr)
OPTION(prefix_1, "manifest:", manifest_colon, Joined, INVALID, INVALID, nullptr, 0, 0,
"Create manifest file", nullptr, nullptr)
"NO disables manifest output; EMBED[,ID=#] embeds manifest as resource in the image", nullptr, nullptr)
OPTION(prefix_1, "manifestdependency:", manifestdependency, Joined, INVALID, INVALID, nullptr, 0, 0,
"Attributes for <dependency> in manifest file", nullptr, nullptr)
"Attributes for <dependency> element in manifest file; implies /manifest", nullptr, nullptr)
OPTION(prefix_1, "manifestfile:", manifestfile, Joined, INVALID, INVALID, nullptr, 0, 0,
"Manifest file path", nullptr, nullptr)
"Manifest output path, with /manifest", nullptr, nullptr)
OPTION(prefix_1, "manifestinput:", manifestinput, Joined, INVALID, INVALID, nullptr, 0, 0,
"Specify manifest file", nullptr, nullptr)
"Additional manifest inputs; only valid with /manifest:embed", nullptr, nullptr)
OPTION(prefix_1, "manifestuac:", manifestuac, Joined, INVALID, INVALID, nullptr, 0, 0,
"User access control", nullptr, nullptr)
OPTION(prefix_1, "manifest", manifest, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "manifest", manifest, Flag, INVALID, INVALID, nullptr, 0, 0,
"Create .manifest file", nullptr, nullptr)
OPTION(prefix_1, "maxilksize:", maxilksize, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "merge:", merge, Joined, INVALID, INVALID, nullptr, 0, 0,
"Combine sections", nullptr, nullptr)
OPTION(prefix_1, "mllvm:", mllvm, Joined, INVALID, INVALID, nullptr, 0, 0,
"Options to pass to LLVM", nullptr, nullptr)
OPTION(prefix_1, "msvclto", msvclto, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "natvis:", natvis, Joined, INVALID, INVALID, nullptr, 0, 0,
"Path to natvis file to embed in the PDB", nullptr, nullptr)
OPTION(prefix_1, "no-color-diagnostics", no_color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not use colors in diagnostics", nullptr, nullptr)
OPTION(prefix_1, "nodefaultlib:", nodefaultlib, Joined, INVALID, INVALID, nullptr, 0, 0,
"Remove a default library", nullptr, nullptr)
OPTION(prefix_1, "nodefaultlib", nodefaultlib_all, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "noentry", noentry, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "nodefaultlib", nodefaultlib_all, Flag, INVALID, INVALID, nullptr, 0, 0,
"Remove all default libraries", nullptr, nullptr)
OPTION(prefix_1, "noentry", noentry, Flag, INVALID, INVALID, nullptr, 0, 0,
"Don't add reference to DllMainCRTStartup; only valid with /dll", nullptr, nullptr)
OPTION(prefix_1, "nologo", nologo, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "nxcompat:no", nxcompat_no, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable data execution provention", nullptr, nullptr)


@ -54,7 +54,7 @@ OPTION(prefix_2, "Bsymbolic-functions", Bsymbolic_functions, Flag, INVALID, INVA
OPTION(prefix_2, "Bsymbolic", Bsymbolic, Flag, INVALID, INVALID, nullptr, 0, 0,
"Bind defined symbols locally", nullptr, nullptr)
OPTION(prefix_2, "build-id=", build_id_eq, Joined, INVALID, INVALID, nullptr, 0, 0,
"Generate build ID note", "[fast,md5,sha,uuid,0x<hexstring>]", nullptr)
"Generate build ID note", "[fast,md5,sha1,uuid,0x<hexstring>]", nullptr)
OPTION(prefix_2, "build-id", build_id, Flag, INVALID, INVALID, nullptr, 0, 0,
"Alias for --build-id=fast", nullptr, nullptr)
OPTION(prefix_1, "b", anonymous_16, Separate, INVALID, format, nullptr, 0, 0,
@ -62,6 +62,8 @@ OPTION(prefix_1, "b", anonymous_16, Separate, INVALID, format, nullptr, 0, 0,
OPTION(prefix_2, "call-graph-ordering-file=", call_graph_ordering_file_eq, Joined, INVALID, call_graph_ordering_file, nullptr, 0, 0,
"Layout sections to optimize the given callgraph", nullptr, nullptr)
OPTION(prefix_2, "call-graph-ordering-file", call_graph_ordering_file, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "call-graph-profile-sort", call_graph_profile_sort, Flag, INVALID, INVALID, nullptr, 0, 0,
"Reorder sections with call graph profile (default)", nullptr, nullptr)
OPTION(prefix_2, "call_shared", anonymous_1, Flag, INVALID, Bdynamic, nullptr, 0, 0,
"Alias for --Bdynamic", nullptr, nullptr)
OPTION(prefix_2, "check-sections", check_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -133,7 +135,7 @@ OPTION(prefix_2, "exclude-libs=", exclude_libs_eq, Joined, INVALID, exclude_libs
"Exclude static libraries from automatic export", nullptr, nullptr)
OPTION(prefix_2, "exclude-libs", exclude_libs, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "execute-only", execute_only, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not mark executable sections readable", nullptr, nullptr)
"Mark executable sections unreadable", nullptr, nullptr)
OPTION(prefix_2, "export-dynamic-symbol=", export_dynamic_symbol_eq, Joined, INVALID, export_dynamic_symbol, nullptr, 0, 0,
"Put a symbol in the dynamic symbol table", nullptr, nullptr)
OPTION(prefix_2, "export-dynamic-symbol", export_dynamic_symbol, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
@ -245,6 +247,8 @@ OPTION(prefix_2, "no-apply-dynamic-relocs", no_apply_dynamic_relocs, Flag, INVAL
"Do not apply link-time values for dynamic relocations (default)", nullptr, nullptr)
OPTION(prefix_2, "no-as-needed", no_as_needed, Flag, INVALID, INVALID, nullptr, 0, 0,
"Always set DT_NEEDED for shared libraries (default)", nullptr, nullptr)
OPTION(prefix_2, "no-call-graph-profile-sort", no_call_graph_profile_sort, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not reorder sections with call graph profile", nullptr, nullptr)
OPTION(prefix_2, "no-check-sections", no_check_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not check section addresses for overlaps", nullptr, nullptr)
OPTION(prefix_2, "no-color-diagnostics", no_color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -291,6 +295,8 @@ OPTION(prefix_2, "no-rosegment", no_rosegment, Flag, INVALID, INVALID, nullptr,
"Do not put read-only non-executable sections in their own segment", nullptr, nullptr)
OPTION(prefix_2, "no-threads", no_threads, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not run the linker multi-threaded", nullptr, nullptr)
OPTION(prefix_2, "no-toc-optimize", no_toc_optimize, Flag, INVALID, INVALID, nullptr, 0, 0,
"(PowerPC64) Disable TOC related optimizations", nullptr, nullptr)
OPTION(prefix_2, "no-undefined-version", no_undefined_version, Flag, INVALID, INVALID, nullptr, 0, 0,
"Report version scripts that refer undefined symbols", nullptr, nullptr)
OPTION(prefix_2, "no-undefined", no_undefined, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -301,6 +307,8 @@ OPTION(prefix_2, "no-warn-backrefs", no_warn_backrefs, Flag, INVALID, INVALID, n
"Do not warn about backward symbol references to fetch archive members (default)", nullptr, nullptr)
OPTION(prefix_2, "no-warn-common", no_warn_common, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not warn about duplicate common symbols (default)", nullptr, nullptr)
OPTION(prefix_2, "no-warn-ifunc-textrel", no_warn_ifunc_textrel, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not warn about using ifunc symbols with text relocations (default)", nullptr, nullptr)
OPTION(prefix_2, "no-warn-mismatch", anonymous_57, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "no-warn-symbol-ordering", no_warn_symbol_ordering, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not warn about problems with the symbol ordering file", nullptr, nullptr)
@ -338,6 +346,8 @@ OPTION(prefix_2, "pack-dyn-relocs=", pack_dyn_relocs_eq, Joined, INVALID, pack_d
OPTION(prefix_2, "pack-dyn-relocs", pack_dyn_relocs, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, "[none,android,relr,android+relr]", nullptr)
OPTION(prefix_2, "pic-executable", anonymous_23, Flag, INVALID, pie, nullptr, 0, 0,
"Alias for --pie", nullptr, nullptr)
OPTION(prefix_2, "pic-veneer", pic_veneer, Flag, INVALID, INVALID, nullptr, 0, 0,
"Always generate position independent thunks (veneers)", nullptr, nullptr)
OPTION(prefix_2, "pie", pie, Flag, INVALID, INVALID, nullptr, 0, 0,
"Create a position independent executable", nullptr, nullptr)
OPTION(prefix_2, "plugin-opt=-fresolution=", plugin_opt_fresolution_eq, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
@ -349,6 +359,7 @@ OPTION(prefix_2, "plugin-opt=disable-verify", anonymous_41, Flag, INVALID, disab
"Alias for -disable-verify", nullptr, nullptr)
OPTION(prefix_2, "plugin-opt=dwo_dir=", plugin_opt_dwo_dir_eq, Joined, INVALID, INVALID, nullptr, 0, 0,
"Directory to store .dwo files when LTO and debug fission are used", nullptr, nullptr)
OPTION(prefix_2, "plugin-opt=emit-llvm", plugin_opt_emit_llvm, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "plugin-opt=jobs=", anonymous_42, Joined, INVALID, thinlto_jobs, nullptr, 0, 0,
"Alias for -thinlto-jobs", nullptr, nullptr)
OPTION(prefix_2, "plugin-opt=lto-partitions=", anonymous_43, Joined, INVALID, lto_partitions, nullptr, 0, 0,
@ -424,6 +435,9 @@ OPTION(prefix_2, "sort-common", anonymous_60, Flag, INVALID, INVALID, nullptr, 0
OPTION(prefix_2, "sort-section=", sort_section_eq, Joined, INVALID, sort_section, nullptr, 0, 0,
"Specifies sections sorting rule when linkerscript is used", nullptr, nullptr)
OPTION(prefix_2, "sort-section", sort_section, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "split-stack-adjust-size=", split_stack_adjust_size_eq, Joined, INVALID, split_stack_adjust_size, nullptr, 0, 0,
"Specify adjustment to stack size when a split-stack function calls a non-split-stack function", "<value>", nullptr)
OPTION(prefix_2, "split-stack-adjust-size", split_stack_adjust_size, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, "<value>", nullptr)
OPTION(prefix_2, "start-group", start_group, Flag, INVALID, INVALID, nullptr, 0, 0,
"Ignored for compatibility with GNU unless you pass --warn-backrefs", nullptr, nullptr)
OPTION(prefix_2, "start-lib", start_lib, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -467,6 +481,8 @@ OPTION(prefix_2, "thinlto-jobs=", thinlto_jobs, Joined, INVALID, INVALID, nullpt
"Number of ThinLTO jobs", nullptr, nullptr)
OPTION(prefix_2, "threads", threads, Flag, INVALID, INVALID, nullptr, 0, 0,
"Run the linker multi-threaded (default)", nullptr, nullptr)
OPTION(prefix_2, "toc-optimize", toc_optimize, Flag, INVALID, INVALID, nullptr, 0, 0,
"(PowerPC64) Enable TOC related optimizations (default)", nullptr, nullptr)
OPTION(prefix_2, "trace-symbol=", trace_symbol_eq, Joined, INVALID, trace_symbol, nullptr, 0, 0,
"Trace references to symbols", nullptr, nullptr)
OPTION(prefix_2, "trace-symbol", trace_symbol, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
@ -511,6 +527,8 @@ OPTION(prefix_2, "warn-backrefs", warn_backrefs, Flag, INVALID, INVALID, nullptr
OPTION(prefix_2, "warn-common", warn_common, Flag, INVALID, INVALID, nullptr, 0, 0,
"Warn about duplicate common symbols", nullptr, nullptr)
OPTION(prefix_2, "warn-execstack", anonymous_62, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "warn-ifunc-textrel", warn_ifunc_textrel, Flag, INVALID, INVALID, nullptr, 0, 0,
"Warn about using ifunc symbols with text relocations", nullptr, nullptr)
OPTION(prefix_2, "warn-once", anonymous_63, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "warn-shared-textrel", anonymous_64, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "warn-symbol-ordering", warn_symbol_ordering, Flag, INVALID, INVALID, nullptr, 0, 0,


@ -90,6 +90,13 @@ OPTION(prefix_1, "o", o, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
OPTION(prefix_2, "pdb", pdb, Separate, INVALID, INVALID, nullptr, 0, 0,
"Specify output PDB debug information file", nullptr, nullptr)
OPTION(prefix_2, "pic-executable", pic_executable, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "plugin-opt=", anonymous_3, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "plugin-opt", anonymous_2, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "plugin=", anonymous_1, Joined, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "plugin", anonymous_0, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "require-defined=", require_defined_eq, Joined, INVALID, require_defined, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "require-defined", require_defined, Separate, INVALID, INVALID, nullptr, 0, 0,
"Force symbol to be added to symbol table as an undefined one", nullptr, nullptr)
OPTION(prefix_2, "shared", shared, Flag, INVALID, INVALID, nullptr, 0, 0,
"Build a shared object", nullptr, nullptr)
OPTION(prefix_2, "stack", stack, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)


@ -1,6 +1,6 @@
#define LLD_VERSION 7.0.0
#define LLD_VERSION_STRING "7.0.0"
#define LLD_VERSION_MAJOR 7
#define LLD_VERSION 8.0.0
#define LLD_VERSION_STRING "8.0.0"
#define LLD_VERSION_MAJOR 8
#define LLD_VERSION_MINOR 0
#define LLD_REVISION_STRING ""
#define LLD_REPOSITORY_STRING ""


@ -36,23 +36,28 @@ OPTION(prefix_1, "color-diagnostics=", color_diagnostics_eq, Joined, INVALID, IN
"Use colors in diagnostics; one of 'always', 'never', 'auto'", nullptr, nullptr)
OPTION(prefix_1, "color-diagnostics", color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
"Use colors in diagnostics", nullptr, nullptr)
OPTION(prefix_1, "compress-relocations", compress_relocations, Flag, INVALID, INVALID, nullptr, 0, 0,
"Compress the relocation targets in the code section.", nullptr, nullptr)
OPTION(prefix_1, "demangle", demangle, Flag, INVALID, INVALID, nullptr, 0, 0,
"Demangle symbol names", nullptr, nullptr)
OPTION(prefix_1, "disable-verify", disable_verify, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "entry=", alias_entry_entry, Joined, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "entry=", anonymous_1, Joined, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "entry", entry, Separate, INVALID, INVALID, nullptr, 0, 0,
"Name of entry point symbol", "<entry>", nullptr)
OPTION(prefix_1, "error-limit=", error_limit, Joined, INVALID, INVALID, nullptr, 0, 0,
"Maximum number of errors to emit before stopping (0 = no limit)", nullptr, nullptr)
OPTION(prefix_1, "export-all", export_all, Flag, INVALID, INVALID, nullptr, 0, 0,
"Export all symbols (normally combined with --no-gc-sections)", nullptr, nullptr)
OPTION(prefix_1, "export-dynamic", export_dynamic, Flag, INVALID, INVALID, nullptr, 0, 0,
"Put symbols in the dynamic symbol table", nullptr, nullptr)
OPTION(prefix_1, "export-table", export_table, Flag, INVALID, INVALID, nullptr, 0, 0,
"Export function table to the environment", nullptr, nullptr)
OPTION(prefix_1, "export=", export_eq, Joined, INVALID, export, nullptr, 0, 0,
"Force a symbol to be exported", nullptr, nullptr)
OPTION(prefix_1, "export", export, Separate, INVALID, INVALID, nullptr, 0, 0,
"Force a symbol to be exported", nullptr, nullptr)
OPTION(prefix_2, "e", alias_entry_e, JoinedOrSeparate, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "export", export, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "E", anonymous_2, Flag, INVALID, export_dynamic, nullptr, 0, 0,
"Alias for --export-dynamic", nullptr, nullptr)
OPTION(prefix_2, "e", anonymous_0, JoinedOrSeparate, INVALID, entry, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "fatal-warnings", fatal_warnings, Flag, INVALID, INVALID, nullptr, 0, 0,
"Treat warnings as errors", nullptr, nullptr)
OPTION(prefix_1, "gc-sections", gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -67,7 +72,7 @@ OPTION(prefix_1, "import-table", import_table, Flag, INVALID, INVALID, nullptr,
"Import function table from the environment", nullptr, nullptr)
OPTION(prefix_1, "initial-memory=", initial_memory, Joined, INVALID, INVALID, nullptr, 0, 0,
"Initial size of the linear memory", nullptr, nullptr)
OPTION(prefix_2, "i", alias_initial_memory_i, Flag, INVALID, initial_memory, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "i", anonymous_3, Flag, INVALID, initial_memory, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "lto-O", lto_O, Joined, INVALID, INVALID, nullptr, 0, 0,
"Optimization level for LTO", "<opt-level>", nullptr)
OPTION(prefix_1, "lto-partitions=", lto_partitions, Joined, INVALID, INVALID, nullptr, 0, 0,
@ -82,18 +87,22 @@ OPTION(prefix_1, "merge-data-segments", merge_data_segments, Flag, INVALID, INVA
"Enable merging data segments", nullptr, nullptr)
OPTION(prefix_1, "mllvm", mllvm, Separate, INVALID, INVALID, nullptr, 0, 0,
"Options to pass to LLVM", nullptr, nullptr)
OPTION(prefix_2, "m", alias_max_memory_m, Flag, INVALID, max_memory, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "m", anonymous_4, Flag, INVALID, max_memory, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "no-color-diagnostics", no_color_diagnostics, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not use colors in diagnostics", nullptr, nullptr)
OPTION(prefix_1, "no-demangle", no_demangle, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not demangle symbol names", nullptr, nullptr)
OPTION(prefix_1, "no-entry", no_entry, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not output any entry point", nullptr, nullptr)
OPTION(prefix_1, "no-export-dynamic", no_export_dynamic, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not put symbols in the dynamic symbol table (default)", nullptr, nullptr)
OPTION(prefix_1, "no-fatal-warnings", no_fatal_warnings, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "no-gc-sections", no_gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable garbage collection of unused sections", nullptr, nullptr)
OPTION(prefix_1, "no-merge-data-segments", no_merge_data_segments, Flag, INVALID, INVALID, nullptr, 0, 0,
"Disable merging data segments", nullptr, nullptr)
OPTION(prefix_1, "no-pie", no_pie, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not create a position independent executable (default)", nullptr, nullptr)
OPTION(prefix_1, "no-print-gc-sections", no_print_gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
"Do not list removed unused sections", nullptr, nullptr)
OPTION(prefix_1, "no-threads", no_threads, Flag, INVALID, INVALID, nullptr, 0, 0,
@ -104,33 +113,41 @@ OPTION(prefix_2, "O", O, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Optimize output file size", nullptr, nullptr)
OPTION(prefix_2, "o", o, JoinedOrSeparate, INVALID, INVALID, nullptr, 0, 0,
"Path to file to write output", "<path>", nullptr)
OPTION(prefix_1, "pie", pie, Flag, INVALID, INVALID, nullptr, 0, 0,
"Create a position independent executable", nullptr, nullptr)
OPTION(prefix_1, "print-gc-sections", print_gc_sections, Flag, INVALID, INVALID, nullptr, 0, 0,
"List removed unused sections", nullptr, nullptr)
OPTION(prefix_1, "relocatable", relocatable, Flag, INVALID, INVALID, nullptr, 0, 0,
"Create relocatable object file", nullptr, nullptr)
OPTION(prefix_2, "r", alias_relocatable_r, Flag, INVALID, relocatable, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "r", anonymous_5, Flag, INVALID, relocatable, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "save-temps", save_temps, Flag, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "shared-memory", shared_memory, Flag, INVALID, INVALID, nullptr, 0, 0,
"Use shared linear memory", nullptr, nullptr)
OPTION(prefix_1, "shared", shared, Flag, INVALID, INVALID, nullptr, 0, 0,
"Build a shared object", nullptr, nullptr)
OPTION(prefix_1, "stack-first", stack_first, Flag, INVALID, INVALID, nullptr, 0, 0,
"Place stack at start of linear memory rather than after data", nullptr, nullptr)
OPTION(prefix_1, "strip-all", strip_all, Flag, INVALID, INVALID, nullptr, 0, 0,
"Strip all symbols", nullptr, nullptr)
OPTION(prefix_1, "strip-debug", strip_debug, Flag, INVALID, INVALID, nullptr, 0, 0,
"Strip debugging information", nullptr, nullptr)
OPTION(prefix_2, "S", anonymous_7, Flag, INVALID, strip_debug, nullptr, 0, 0,
"Alias for --strip-debug", nullptr, nullptr)
OPTION(prefix_2, "s", anonymous_6, Flag, INVALID, strip_all, nullptr, 0, 0,
"Alias for --strip-all", nullptr, nullptr)
OPTION(prefix_1, "thinlto-cache-dir=", thinlto_cache_dir, Joined, INVALID, INVALID, nullptr, 0, 0,
"Path to ThinLTO cached object file directory", nullptr, nullptr)
OPTION(prefix_1, "thinlto-cache-policy=", thinlto_cache_policy_eq, Joined, INVALID, thinlto_cache_policy, nullptr, 0, 0,
"Pruning policy for the ThinLTO cache", nullptr, nullptr)
OPTION(prefix_1, "thinlto-cache-policy", thinlto_cache_policy, Separate, INVALID, INVALID, nullptr, 0, 0,
"Pruning policy for the ThinLTO cache", nullptr, nullptr)
OPTION(prefix_1, "thinlto-cache-policy", thinlto_cache_policy, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "thinlto-jobs=", thinlto_jobs, Joined, INVALID, INVALID, nullptr, 0, 0,
"Number of ThinLTO jobs", nullptr, nullptr)
OPTION(prefix_1, "threads", threads, Flag, INVALID, INVALID, nullptr, 0, 0,
"Run the linker multi-threaded", nullptr, nullptr)
OPTION(prefix_1, "undefined=", undefined_eq, Joined, INVALID, undefined, nullptr, 0, 0,
"Force undefined symbol during linking", nullptr, nullptr)
OPTION(prefix_1, "undefined", undefined, Separate, INVALID, INVALID, nullptr, 0, 0,
"Force undefined symbol during linking", nullptr, nullptr)
OPTION(prefix_2, "u", alias_undefined_u, JoinedOrSeparate, INVALID, undefined, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "undefined", undefined, Separate, INVALID, INVALID, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_2, "u", anonymous_8, JoinedOrSeparate, INVALID, undefined, nullptr, 0, 0, nullptr, nullptr, nullptr)
OPTION(prefix_1, "verbose", verbose, Flag, INVALID, INVALID, nullptr, 0, 0,
"Verbose mode", nullptr, nullptr)
OPTION(prefix_1, "version", version, Flag, INVALID, INVALID, nullptr, 0, 0,

deps/lld/COFF/Chunks.cpp vendored

@ -11,6 +11,7 @@
#include "InputFiles.h"
#include "Symbols.h"
#include "Writer.h"
#include "SymbolTable.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
@ -44,6 +45,22 @@ SectionChunk::SectionChunk(ObjFile *F, const coff_section *H)
Live = !Config->DoGC || !isCOMDAT();
}
// Initialize the RelocTargets vector, to allow redirecting certain relocations
// to a thunk instead of the actual symbol the relocation's symbol table index
// indicates.
void SectionChunk::readRelocTargets() {
assert(RelocTargets.empty());
RelocTargets.reserve(Relocs.size());
for (const coff_relocation &Rel : Relocs)
RelocTargets.push_back(File->getSymbol(Rel.SymbolTableIndex));
}
// Reset RelocTargets to their original targets before thunks were added.
void SectionChunk::resetRelocTargets() {
for (size_t I = 0, E = Relocs.size(); I < E; ++I)
RelocTargets[I] = File->getSymbol(Relocs[I].SymbolTableIndex);
}
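// Usage sketch (hypothetical writer-side code, not part of this change):
// the writer snapshots targets once, may redirect individual entries at a
// thunk, and can roll everything back if thunk placement must be restarted.
//
//   for (Chunk *C : Chunks) C->readRelocTargets();  // snapshot originals
//   SC->RelocTargets[I] = ThunkSymbol;              // redirect one reloc
//   for (Chunk *C : Chunks) C->resetRelocTargets(); // undo, try again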
static void add16(uint8_t *P, int16_t V) { write16le(P, read16le(P) + V); }
static void add32(uint8_t *P, int32_t V) { write32le(P, read32le(P) + V); }
static void add64(uint8_t *P, int64_t V) { write64le(P, read64le(P) + V); }
@ -58,7 +75,8 @@ static bool checkSecRel(const SectionChunk *Sec, OutputSection *OS) {
return true;
if (Sec->isCodeView())
return false;
fatal("SECREL relocation cannot be applied to absolute symbols");
error("SECREL relocation cannot be applied to absolute symbols");
return false;
}
static void applySecRel(const SectionChunk *Sec, uint8_t *Off,
@ -98,7 +116,7 @@ void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, OutputSection *OS,
case IMAGE_REL_AMD64_SECTION: applySecIdx(Off, OS); break;
case IMAGE_REL_AMD64_SECREL: applySecRel(this, Off, OS, S); break;
default:
fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
toString(File));
}
}
@ -113,7 +131,7 @@ void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, OutputSection *OS,
case IMAGE_REL_I386_SECTION: applySecIdx(Off, OS); break;
case IMAGE_REL_I386_SECREL: applySecRel(this, Off, OS, S); break;
default:
fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
toString(File));
}
}
@ -123,16 +141,22 @@ static void applyMOV(uint8_t *Off, uint16_t V) {
write16le(Off + 2, (read16le(Off + 2) & 0x8f00) | ((V & 0x700) << 4) | (V & 0xff));
}
static uint16_t readMOV(uint8_t *Off) {
static uint16_t readMOV(uint8_t *Off, bool MOVT) {
uint16_t Op1 = read16le(Off);
if ((Op1 & 0xfbf0) != (MOVT ? 0xf2c0 : 0xf240))
error("unexpected instruction in " + Twine(MOVT ? "MOVT" : "MOVW") +
" instruction in MOV32T relocation");
uint16_t Op2 = read16le(Off + 2);
if ((Op2 & 0x8000) != 0)
error("unexpected instruction in " + Twine(MOVT ? "MOVT" : "MOVW") +
" instruction in MOV32T relocation");
return (Op2 & 0x00ff) | ((Op2 >> 4) & 0x0700) | ((Op1 << 1) & 0x0800) |
((Op1 & 0x000f) << 12);
}
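// Illustrative sketch, not part of this file: how a 16-bit immediate is
// scattered across a Thumb2 MOVW/MOVT halfword pair, using the same masks
// and shifts as readMOV above. The example halfwords assume the T3 encoding
// with ip (r12) as the destination register.
#include <cassert>
#include <cstdint>

static uint16_t extractImm16(uint16_t Op1, uint16_t Op2) {
  return (Op2 & 0x00ff) |        // imm8 -> bits 7:0
         ((Op2 >> 4) & 0x0700) | // imm3 -> bits 10:8
         ((Op1 << 1) & 0x0800) | // i    -> bit 11
         ((Op1 & 0x000f) << 12); // imm4 -> bits 15:12
}

int main() {
  // movw ip, #0x1234 encodes as halfwords Op1 = 0xf241, Op2 = 0x2c34.
  assert(extractImm16(0xf241, 0x2c34) == 0x1234);
}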
void applyMOV32T(uint8_t *Off, uint32_t V) {
uint16_t ImmW = readMOV(Off); // read MOVW operand
uint16_t ImmT = readMOV(Off + 4); // read MOVT operand
uint16_t ImmW = readMOV(Off, false); // read MOVW operand
uint16_t ImmT = readMOV(Off + 4, true); // read MOVT operand
uint32_t Imm = ImmW | (ImmT << 16);
V += Imm; // add the immediate offset
applyMOV(Off, V); // set MOVW operand
@ -141,7 +165,7 @@ void applyMOV32T(uint8_t *Off, uint32_t V) {
static void applyBranch20T(uint8_t *Off, int32_t V) {
if (!isInt<21>(V))
fatal("relocation out of range");
error("relocation out of range");
uint32_t S = V < 0 ? 1 : 0;
uint32_t J1 = (V >> 19) & 1;
uint32_t J2 = (V >> 18) & 1;
@ -151,7 +175,7 @@ static void applyBranch20T(uint8_t *Off, int32_t V) {
void applyBranch24T(uint8_t *Off, int32_t V) {
if (!isInt<25>(V))
fatal("relocation out of range");
error("relocation out of range");
uint32_t S = V < 0 ? 1 : 0;
uint32_t J1 = ((~V >> 23) & 1) ^ S;
uint32_t J2 = ((~V >> 22) & 1) ^ S;
@ -176,7 +200,7 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS,
case IMAGE_REL_ARM_SECTION: applySecIdx(Off, OS); break;
case IMAGE_REL_ARM_SECREL: applySecRel(this, Off, OS, S); break;
default:
fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
toString(File));
}
}
@ -184,7 +208,7 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS,
// Interpret the existing immediate value as a byte offset to the
// target symbol, then update the instruction with the immediate as
// the page offset from the current instruction to the target.
static void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift) {
void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift) {
uint32_t Orig = read32le(Off);
uint64_t Imm = ((Orig >> 29) & 0x3) | ((Orig >> 3) & 0x1FFFFC);
S += Imm;
@ -198,7 +222,7 @@ static void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift) {
// Update the immediate field in a AARCH64 ldr, str, and add instruction.
// Optionally limit the range of the written immediate by one or more bits
// (RangeLimit).
static void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit) {
void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit) {
uint32_t Orig = read32le(Off);
Imm += (Orig >> 10) & 0xFFF;
Orig &= ~(0xFFF << 10);
@ -221,7 +245,7 @@ static void applyArm64Ldr(uint8_t *Off, uint64_t Imm) {
if ((Orig & 0x4800000) == 0x4800000)
Size += 4;
if ((Imm & ((1 << Size) - 1)) != 0)
fatal("misaligned ldr/str offset");
error("misaligned ldr/str offset");
applyArm64Imm(Off, Imm >> Size, Size);
}
@ -250,21 +274,21 @@ static void applySecRelLdr(const SectionChunk *Sec, uint8_t *Off,
applyArm64Ldr(Off, (S - OS->getRVA()) & 0xfff);
}
static void applyArm64Branch26(uint8_t *Off, int64_t V) {
void applyArm64Branch26(uint8_t *Off, int64_t V) {
if (!isInt<28>(V))
fatal("relocation out of range");
error("relocation out of range");
or32(Off, (V & 0x0FFFFFFC) >> 2);
}
static void applyArm64Branch19(uint8_t *Off, int64_t V) {
if (!isInt<21>(V))
fatal("relocation out of range");
error("relocation out of range");
or32(Off, (V & 0x001FFFFC) << 3);
}
static void applyArm64Branch14(uint8_t *Off, int64_t V) {
if (!isInt<16>(V))
fatal("relocation out of range");
error("relocation out of range");
or32(Off, (V & 0x0000FFFC) << 3);
}
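// Range sketch, an illustration rather than part of the patch: the 26-bit
// branch immediate above is a word offset shifted left by 2, so the
// reachable displacement is a signed 28-bit byte offset, i.e. +/- 128 MiB,
// which is exactly the isInt<28> check in applyArm64Branch26.
#include <cassert>
#include <cstdint>

static bool fitsBranch26(int64_t V) {
  return V >= -(1LL << 27) && V < (1LL << 27); // same test as isInt<28>(V)
}

int main() {
  assert(fitsBranch26(128 * 1024 * 1024 - 4)); // just inside the range
  assert(!fitsBranch26(128 * 1024 * 1024));    // first byte out of range
}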
@ -287,11 +311,37 @@ void SectionChunk::applyRelARM64(uint8_t *Off, uint16_t Type, OutputSection *OS,
case IMAGE_REL_ARM64_SECREL_LOW12L: applySecRelLdr(this, Off, OS, S); break;
case IMAGE_REL_ARM64_SECTION: applySecIdx(Off, OS); break;
default:
fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " +
toString(File));
}
}
static void maybeReportRelocationToDiscarded(const SectionChunk *FromChunk,
Defined *Sym,
const coff_relocation &Rel) {
// Don't report these errors when the relocation comes from a debug info
// section or in mingw mode. MinGW mode object files (built by GCC) can
// have leftover sections with relocations against discarded comdat
// sections. Such sections are left as is, with relocations untouched.
if (FromChunk->isCodeView() || FromChunk->isDWARF() || Config->MinGW)
return;
// Get the name of the symbol. If it's null, it was discarded early, so we
// have to go back to the object file.
ObjFile *File = FromChunk->File;
StringRef Name;
if (Sym) {
Name = Sym->getName();
} else {
COFFSymbolRef COFFSym =
check(File->getCOFFObj()->getSymbol(Rel.SymbolTableIndex));
File->getCOFFObj()->getSymbolName(COFFSym, Name);
}
error("relocation against symbol in discarded section: " + Name +
getSymbolLocations(File, Rel.SymbolTableIndex));
}
void SectionChunk::writeTo(uint8_t *Buf) const {
if (!hasData())
return;
@ -302,46 +352,40 @@ void SectionChunk::writeTo(uint8_t *Buf) const {
// Apply relocations.
size_t InputSize = getSize();
for (const coff_relocation &Rel : Relocs) {
for (size_t I = 0, E = Relocs.size(); I < E; I++) {
const coff_relocation &Rel = Relocs[I];
// Check for an invalid relocation offset. This check isn't perfect, because
// we don't have the relocation size, which is only known after checking the
// machine and relocation type. As a result, a relocation may overwrite the
// beginning of the following input section.
if (Rel.VirtualAddress >= InputSize)
fatal("relocation points beyond the end of its parent section");
if (Rel.VirtualAddress >= InputSize) {
error("relocation points beyond the end of its parent section");
continue;
}
uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress;
// Use the potentially remapped Symbol instead of the one that the
// relocation points to.
auto *Sym = dyn_cast_or_null<Defined>(RelocTargets[I]);
// Get the output section of the symbol for this relocation. The output
// section is needed to compute SECREL and SECTION relocations used in debug
// info.
auto *Sym =
dyn_cast_or_null<Defined>(File->getSymbol(Rel.SymbolTableIndex));
if (!Sym) {
if (isCodeView() || isDWARF())
continue;
// Symbols in early discarded sections are represented using null pointers,
// so we need to retrieve the name from the object file.
COFFSymbolRef Sym =
check(File->getCOFFObj()->getSymbol(Rel.SymbolTableIndex));
StringRef Name;
File->getCOFFObj()->getSymbolName(Sym, Name);
fatal("relocation against symbol in discarded section: " + Name);
}
Chunk *C = Sym->getChunk();
Chunk *C = Sym ? Sym->getChunk() : nullptr;
OutputSection *OS = C ? C->getOutputSection() : nullptr;
// Only absolute and __ImageBase symbols lack an output section. For any
// other symbol, this indicates that the chunk was discarded. Normally
// relocations against discarded sections are an error. However, debug info
// sections are not GC roots and can end up with these kinds of relocations.
// Skip these relocations.
if (!OS && !isa<DefinedAbsolute>(Sym) && !isa<DefinedSynthetic>(Sym)) {
if (isCodeView() || isDWARF())
continue;
fatal("relocation against symbol in discarded section: " +
Sym->getName());
// Skip the relocation if it refers to a discarded section, and diagnose it
// as an error if appropriate. If a symbol was discarded early, it may be
// null. If it was discarded late, the output section will be null, unless
// it was an absolute or synthetic symbol.
if (!Sym ||
(!OS && !isa<DefinedAbsolute>(Sym) && !isa<DefinedSynthetic>(Sym))) {
maybeReportRelocationToDiscarded(this, Sym, Rel);
continue;
}
uint64_t S = Sym->getRVA();
// Compute the RVA of the relocation for relative relocations.
@ -399,17 +443,125 @@ static uint8_t getBaserelType(const coff_relocation &Rel) {
// fixed by the loader if load-time relocation is needed.
// Only called when base relocation is enabled.
void SectionChunk::getBaserels(std::vector<Baserel> *Res) {
for (const coff_relocation &Rel : Relocs) {
for (size_t I = 0, E = Relocs.size(); I < E; I++) {
const coff_relocation &Rel = Relocs[I];
uint8_t Ty = getBaserelType(Rel);
if (Ty == IMAGE_REL_BASED_ABSOLUTE)
continue;
Symbol *Target = File->getSymbol(Rel.SymbolTableIndex);
// Use the potentially remapped Symbol instead of the one that the
// relocation points to.
Symbol *Target = RelocTargets[I];
if (!Target || isa<DefinedAbsolute>(Target))
continue;
Res->emplace_back(RVA + Rel.VirtualAddress, Ty);
}
}
// MinGW specific.
// Check whether a static relocation of type Type can be deferred and
// handled at runtime as a pseudo relocation (for references to a module
// local variable, which turned out to actually need to be imported from
// another DLL). This returns the size the relocation is supposed to update,
// in bits, or 0 if the relocation cannot be handled as a runtime pseudo
// relocation.
static int getRuntimePseudoRelocSize(uint16_t Type) {
// Only relocations that contain a plain absolute address or a plain
// relative offset can be handled, since the runtime pseudo reloc
// implementation adds 8/16/32/64 bit values to a memory address.
//
// Given a pseudo relocation entry,
//
// typedef struct {
// DWORD sym;
// DWORD target;
// DWORD flags;
// } runtime_pseudo_reloc_item_v2;
//
// the runtime relocation performs this adjustment:
// *(base + .target) += *(base + .sym) - (base + .sym)
//
// This works both for absolute addresses (IMAGE_REL_*_ADDR32/64,
// IMAGE_REL_I386_DIR32), where the memory location initially contains
// the address of the IAT slot, and for relative addresses (IMAGE_REL*_REL32),
// where the memory location originally contains the relative offset to the
// IAT slot.
//
// This requires the target address to be writable, either directly out of
// the image, or temporarily changed at runtime with VirtualProtect.
// Since this only operates on direct address values, it doesn't work for
// ARM/ARM64 relocations, other than the plain ADDR32/ADDR64 relocations.
switch (Config->Machine) {
case AMD64:
switch (Type) {
case IMAGE_REL_AMD64_ADDR64:
return 64;
case IMAGE_REL_AMD64_ADDR32:
case IMAGE_REL_AMD64_REL32:
case IMAGE_REL_AMD64_REL32_1:
case IMAGE_REL_AMD64_REL32_2:
case IMAGE_REL_AMD64_REL32_3:
case IMAGE_REL_AMD64_REL32_4:
case IMAGE_REL_AMD64_REL32_5:
return 32;
default:
return 0;
}
case I386:
switch (Type) {
case IMAGE_REL_I386_DIR32:
case IMAGE_REL_I386_REL32:
return 32;
default:
return 0;
}
case ARMNT:
switch (Type) {
case IMAGE_REL_ARM_ADDR32:
return 32;
default:
return 0;
}
case ARM64:
switch (Type) {
case IMAGE_REL_ARM64_ADDR64:
return 64;
case IMAGE_REL_ARM64_ADDR32:
return 32;
default:
return 0;
}
default:
llvm_unreachable("unknown machine type");
}
}
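// Illustrative sketch, not part of this change: the adjustment described in
// the comment above, as the MinGW runtime might apply it for one 64-bit
// entry. The struct and names here are hypothetical; the real loop lives in
// mingw-w64's pseudo-reloc.c.
#include <cstdint>

struct RuntimePseudoRelocItemV2 {
  uint32_t Sym;    // RVA of the IAT slot for the imported variable
  uint32_t Target; // RVA of the location to patch
  uint32_t Flags;  // width of the patched value, in bits
};

static void applyOne64(uint8_t *Base, const RuntimePseudoRelocItemV2 &Item) {
  // *(base + .target) += *(base + .sym) - (base + .sym)
  uint64_t SlotAddr = reinterpret_cast<uint64_t>(Base + Item.Sym);
  uint64_t Resolved = *reinterpret_cast<uint64_t *>(Base + Item.Sym);
  *reinterpret_cast<uint64_t *>(Base + Item.Target) += Resolved - SlotAddr;
}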
// MinGW specific.
// Append information to the provided vector about all relocations that
// need to be handled at runtime as runtime pseudo relocations (references
// to a module local variable, which turned out to actually need to be
// imported from another DLL).
void SectionChunk::getRuntimePseudoRelocs(
std::vector<RuntimePseudoReloc> &Res) {
for (const coff_relocation &Rel : Relocs) {
auto *Target =
dyn_cast_or_null<Defined>(File->getSymbol(Rel.SymbolTableIndex));
if (!Target || !Target->IsRuntimePseudoReloc)
continue;
int SizeInBits = getRuntimePseudoRelocSize(Rel.Type);
if (SizeInBits == 0) {
error("unable to automatically import from " + Target->getName() +
" with relocation type " +
File->getCOFFObj()->getRelocationTypeName(Rel.Type) + " in " +
toString(File));
continue;
}
// SizeInBits is used to initialize the Flags field; currently no
// other flags are defined.
Res.emplace_back(
RuntimePseudoReloc(Target, this, Rel.VirtualAddress, SizeInBits));
}
}
bool SectionChunk::hasData() const {
return !(Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA);
}
@ -447,6 +599,13 @@ void SectionChunk::replace(SectionChunk *Other) {
Other->Live = false;
}
uint32_t SectionChunk::getSectionNumber() const {
DataRefImpl R;
R.p = reinterpret_cast<uintptr_t>(Header);
SectionRef S(R, File->getCOFFObj());
return S.getIndex() + 1;
}
CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) {
// Common symbols are aligned on natural boundaries up to 32 bytes.
// This is what MSVC link.exe does.
@ -460,6 +619,7 @@ uint32_t CommonChunk::getOutputCharacteristics() const {
void StringChunk::writeTo(uint8_t *Buf) const {
memcpy(Buf + OutputSectionOff, Str.data(), Str.size());
Buf[OutputSectionOff + Str.size()] = '\0';
}
ImportThunkChunkX64::ImportThunkChunkX64(Defined *S) : ImpSymbol(S) {
@ -502,13 +662,50 @@ void ImportThunkChunkARM64::writeTo(uint8_t *Buf) const {
applyArm64Ldr(Buf + OutputSectionOff + 4, Off);
}
// A Thumb2, PIC, non-interworking range extension thunk.
const uint8_t ArmThunk[] = {
0x40, 0xf2, 0x00, 0x0c, // P: movw ip,:lower16:S - (P + (L1-P) + 4)
0xc0, 0xf2, 0x00, 0x0c, // movt ip,:upper16:S - (P + (L1-P) + 4)
0xe7, 0x44, // L1: add pc, ip
};
size_t RangeExtensionThunkARM::getSize() const {
assert(Config->Machine == ARMNT);
return sizeof(ArmThunk);
}
void RangeExtensionThunkARM::writeTo(uint8_t *Buf) const {
assert(Config->Machine == ARMNT);
uint64_t Offset = Target->getRVA() - RVA - 12;
memcpy(Buf + OutputSectionOff, ArmThunk, sizeof(ArmThunk));
applyMOV32T(Buf + OutputSectionOff, uint32_t(Offset));
}
// A position independent ARM64 adrp+add thunk, with a maximum range of
// +/- 4 GB, which is enough for any PE-COFF.
const uint8_t Arm64Thunk[] = {
0x10, 0x00, 0x00, 0x90, // adrp x16, Dest
0x10, 0x02, 0x00, 0x91, // add x16, x16, :lo12:Dest
0x00, 0x02, 0x1f, 0xd6, // br x16
};
size_t RangeExtensionThunkARM64::getSize() const {
assert(Config->Machine == ARM64);
return sizeof(Arm64Thunk);
}
void RangeExtensionThunkARM64::writeTo(uint8_t *Buf) const {
assert(Config->Machine == ARM64);
memcpy(Buf + OutputSectionOff, Arm64Thunk, sizeof(Arm64Thunk));
applyArm64Addr(Buf + OutputSectionOff + 0, Target->getRVA(), RVA, 12);
applyArm64Imm(Buf + OutputSectionOff + 4, Target->getRVA() & 0xfff, 0);
}
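// Address arithmetic sketch (illustration only): what the patched adrp+add
// pair in Arm64Thunk computes at run time. adrp supplies the 4 KiB page of
// the target relative to the thunk's own page; add supplies the low 12 bits.
#include <cassert>
#include <cstdint>

static uint64_t resolveAdrpAdd(uint64_t ThunkAddr, uint64_t TargetAddr) {
  uint64_t PageDelta = (TargetAddr & ~0xfffULL) - (ThunkAddr & ~0xfffULL);
  return (ThunkAddr & ~0xfffULL) + PageDelta + (TargetAddr & 0xfff);
}

int main() {
  // Lands exactly on the target, for any placement of the thunk.
  assert(resolveAdrpAdd(0x140001000, 0x1400abc34) == 0x1400abc34);
}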
void LocalImportChunk::getBaserels(std::vector<Baserel> *Res) {
Res->emplace_back(getRVA());
}
size_t LocalImportChunk::getSize() const {
return Config->is64() ? 8 : 4;
}
size_t LocalImportChunk::getSize() const { return Config->Wordsize; }
void LocalImportChunk::writeTo(uint8_t *Buf) const {
if (Config->is64()) {
@ -528,6 +725,34 @@ void RVATableChunk::writeTo(uint8_t *Buf) const {
"RVA tables should be de-duplicated");
}
// MinGW specific, for the "automatic import of variables from DLLs" feature.
size_t PseudoRelocTableChunk::getSize() const {
if (Relocs.empty())
return 0;
return 12 + 12 * Relocs.size();
}
// MinGW specific.
void PseudoRelocTableChunk::writeTo(uint8_t *Buf) const {
if (Relocs.empty())
return;
ulittle32_t *Table = reinterpret_cast<ulittle32_t *>(Buf + OutputSectionOff);
// This is the list header, to signal the runtime pseudo relocation v2
// format.
Table[0] = 0;
Table[1] = 0;
Table[2] = 1;
size_t Idx = 3;
for (const RuntimePseudoReloc &RPR : Relocs) {
Table[Idx + 0] = RPR.Sym->getRVA();
Table[Idx + 1] = RPR.Target->getRVA() + RPR.TargetOffset;
Table[Idx + 2] = RPR.Flags;
Idx += 3;
}
}
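// Layout sketch, inferred from the code above (reading the header words as a
// version marker is an assumption about the MinGW runtime's check):
//
//   ulittle32_t Header[3] = {0, 0, 1}; // 12 bytes announcing v2 records
//   struct Item { ulittle32_t SymRVA, TargetRVA, Flags; }; // 12 bytes each
//
// which is where getSize() = 12 + 12 * Relocs.size() comes from.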
// Windows-specific. This class represents a block in .reloc section.
// The format is described here.
//
@ -613,13 +838,16 @@ void MergeChunk::addSection(SectionChunk *C) {
}
void MergeChunk::finalizeContents() {
for (SectionChunk *C : Sections)
if (C->isLive())
Builder.add(toStringRef(C->getContents()));
Builder.finalize();
if (!Finalized) {
for (SectionChunk *C : Sections)
if (C->Live)
Builder.add(toStringRef(C->getContents()));
Builder.finalize();
Finalized = true;
}
for (SectionChunk *C : Sections) {
if (!C->isLive())
if (!C->Live)
continue;
size_t Off = Builder.getOffset(toStringRef(C->getContents()));
C->setOutputSection(Out);
@ -640,5 +868,16 @@ void MergeChunk::writeTo(uint8_t *Buf) const {
Builder.write(Buf + OutputSectionOff);
}
// MinGW specific.
size_t AbsolutePointerChunk::getSize() const { return Config->Wordsize; }
void AbsolutePointerChunk::writeTo(uint8_t *Buf) const {
if (Config->is64()) {
write64le(Buf + OutputSectionOff, Value);
} else {
write32le(Buf + OutputSectionOff, Value);
}
}
} // namespace coff
} // namespace lld

deps/lld/COFF/Chunks.h vendored

@ -36,6 +36,7 @@ class DefinedImportData;
class DefinedRegular;
class ObjFile;
class OutputSection;
class RuntimePseudoReloc;
class Symbol;
// Mask for permissions (discardable, writable, readable, executable, etc).
@ -63,6 +64,13 @@ public:
// before calling this function.
virtual void writeTo(uint8_t *Buf) const {}
// Called by the writer once before assigning addresses and writing
// the output.
virtual void readRelocTargets() {}
// Called if restarting thunk addition.
virtual void resetRelocTargets() {}
// Called by the writer after an RVA is assigned, but before calling
// getSize().
virtual void finalizeContents() {}
@ -114,6 +122,10 @@ protected:
public:
// The offset from beginning of the output section. The writer sets a value.
uint64_t OutputSectionOff = 0;
// Whether this section needs to be kept distinct from other sections during
// ICF. This is set by the driver using address-significance tables.
bool KeepUnique = false;
};
// A chunk corresponding a section of an input file.
@ -140,6 +152,8 @@ public:
SectionChunk(ObjFile *File, const coff_section *Header);
static bool classof(const Chunk *C) { return C->kind() == SectionKind; }
void readRelocTargets() override;
void resetRelocTargets() override;
size_t getSize() const override { return Header->SizeOfRawData; }
ArrayRef<uint8_t> getContents() const;
void writeTo(uint8_t *Buf) const override;
@ -157,6 +171,8 @@ public:
void applyRelARM64(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S,
uint64_t P) const;
void getRuntimePseudoRelocs(std::vector<RuntimePseudoReloc> &Res);
// Called if the garbage collector decides to not include this chunk
// in a final output. It's supposed to print out a log message to stdout.
void printDiscardedMessage() const;
@ -167,16 +183,6 @@ public:
StringRef getDebugName() override;
// Returns true if the chunk was not dropped by GC.
bool isLive() { return Live; }
// Used by the garbage collector.
void markLive() {
assert(Config->DoGC && "should only mark things live from GC");
assert(!isLive() && "Cannot mark an already live section!");
Live = true;
}
// True if this is a codeview debug info chunk. These will not be laid out in
// the image. Instead they will end up in the PDB, if one is requested.
bool isCodeView() const {
@ -197,10 +203,13 @@ public:
// Allow iteration over the associated child chunks for this section.
ArrayRef<SectionChunk *> children() const { return AssocChildren; }
// The section ID this chunk belongs to in its Obj.
uint32_t getSectionNumber() const;
// A pointer pointing to a replacement for this chunk.
// Initially it points to "this" object. If this chunk is merged
// with other chunk by ICF, it points to another chunk,
// and this chunk is considrered as dead.
// and this chunk is considered as dead.
SectionChunk *Repl;
// The CRC of the contents as described in the COFF spec 4.5.5.
@ -217,13 +226,17 @@ public:
ArrayRef<coff_relocation> Relocs;
// Used by the garbage collector.
bool Live;
// When inserting a thunk, we need to adjust a relocation to point to
// the thunk instead of the actual original target Symbol.
std::vector<Symbol *> RelocTargets;
private:
StringRef SectionName;
std::vector<SectionChunk *> AssocChildren;
// Used by the garbage collector.
bool Live;
// Used for ICF (Identical COMDAT Folding)
void replace(SectionChunk *Other);
uint32_t Class[2] = {0, 0};
@ -254,6 +267,7 @@ public:
private:
llvm::StringTableBuilder Builder;
bool Finalized = false;
};
// A chunk for common symbols. Common chunks don't have actual data.
@ -297,7 +311,7 @@ static const uint8_t ImportThunkARM64[] = {
};
// Windows-specific.
// A chunk for DLL import jump table entry. In a final output, it's
// A chunk for DLL import jump table entry. In a final output, its
// contents will be a JMP instruction to some __imp_ symbol.
class ImportThunkChunkX64 : public Chunk {
public:
@ -341,11 +355,31 @@ private:
Defined *ImpSymbol;
};
class RangeExtensionThunkARM : public Chunk {
public:
explicit RangeExtensionThunkARM(Defined *T) : Target(T) {}
size_t getSize() const override;
void writeTo(uint8_t *Buf) const override;
Defined *Target;
};
class RangeExtensionThunkARM64 : public Chunk {
public:
explicit RangeExtensionThunkARM64(Defined *T) : Target(T) {}
size_t getSize() const override;
void writeTo(uint8_t *Buf) const override;
Defined *Target;
};
// Windows-specific.
// See comments for DefinedLocalImport class.
class LocalImportChunk : public Chunk {
public:
explicit LocalImportChunk(Defined *S) : Sym(S) {}
explicit LocalImportChunk(Defined *S) : Sym(S) {
Alignment = Config->Wordsize;
}
size_t getSize() const override;
void getBaserels(std::vector<Baserel> *Res) override;
void writeTo(uint8_t *Buf) const override;
@ -414,9 +448,73 @@ public:
uint8_t Type;
};
// This is a placeholder Chunk, to allow attaching a DefinedSynthetic to a
// specific place in a section, without any data. This is used for the MinGW
// specific symbol __RUNTIME_PSEUDO_RELOC_LIST_END__, even though the concept
// of an empty chunk isn't MinGW specific.
class EmptyChunk : public Chunk {
public:
EmptyChunk() {}
size_t getSize() const override { return 0; }
void writeTo(uint8_t *Buf) const override {}
};
// MinGW specific, for the "automatic import of variables from DLLs" feature.
// This provides the table of runtime pseudo relocations, for variable
// references that turned out to need to be imported from a DLL even though
// the reference didn't use the dllimport attribute. The MinGW runtime will
// process this table after loading, before handing control over to user
// code.
class PseudoRelocTableChunk : public Chunk {
public:
PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> &Relocs)
: Relocs(std::move(Relocs)) {
Alignment = 4;
}
size_t getSize() const override;
void writeTo(uint8_t *Buf) const override;
private:
std::vector<RuntimePseudoReloc> Relocs;
};
// MinGW specific; information about one individual location in the image
// that needs to be fixed up at runtime after loading. This represents
// one individual element in the PseudoRelocTableChunk table.
class RuntimePseudoReloc {
public:
RuntimePseudoReloc(Defined *Sym, SectionChunk *Target, uint32_t TargetOffset,
int Flags)
: Sym(Sym), Target(Target), TargetOffset(TargetOffset), Flags(Flags) {}
Defined *Sym;
SectionChunk *Target;
uint32_t TargetOffset;
// The Flags field contains the size of the relocation, in bits. No other
// flags are currently defined.
int Flags;
};
// MinGW specific. A Chunk that contains one pointer-sized absolute value.
class AbsolutePointerChunk : public Chunk {
public:
AbsolutePointerChunk(uint64_t Value) : Value(Value) {
Alignment = getSize();
}
size_t getSize() const override;
void writeTo(uint8_t *Buf) const override;
private:
uint64_t Value;
};
void applyMOV32T(uint8_t *Off, uint32_t V);
void applyBranch24T(uint8_t *Off, int32_t V);
void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift);
void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit);
void applyArm64Branch26(uint8_t *Off, int64_t V);
} // namespace coff
} // namespace lld

deps/lld/COFF/Config.h vendored

@ -84,6 +84,7 @@ struct Configuration {
bool is64() { return Machine == AMD64 || Machine == ARM64; }
llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN;
size_t Wordsize;
bool Verbose = false;
WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN;
Symbol *Entry = nullptr;
@ -94,7 +95,8 @@ struct Configuration {
bool DoICF = true;
bool TailMerge;
bool Relocatable = true;
bool Force = false;
bool ForceMultiple = false;
bool ForceUnresolved = false;
bool Debug = false;
bool DebugDwarf = false;
bool DebugGHashes = false;
@ -195,6 +197,7 @@ struct Configuration {
bool MinGW = false;
bool WarnMissingOrderSymbol = true;
bool WarnLocallyDefinedImported = true;
bool WarnDebugInfoUnusable = true;
bool Incremental = true;
bool IntegrityCheck = false;
bool KillAt = false;

deps/lld/COFF/DLL.cpp vendored

@ -35,8 +35,6 @@ namespace {
// Import table
static int ptrSize() { return Config->is64() ? 8 : 4; }
// A chunk for the import descriptor table.
class HintNameChunk : public Chunk {
public:
@ -49,6 +47,7 @@ public:
}
void writeTo(uint8_t *Buf) const override {
memset(Buf + OutputSectionOff, 0, getSize());
write16le(Buf + OutputSectionOff, Hint);
memcpy(Buf + OutputSectionOff + 2, Name.data(), Name.size());
}
@ -61,11 +60,14 @@ private:
// A chunk for the import descriptor table.
class LookupChunk : public Chunk {
public:
explicit LookupChunk(Chunk *C) : HintName(C) { Alignment = ptrSize(); }
size_t getSize() const override { return ptrSize(); }
explicit LookupChunk(Chunk *C) : HintName(C) { Alignment = Config->Wordsize; }
size_t getSize() const override { return Config->Wordsize; }
void writeTo(uint8_t *Buf) const override {
write32le(Buf + OutputSectionOff, HintName->getRVA());
if (Config->is64())
write64le(Buf + OutputSectionOff, HintName->getRVA());
else
write32le(Buf + OutputSectionOff, HintName->getRVA());
}
Chunk *HintName;
@ -76,8 +78,10 @@ public:
// See Microsoft PE/COFF spec 7.1. Import Header for details.
class OrdinalOnlyChunk : public Chunk {
public:
explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) { Alignment = ptrSize(); }
size_t getSize() const override { return ptrSize(); }
explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) {
Alignment = Config->Wordsize;
}
size_t getSize() const override { return Config->Wordsize; }
void writeTo(uint8_t *Buf) const override {
// An import-by-ordinal slot has MSB 1 to indicate that
@ -99,6 +103,8 @@ public:
size_t getSize() const override { return sizeof(ImportDirectoryTableEntry); }
void writeTo(uint8_t *Buf) const override {
memset(Buf + OutputSectionOff, 0, getSize());
auto *E = (coff_import_directory_table_entry *)(Buf + OutputSectionOff);
E->ImportLookupTableRVA = LookupTab->getRVA();
E->NameRVA = DLLName->getRVA();
@ -118,6 +124,10 @@ public:
bool hasData() const override { return false; }
size_t getSize() const override { return Size; }
void writeTo(uint8_t *Buf) const override {
memset(Buf + OutputSectionOff, 0, Size);
}
private:
size_t Size;
};
@ -160,6 +170,8 @@ public:
}
void writeTo(uint8_t *Buf) const override {
memset(Buf + OutputSectionOff, 0, getSize());
auto *E = (delay_import_directory_table_entry *)(Buf + OutputSectionOff);
E->Attributes = 1;
E->Name = DLLName->getRVA();
@ -230,6 +242,36 @@ static const uint8_t ThunkARM[] = {
0x60, 0x47, // bx ip
};
static const uint8_t ThunkARM64[] = {
0x11, 0x00, 0x00, 0x90, // adrp x17, #0 __imp_<FUNCNAME>
0x31, 0x02, 0x00, 0x91, // add x17, x17, #0 :lo12:__imp_<FUNCNAME>
0xfd, 0x7b, 0xb3, 0xa9, // stp x29, x30, [sp, #-208]!
0xfd, 0x03, 0x00, 0x91, // mov x29, sp
0xe0, 0x07, 0x01, 0xa9, // stp x0, x1, [sp, #16]
0xe2, 0x0f, 0x02, 0xa9, // stp x2, x3, [sp, #32]
0xe4, 0x17, 0x03, 0xa9, // stp x4, x5, [sp, #48]
0xe6, 0x1f, 0x04, 0xa9, // stp x6, x7, [sp, #64]
0xe0, 0x87, 0x02, 0xad, // stp q0, q1, [sp, #80]
0xe2, 0x8f, 0x03, 0xad, // stp q2, q3, [sp, #112]
0xe4, 0x97, 0x04, 0xad, // stp q4, q5, [sp, #144]
0xe6, 0x9f, 0x05, 0xad, // stp q6, q7, [sp, #176]
0xe1, 0x03, 0x11, 0xaa, // mov x1, x17
0x00, 0x00, 0x00, 0x90, // adrp x0, #0 DELAY_IMPORT_DESCRIPTOR
0x00, 0x00, 0x00, 0x91, // add x0, x0, #0 :lo12:DELAY_IMPORT_DESCRIPTOR
0x00, 0x00, 0x00, 0x94, // bl #0 __delayLoadHelper2
0xf0, 0x03, 0x00, 0xaa, // mov x16, x0
0xe6, 0x9f, 0x45, 0xad, // ldp q6, q7, [sp, #176]
0xe4, 0x97, 0x44, 0xad, // ldp q4, q5, [sp, #144]
0xe2, 0x8f, 0x43, 0xad, // ldp q2, q3, [sp, #112]
0xe0, 0x87, 0x42, 0xad, // ldp q0, q1, [sp, #80]
0xe6, 0x1f, 0x44, 0xa9, // ldp x6, x7, [sp, #64]
0xe4, 0x17, 0x43, 0xa9, // ldp x4, x5, [sp, #48]
0xe2, 0x0f, 0x42, 0xa9, // ldp x2, x3, [sp, #32]
0xe0, 0x07, 0x41, 0xa9, // ldp x0, x1, [sp, #16]
0xfd, 0x7b, 0xcd, 0xa8, // ldp x29, x30, [sp], #208
0x00, 0x02, 0x1f, 0xd6, // br x16
};
// A chunk for the delay import thunk.
class ThunkChunkX64 : public Chunk {
public:
@ -298,11 +340,35 @@ public:
Defined *Helper = nullptr;
};
class ThunkChunkARM64 : public Chunk {
public:
ThunkChunkARM64(Defined *I, Chunk *D, Defined *H)
: Imp(I), Desc(D), Helper(H) {}
size_t getSize() const override { return sizeof(ThunkARM64); }
void writeTo(uint8_t *Buf) const override {
memcpy(Buf + OutputSectionOff, ThunkARM64, sizeof(ThunkARM64));
applyArm64Addr(Buf + OutputSectionOff + 0, Imp->getRVA(), RVA + 0, 12);
applyArm64Imm(Buf + OutputSectionOff + 4, Imp->getRVA() & 0xfff, 0);
applyArm64Addr(Buf + OutputSectionOff + 52, Desc->getRVA(), RVA + 52, 12);
applyArm64Imm(Buf + OutputSectionOff + 56, Desc->getRVA() & 0xfff, 0);
applyArm64Branch26(Buf + OutputSectionOff + 60,
Helper->getRVA() - RVA - 60);
}
Defined *Imp = nullptr;
Chunk *Desc = nullptr;
Defined *Helper = nullptr;
};
// A chunk for the import descriptor table.
class DelayAddressChunk : public Chunk {
public:
explicit DelayAddressChunk(Chunk *C) : Thunk(C) { Alignment = ptrSize(); }
size_t getSize() const override { return ptrSize(); }
explicit DelayAddressChunk(Chunk *C) : Thunk(C) {
Alignment = Config->Wordsize;
}
size_t getSize() const override { return Config->Wordsize; }
void writeTo(uint8_t *Buf) const override {
if (Config->is64()) {
@ -338,6 +404,8 @@ public:
}
void writeTo(uint8_t *Buf) const override {
memset(Buf + OutputSectionOff, 0, getSize());
auto *E = (export_directory_table_entry *)(Buf + OutputSectionOff);
E->NameRVA = DLLName->getRVA();
E->OrdinalBase = 0;
@ -362,6 +430,8 @@ public:
size_t getSize() const override { return Size * 4; }
void writeTo(uint8_t *Buf) const override {
memset(Buf + OutputSectionOff, 0, getSize());
for (const Export &E : Config->Exports) {
uint8_t *P = Buf + OutputSectionOff + E.Ordinal * 4;
uint32_t Bit = 0;
@ -418,30 +488,6 @@ private:
} // anonymous namespace
uint64_t IdataContents::getDirSize() {
return Dirs.size() * sizeof(ImportDirectoryTableEntry);
}
uint64_t IdataContents::getIATSize() {
return Addresses.size() * ptrSize();
}
// Returns a list of .idata contents.
// See Microsoft PE/COFF spec 5.4 for details.
std::vector<Chunk *> IdataContents::getChunks() {
create();
// The loader assumes a specific order of data.
// Add each type in the correct order.
std::vector<Chunk *> V;
V.insert(V.end(), Dirs.begin(), Dirs.end());
V.insert(V.end(), Lookups.begin(), Lookups.end());
V.insert(V.end(), Addresses.begin(), Addresses.end());
V.insert(V.end(), Hints.begin(), Hints.end());
V.insert(V.end(), DLLNames.begin(), DLLNames.end());
return V;
}
void IdataContents::create() {
std::vector<std::vector<DefinedImportData *>> V = binImports(Imports);
@ -465,8 +511,8 @@ void IdataContents::create() {
Hints.push_back(C);
}
// Terminate with null values.
Lookups.push_back(make<NullChunk>(ptrSize()));
Addresses.push_back(make<NullChunk>(ptrSize()));
Lookups.push_back(make<NullChunk>(Config->Wordsize));
Addresses.push_back(make<NullChunk>(Config->Wordsize));
for (int I = 0, E = Syms.size(); I < E; ++I)
Syms[I]->setLocation(Addresses[Base + I]);
@ -555,6 +601,8 @@ Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *S, Chunk *Dir) {
return make<ThunkChunkX86>(S, Dir, Helper);
case ARMNT:
return make<ThunkChunkARM>(S, Dir, Helper);
case ARM64:
return make<ThunkChunkARM64>(S, Dir, Helper);
default:
llvm_unreachable("unsupported machine type");
}

deps/lld/COFF/DLL.h vendored

@ -19,19 +19,12 @@ namespace coff {
// Windows-specific.
// IdataContents creates all chunks for the DLL import table.
// You are supposed to call add() to add symbols and then
// call getChunks() to get a list of chunks.
// call create() to populate the chunk vectors.
class IdataContents {
public:
void add(DefinedImportData *Sym) { Imports.push_back(Sym); }
bool empty() { return Imports.empty(); }
std::vector<Chunk *> getChunks();
uint64_t getDirRVA() { return Dirs[0]->getRVA(); }
uint64_t getDirSize();
uint64_t getIATRVA() { return Addresses[0]->getRVA(); }
uint64_t getIATSize();
private:
void create();
std::vector<DefinedImportData *> Imports;

deps/lld/COFF/Driver.cpp vendored

@ -32,6 +32,7 @@
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/TarWriter.h"
@ -56,7 +57,7 @@ Configuration *Config;
LinkerDriver *Driver;
bool link(ArrayRef<const char *> Args, bool CanExitEarly, raw_ostream &Diag) {
errorHandler().LogName = sys::path::filename(Args[0]);
errorHandler().LogName = args::getFilenameWithoutExe(Args[0]);
errorHandler().ErrorOS = &Diag;
errorHandler().ColorDiagnostics = Diag.has_colors();
errorHandler().ErrorLimitExceededMsg =
@ -370,13 +371,30 @@ Optional<StringRef> LinkerDriver::findFile(StringRef Filename) {
return Path;
}
// MinGW specific. If an embedded directive specifies linking to
// foo.lib, but it isn't found, try libfoo.a instead.
StringRef LinkerDriver::doFindLibMinGW(StringRef Filename) {
if (Filename.contains('/') || Filename.contains('\\'))
return Filename;
SmallString<128> S = Filename;
sys::path::replace_extension(S, ".a");
StringRef LibName = Saver.save("lib" + S.str());
return doFindFile(LibName);
}
// Find library file from search path.
StringRef LinkerDriver::doFindLib(StringRef Filename) {
// Add ".lib" to Filename if that has no file extension.
bool HasExt = Filename.contains('.');
if (!HasExt)
Filename = Saver.save(Filename + ".lib");
return doFindFile(Filename);
StringRef Ret = doFindFile(Filename);
// For MinGW, if the find above didn't turn up anything, try
// looking for a MinGW formatted library name.
if (Config->MinGW && Ret == Filename)
return doFindLibMinGW(Filename);
return Ret;
}
// Resolves a library path. /nodefaultlib options are taken into
@ -429,29 +447,48 @@ StringRef LinkerDriver::findDefaultEntry() {
assert(Config->Subsystem != IMAGE_SUBSYSTEM_UNKNOWN &&
"must handle /subsystem before calling this");
// As a special case, if /nodefaultlib is given, we directly look for an
// entry point. This is because, if no default library is linked, users
// need to define an entry point instead of a "main".
bool FindMain = !Config->NoDefaultLibAll;
if (Config->MinGW)
return mangle(Config->Subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI
? "WinMainCRTStartup"
: "mainCRTStartup");
if (Config->Subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) {
if (findUnderscoreMangle(FindMain ? "WinMain" : "WinMainCRTStartup"))
return mangle("WinMainCRTStartup");
if (findUnderscoreMangle(FindMain ? "wWinMain" : "wWinMainCRTStartup"))
return mangle("wWinMainCRTStartup");
if (findUnderscoreMangle("wWinMain")) {
if (!findUnderscoreMangle("WinMain"))
return mangle("wWinMainCRTStartup");
warn("found both wWinMain and WinMain; using latter");
}
return mangle("WinMainCRTStartup");
}
if (findUnderscoreMangle(FindMain ? "main" : "mainCRTStartup"))
return mangle("mainCRTStartup");
if (findUnderscoreMangle(FindMain ? "wmain" : "wmainCRTStartup"))
return mangle("wmainCRTStartup");
return "";
if (findUnderscoreMangle("wmain")) {
if (!findUnderscoreMangle("main"))
return mangle("wmainCRTStartup");
warn("found both wmain and main; using latter");
}
return mangle("mainCRTStartup");
}
WindowsSubsystem LinkerDriver::inferSubsystem() {
if (Config->DLL)
return IMAGE_SUBSYSTEM_WINDOWS_GUI;
if (findUnderscoreMangle("main") || findUnderscoreMangle("wmain"))
if (Config->MinGW)
return IMAGE_SUBSYSTEM_WINDOWS_CUI;
if (findUnderscoreMangle("WinMain") || findUnderscoreMangle("wWinMain"))
// Note that link.exe infers the subsystem from the presence of these
// functions even if /entry: or /nodefaultlib are passed which causes them
// to not be called.
bool HaveMain = findUnderscoreMangle("main");
bool HaveWMain = findUnderscoreMangle("wmain");
bool HaveWinMain = findUnderscoreMangle("WinMain");
bool HaveWWinMain = findUnderscoreMangle("wWinMain");
if (HaveMain || HaveWMain) {
if (HaveWinMain || HaveWWinMain) {
warn(std::string("found ") + (HaveMain ? "main" : "wmain") + " and " +
(HaveWinMain ? "WinMain" : "wWinMain") +
"; defaulting to /subsystem:console");
}
return IMAGE_SUBSYSTEM_WINDOWS_CUI;
}
if (HaveWinMain || HaveWWinMain)
return IMAGE_SUBSYSTEM_WINDOWS_GUI;
return IMAGE_SUBSYSTEM_UNKNOWN;
}
@ -497,26 +534,65 @@ static std::string createResponseFile(const opt::InputArgList &Args,
return Data.str();
}
static unsigned getDefaultDebugType(const opt::InputArgList &Args) {
unsigned DebugTypes = static_cast<unsigned>(DebugType::CV);
enum class DebugKind { Unknown, None, Full, FastLink, GHash, Dwarf, Symtab };
static DebugKind parseDebugKind(const opt::InputArgList &Args) {
auto *A = Args.getLastArg(OPT_debug, OPT_debug_opt);
if (!A)
return DebugKind::None;
if (A->getNumValues() == 0)
return DebugKind::Full;
DebugKind Debug = StringSwitch<DebugKind>(A->getValue())
.CaseLower("none", DebugKind::None)
.CaseLower("full", DebugKind::Full)
.CaseLower("fastlink", DebugKind::FastLink)
// LLD extensions
.CaseLower("ghash", DebugKind::GHash)
.CaseLower("dwarf", DebugKind::Dwarf)
.CaseLower("symtab", DebugKind::Symtab)
.Default(DebugKind::Unknown);
if (Debug == DebugKind::FastLink) {
warn("/debug:fastlink unsupported; using /debug:full");
return DebugKind::Full;
}
if (Debug == DebugKind::Unknown) {
error("/debug: unknown option: " + Twine(A->getValue()));
return DebugKind::None;
}
return Debug;
}
static unsigned parseDebugTypes(const opt::InputArgList &Args) {
unsigned DebugTypes = static_cast<unsigned>(DebugType::None);
if (auto *A = Args.getLastArg(OPT_debugtype)) {
SmallVector<StringRef, 3> Types;
// Split the option's value (e.g. "cv,pdata"), not its spelling.
StringRef(A->getValue()).split(Types, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
for (StringRef Type : Types) {
unsigned V = StringSwitch<unsigned>(Type.lower())
.Case("cv", static_cast<unsigned>(DebugType::CV))
.Case("pdata", static_cast<unsigned>(DebugType::PData))
.Case("fixup", static_cast<unsigned>(DebugType::Fixup))
.Default(0);
if (V == 0) {
warn("/debugtype: unknown option: " + Twine(A->getValue()));
continue;
}
DebugTypes |= V;
}
return DebugTypes;
}
// Default debug types
DebugTypes = static_cast<unsigned>(DebugType::CV);
if (Args.hasArg(OPT_driver))
DebugTypes |= static_cast<unsigned>(DebugType::PData);
if (Args.hasArg(OPT_profile))
DebugTypes |= static_cast<unsigned>(DebugType::Fixup);
return DebugTypes;
}
static unsigned parseDebugType(StringRef Arg) {
SmallVector<StringRef, 3> Types;
Arg.split(Types, ',', /*KeepEmpty=*/false);
unsigned DebugTypes = static_cast<unsigned>(DebugType::None);
for (StringRef Type : Types)
DebugTypes |= StringSwitch<unsigned>(Type.lower())
.Case("cv", static_cast<unsigned>(DebugType::CV))
.Case("pdata", static_cast<unsigned>(DebugType::PData))
.Case("fixup", static_cast<unsigned>(DebugType::Fixup))
.Default(0);
return DebugTypes;
}
@ -676,131 +752,6 @@ static void parseModuleDefs(StringRef Path) {
}
}
// A helper function for filterBitcodeFiles.
static bool needsRebuilding(MemoryBufferRef MB) {
// The MSVC linker doesn't support thin archives, so if it's a thin
// archive, we always need to rebuild it.
std::unique_ptr<Archive> File =
CHECK(Archive::create(MB), "Failed to read " + MB.getBufferIdentifier());
if (File->isThin())
return true;
// Returns true if the archive contains at least one bitcode file.
for (MemoryBufferRef Member : getArchiveMembers(File.get()))
if (identify_magic(Member.getBuffer()) == file_magic::bitcode)
return true;
return false;
}
// Opens a given path as an archive file and removes bitcode files
// from it if any exist. This function is to appease the MSVC linker as
// their linker doesn't like archive files containing non-native
// object files.
//
// If a given archive doesn't contain bitcode files, the archive path
// is returned as-is. Otherwise, a new temporary file is created and
// its path is returned.
static Optional<std::string>
filterBitcodeFiles(StringRef Path, std::vector<std::string> &TemporaryFiles) {
std::unique_ptr<MemoryBuffer> MB = CHECK(
MemoryBuffer::getFile(Path, -1, false, true), "could not open " + Path);
MemoryBufferRef MBRef = MB->getMemBufferRef();
file_magic Magic = identify_magic(MBRef.getBuffer());
if (Magic == file_magic::bitcode)
return None;
if (Magic != file_magic::archive)
return Path.str();
if (!needsRebuilding(MBRef))
return Path.str();
std::unique_ptr<Archive> File =
CHECK(Archive::create(MBRef),
MBRef.getBufferIdentifier() + ": failed to parse archive");
std::vector<NewArchiveMember> New;
for (MemoryBufferRef Member : getArchiveMembers(File.get()))
if (identify_magic(Member.getBuffer()) != file_magic::bitcode)
New.emplace_back(Member);
if (New.empty())
return None;
log("Creating a temporary archive for " + Path + " to remove bitcode files");
SmallString<128> S;
if (std::error_code EC = sys::fs::createTemporaryFile(
"lld-" + sys::path::stem(Path), ".lib", S))
fatal("cannot create a temporary file: " + EC.message());
std::string Temp = S.str();
TemporaryFiles.push_back(Temp);
Error E =
llvm::writeArchive(Temp, New, /*WriteSymtab=*/true, Archive::Kind::K_GNU,
/*Deterministic=*/true,
/*Thin=*/false);
handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
error("failed to create a new archive " + S.str() + ": " + EI.message());
});
return Temp;
}
// Create response file contents and invoke the MSVC linker.
void LinkerDriver::invokeMSVC(opt::InputArgList &Args) {
std::string Rsp = "/nologo\n";
std::vector<std::string> Temps;
// Write out archive members that we used in symbol resolution and pass these
// to MSVC before any archives, so that MSVC uses the same objects to satisfy
// references.
for (ObjFile *Obj : ObjFile::Instances) {
if (Obj->ParentName.empty())
continue;
SmallString<128> S;
int Fd;
if (auto EC = sys::fs::createTemporaryFile(
"lld-" + sys::path::filename(Obj->ParentName), ".obj", Fd, S))
fatal("cannot create a temporary file: " + EC.message());
raw_fd_ostream OS(Fd, /*shouldClose*/ true);
OS << Obj->MB.getBuffer();
Temps.push_back(S.str());
Rsp += quote(S) + "\n";
}
for (auto *Arg : Args) {
switch (Arg->getOption().getID()) {
case OPT_linkrepro:
case OPT_lldmap:
case OPT_lldmap_file:
case OPT_lldsavetemps:
case OPT_msvclto:
// LLD-specific options are stripped.
break;
case OPT_opt:
if (!StringRef(Arg->getValue()).startswith("lld"))
Rsp += toString(*Arg) + " ";
break;
case OPT_INPUT: {
if (Optional<StringRef> Path = doFindFile(Arg->getValue())) {
if (Optional<std::string> S = filterBitcodeFiles(*Path, Temps))
Rsp += quote(*S) + "\n";
continue;
}
Rsp += quote(Arg->getValue()) + "\n";
break;
}
default:
Rsp += toString(*Arg) + "\n";
}
}
std::vector<StringRef> ObjFiles = Symtab->compileBitcodeFiles();
runMSVCLinker(Rsp, ObjFiles);
for (StringRef Path : Temps)
sys::fs::remove(Path);
}
void LinkerDriver::enqueueTask(std::function<void()> Task) {
TaskQueue.push_back(std::move(Task));
}
@ -856,6 +807,97 @@ static void parseOrderFile(StringRef Arg) {
}
}
static void markAddrsig(Symbol *S) {
if (auto *D = dyn_cast_or_null<Defined>(S))
if (Chunk *C = D->getChunk())
C->KeepUnique = true;
}
static void findKeepUniqueSections() {
// Exported symbols could be address-significant in other executables or DSOs,
// so we conservatively mark them as address-significant.
for (Export &R : Config->Exports)
markAddrsig(R.Sym);
// Visit the address-significance table in each object file and mark each
// referenced symbol as address-significant.
for (ObjFile *Obj : ObjFile::Instances) {
ArrayRef<Symbol *> Syms = Obj->getSymbols();
if (Obj->AddrsigSec) {
ArrayRef<uint8_t> Contents;
Obj->getCOFFObj()->getSectionContents(Obj->AddrsigSec, Contents);
const uint8_t *Cur = Contents.begin();
while (Cur != Contents.end()) {
unsigned Size;
const char *Err;
uint64_t SymIndex = decodeULEB128(Cur, &Size, Contents.end(), &Err);
if (Err)
fatal(toString(Obj) + ": could not decode addrsig section: " + Err);
if (SymIndex >= Syms.size())
fatal(toString(Obj) + ": invalid symbol index in addrsig section");
markAddrsig(Syms[SymIndex]);
Cur += Size;
}
} else {
// If an object file does not have an address-significance table,
// conservatively mark all of its symbols as address-significant.
for (Symbol *S : Syms)
markAddrsig(S);
}
}
}
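// Hand-rolled ULEB128 decoder, shown for illustration only; the loop above
// uses llvm::decodeULEB128 from llvm/Support/LEB128.h. Each byte carries 7
// payload bits, least significant group first; a set high bit means more
// bytes follow.
#include <cassert>
#include <cstdint>

static uint64_t decodeULEB128Sketch(const uint8_t *P, unsigned *Len) {
  uint64_t Value = 0;
  unsigned Shift = 0, N = 0;
  uint8_t Byte;
  do {
    Byte = P[N++];
    Value |= static_cast<uint64_t>(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  *Len = N;
  return Value;
}

int main() {
  const uint8_t Buf[] = {0xe5, 0x8e, 0x26}; // the classic 624485 example
  unsigned Len;
  assert(decodeULEB128Sketch(Buf, &Len) == 624485 && Len == 3);
}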
// link.exe replaces each %foo% in AltPath with the contents of environment
// variable foo, and adds the two magic env vars _PDB (expands to the basename
// of pdb's output path) and _EXT (expands to the extension of the output
// binary).
// lld only supports %_PDB% and %_EXT% and warns on references to all other env
// vars.
static void parsePDBAltPath(StringRef AltPath) {
SmallString<128> Buf;
StringRef PDBBasename =
sys::path::filename(Config->PDBPath, sys::path::Style::windows);
StringRef BinaryExtension =
sys::path::extension(Config->OutputFile, sys::path::Style::windows);
if (!BinaryExtension.empty())
BinaryExtension = BinaryExtension.substr(1); // %_EXT% does not include '.'.
// Invariant:
// +--------- Cursor ('a...' might be the empty string).
// | +----- FirstMark
// | | +- SecondMark
// v v v
// a...%...%...
size_t Cursor = 0;
while (Cursor < AltPath.size()) {
size_t FirstMark, SecondMark;
if ((FirstMark = AltPath.find('%', Cursor)) == StringRef::npos ||
(SecondMark = AltPath.find('%', FirstMark + 1)) == StringRef::npos) {
// Didn't find another full fragment, treat rest of string as literal.
Buf.append(AltPath.substr(Cursor));
break;
}
// Found a full fragment. Append text in front of first %, and interpret
// text between first and second % as variable name.
Buf.append(AltPath.substr(Cursor, FirstMark - Cursor));
StringRef Var = AltPath.substr(FirstMark, SecondMark - FirstMark + 1);
if (Var.equals_lower("%_pdb%"))
Buf.append(PDBBasename);
else if (Var.equals_lower("%_ext%"))
Buf.append(BinaryExtension);
else {
warn("only %_PDB% and %_EXT% supported in /pdbaltpath:, keeping " +
Var + " as literal");
Buf.append(Var);
}
Cursor = SecondMark + 1;
}
Config->PDBAltPath = Buf;
}
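// Worked example (hypothetical values, tracing the loop above): with
// Config->PDBPath = "out\\foo.pdb" and Config->OutputFile = "out\\foo.exe",
// /pdbaltpath:srv\%_PDB%.%_EXT% expands to "srv\foo.pdb.exe", while an
// unrecognized variable such as %_FOO% is kept literally and warned about.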
void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// If the first command line argument is "/lib", link.exe acts like lib.exe.
// We call our own implementation of lib.exe that understands bitcode files.
@ -944,11 +986,17 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// Handle /ignore
for (auto *Arg : Args.filtered(OPT_ignore)) {
if (StringRef(Arg->getValue()) == "4037")
Config->WarnMissingOrderSymbol = false;
else if (StringRef(Arg->getValue()) == "4217")
Config->WarnLocallyDefinedImported = false;
// Other warning numbers are ignored.
SmallVector<StringRef, 8> Vec;
StringRef(Arg->getValue()).split(Vec, ',');
for (StringRef S : Vec) {
if (S == "4037")
Config->WarnMissingOrderSymbol = false;
else if (S == "4099")
Config->WarnDebugInfoUnusable = false;
else if (S == "4217")
Config->WarnLocallyDefinedImported = false;
// Other warning numbers are ignored.
}
}
// Handle /out
@ -962,20 +1010,26 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// Handle /force or /force:unresolved
if (Args.hasArg(OPT_force, OPT_force_unresolved))
Config->Force = true;
Config->ForceUnresolved = true;
// Handle /force or /force:multiple
if (Args.hasArg(OPT_force, OPT_force_multiple))
Config->ForceMultiple = true;
// Handle /debug
if (Args.hasArg(OPT_debug, OPT_debug_dwarf, OPT_debug_ghash)) {
DebugKind Debug = parseDebugKind(Args);
if (Debug == DebugKind::Full || Debug == DebugKind::Dwarf ||
Debug == DebugKind::GHash) {
Config->Debug = true;
Config->Incremental = true;
if (auto *Arg = Args.getLastArg(OPT_debugtype))
Config->DebugTypes = parseDebugType(Arg->getValue());
else
Config->DebugTypes = getDefaultDebugType(Args);
}
// Handle /debugtype
Config->DebugTypes = parseDebugTypes(Args);
// Handle /pdb
bool ShouldCreatePDB = Args.hasArg(OPT_debug, OPT_debug_ghash);
bool ShouldCreatePDB =
(Debug == DebugKind::Full || Debug == DebugKind::GHash);
if (ShouldCreatePDB) {
if (auto *Arg = Args.getLastArg(OPT_pdb))
Config->PDBPath = Arg->getValue();
@ -1096,7 +1150,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
Config->Implib = Arg->getValue();
// Handle /opt.
bool DoGC = !Args.hasArg(OPT_debug) || Args.hasArg(OPT_profile);
bool DoGC = Debug == DebugKind::None || Args.hasArg(OPT_profile);
unsigned ICFLevel =
Args.hasArg(OPT_profile) ? 0 : 1; // 0: off, 1: limited, 2: on
unsigned TailMerge = 1;
@ -1181,6 +1235,12 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
parseMerge(".xdata=.rdata");
parseMerge(".bss=.data");
if (Config->MinGW) {
parseMerge(".ctors=.rdata");
parseMerge(".dtors=.rdata");
parseMerge(".CRT=.rdata");
}
// Handle /section
for (auto *Arg : Args.filtered(OPT_section))
parseSection(Arg->getValue());
@ -1234,9 +1294,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
Config->NxCompat = Args.hasFlag(OPT_nxcompat, OPT_nxcompat_no, true);
Config->TerminalServerAware =
!Config->DLL && Args.hasFlag(OPT_tsaware, OPT_tsaware_no, true);
Config->DebugDwarf = Args.hasArg(OPT_debug_dwarf);
Config->DebugGHashes = Args.hasArg(OPT_debug_ghash);
Config->DebugSymtab = Args.hasArg(OPT_debug_symtab);
Config->DebugDwarf = Debug == DebugKind::Dwarf;
Config->DebugGHashes = Debug == DebugKind::GHash;
Config->DebugSymtab = Debug == DebugKind::Symtab;
Config->MapFile = getMapFile(Args);
@ -1266,10 +1326,14 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
return;
std::set<sys::fs::UniqueID> WholeArchives;
for (auto *Arg : Args.filtered(OPT_wholearchive_file))
if (Optional<StringRef> Path = doFindFile(Arg->getValue()))
AutoExporter Exporter;
for (auto *Arg : Args.filtered(OPT_wholearchive_file)) {
if (Optional<StringRef> Path = doFindFile(Arg->getValue())) {
if (Optional<sys::fs::UniqueID> ID = getUniqueID(*Path))
WholeArchives.insert(*ID);
Exporter.addWholeArchive(*Path);
}
}
// A predicate returning true if a given path is an argument for
// /wholearchive:, or /wholearchive is enabled globally.
@ -1300,12 +1364,16 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// Read all input files given via the command line.
run();
if (errorCount())
return;
// We should have inferred a machine type by now from the input files, but if
// not we assume x64.
if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
warn("/machine is not specified. x64 is assumed");
Config->Machine = AMD64;
}
Config->Wordsize = Config->is64() ? 8 : 4;
// Input files can be Windows resource files (.res files). We use
// WindowsResource to convert resource files to a regular COFF file,
@ -1418,6 +1486,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// tools won't work correctly if these assumptions are not held.
sys::fs::make_absolute(Config->PDBAltPath);
sys::path::remove_dots(Config->PDBAltPath);
} else {
// Don't do this earlier, so that Config->OutputFile is ready.
parsePDBAltPath(Config->PDBAltPath);
}
}
@ -1441,6 +1512,13 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// Needed for MSVC 2017 15.5 CRT.
Symtab->addAbsolute(mangle("__enclave_config"), 0);
if (Config->MinGW) {
Symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
Symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
Symtab->addAbsolute(mangle("__CTOR_LIST__"), 0);
Symtab->addAbsolute(mangle("__DTOR_LIST__"), 0);
}
// This code may add new undefined symbols to the link, which may enqueue more
// symbol resolution tasks, so we need to continue executing tasks until we
// converge.
@ -1480,18 +1558,29 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
if (errorCount())
return;
// If /msvclto is given, we use the MSVC linker to link LTO output files.
// This is useful because MSVC link.exe can generate complete PDBs.
if (Args.hasArg(OPT_msvclto)) {
invokeMSVC(Args);
return;
}
// Do LTO by compiling bitcode input files to a set of native COFF files then
// link those files.
Symtab->addCombinedLTOObjects();
run();
if (Config->MinGW) {
// Load any further object files that might be needed for doing automatic
// imports.
//
// For cases with no automatically imported symbols, this iterates once
// over the symbol table and doesn't do anything.
//
// For the normal case with a few automatically imported symbols, this
// should only need to be run once, since each new object file imported
// is an import library and wouldn't add any new undefined references,
// but there's nothing stopping the __imp_ symbols from coming from a
// normal object file as well (although that won't be used for the
// actual autoimport later on). If this pass adds new undefined references,
// we won't iterate further to resolve them.
Symtab->loadMinGWAutomaticImports();
run();
}
// Make sure we have resolved all symbols.
Symtab->reportRemainingUndefines();
if (errorCount())
@ -1510,7 +1599,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// are chosen to be exported.
if (Config->DLL && ((Config->MinGW && Config->Exports.empty()) ||
Args.hasArg(OPT_export_all_symbols))) {
AutoExporter Exporter;
Exporter.initSymbolExcludes();
Symtab->forEachSymbol([=](Symbol *S) {
auto *Def = dyn_cast<Defined>(S);
@ -1574,8 +1663,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
markLive(Symtab->getChunks());
// Identify identical COMDAT sections to merge them.
if (Config->DoICF)
if (Config->DoICF) {
findKeepUniqueSections();
doICF(Symtab->getChunks());
}
// Write the result.
writeResult();


@ -89,6 +89,7 @@ private:
Optional<StringRef> findLib(StringRef Filename);
StringRef doFindFile(StringRef Filename);
StringRef doFindLib(StringRef Filename);
StringRef doFindLibMinGW(StringRef Filename);
// Parses LIB environment which contains a list of search paths.
void addLibSearchPaths();
@ -114,8 +115,6 @@ private:
StringRef findDefaultEntry();
WindowsSubsystem inferSubsystem();
void invokeMSVC(llvm::opt::InputArgList &Args);
void addBuffer(std::unique_ptr<MemoryBuffer> MB, bool WholeArchive);
void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName,
StringRef ParentName);


@ -713,26 +713,6 @@ MemoryBufferRef convertResToCOFF(ArrayRef<MemoryBufferRef> MBs) {
return MBRef;
}
// Run MSVC link.exe for given in-memory object files.
// Command line options are copied from those given to LLD.
// This is for the /msvclto option.
void runMSVCLinker(std::string Rsp, ArrayRef<StringRef> Objects) {
// Write the in-memory object files to disk.
std::vector<TemporaryFile> Temps;
for (StringRef S : Objects) {
Temps.emplace_back("lto", "obj", S);
Rsp += quote(Temps.back().Path) + "\n";
}
log("link.exe " + Rsp);
// Run MSVC link.exe.
Temps.emplace_back("lto", "rsp", Rsp);
Executor E("link.exe");
E.add(Twine("@" + Temps.back().Path));
E.run();
}
// Create OptTable
// Create prefix string literals used in Options.td
@ -883,7 +863,9 @@ std::vector<const char *> ArgParser::tokenize(StringRef S) {
}
void printHelp(const char *Argv0) {
COFFOptTable().PrintHelp(outs(), Argv0, "LLVM Linker", false);
COFFOptTable().PrintHelp(outs(),
(std::string(Argv0) + " [options] file...").c_str(),
"LLVM Linker", false);
}
} // namespace coff

deps/lld/COFF/ICF.cpp vendored

@ -22,6 +22,7 @@
#include "Chunks.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Threads.h"
#include "lld/Common/Timer.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Debug.h"
@ -80,7 +81,7 @@ private:
bool ICF::isEligible(SectionChunk *C) {
// Non-comdat chunks, dead chunks, and writable chunks are not eligible.
bool Writable = C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
if (!C->isCOMDAT() || !C->isLive() || Writable)
if (!C->isCOMDAT() || !C->Live || Writable)
return false;
// Code sections are eligible.
@ -93,7 +94,11 @@ bool ICF::isEligible(SectionChunk *C) {
return true;
// So are vtables.
return C->Sym && C->Sym->getName().startswith("??_7");
if (C->Sym && C->Sym->getName().startswith("??_7"))
return true;
// Anything else not in an address-significance table is eligible.
return !C->KeepUnique;
}
// Split an equivalence class into smaller classes.
@ -222,10 +227,10 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
size_t Boundaries[NumShards + 1];
Boundaries[0] = 0;
Boundaries[NumShards] = Chunks.size();
for_each_n(parallel::par, size_t(1), NumShards, [&](size_t I) {
parallelForEachN(1, NumShards, [&](size_t I) {
Boundaries[I] = findBoundary((I - 1) * Step, Chunks.size());
});
for_each_n(parallel::par, size_t(1), NumShards + 1, [&](size_t I) {
parallelForEachN(1, NumShards + 1, [&](size_t I) {
if (Boundaries[I - 1] < Boundaries[I]) {
forEachClassRange(Boundaries[I - 1], Boundaries[I], Fn);
}
@ -257,11 +262,23 @@ void ICF::run(ArrayRef<Chunk *> Vec) {
SC->Class[0] = NextId++;
// Initially, we use hash values to partition sections.
for_each(parallel::par, Chunks.begin(), Chunks.end(), [&](SectionChunk *SC) {
// Set MSB to 1 to avoid collisions with non-hash classes.
SC->Class[0] = xxHash64(SC->getContents()) | (1 << 31);
parallelForEach(Chunks, [&](SectionChunk *SC) {
SC->Class[0] = xxHash64(SC->getContents());
});
// Combine the hashes of the sections referenced by each section into its
// hash.
for (unsigned Cnt = 0; Cnt != 2; ++Cnt) {
parallelForEach(Chunks, [&](SectionChunk *SC) {
uint32_t Hash = SC->Class[Cnt % 2];
for (Symbol *B : SC->symbols())
if (auto *Sym = dyn_cast_or_null<DefinedRegular>(B))
Hash += Sym->getChunk()->Class[Cnt % 2];
// Set MSB to 1 to avoid collisions with non-hash classes.
SC->Class[(Cnt + 1) % 2] = Hash | (1U << 31);
});
}
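The two passes above fold the hashes of each section's referenced chunks into its own class, so two sections land in the same initial class only if their one- and two-step reference neighborhoods hash alike; a self-contained sketch of one refinement step, with a hypothetical Node standing in for SectionChunk:

    #include <cstdint>
    #include <vector>

    struct Node {
      uint32_t Class[2];         // alternating hash slots, as in Class[Cnt % 2]
      std::vector<Node *> Succs; // chunks this chunk relocates against
    };

    static void refineOnce(std::vector<Node *> &Nodes, unsigned Cnt) {
      for (Node *N : Nodes) {
        uint32_t Hash = N->Class[Cnt % 2];
        for (Node *S : N->Succs)
          Hash += S->Class[Cnt % 2];                 // order-insensitive combine
        N->Class[(Cnt + 1) % 2] = Hash | (1U << 31); // MSB marks a hash class
      }
    }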
// From now on, sections in Chunks are ordered so that sections in
// the same group are consecutive in the vector.
std::stable_sort(Chunks.begin(), Chunks.end(),


@ -54,8 +54,16 @@ std::vector<BitcodeFile *> BitcodeFile::Instances;
static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F,
Symbol *Source, Symbol *Target) {
if (auto *U = dyn_cast<Undefined>(Source)) {
if (U->WeakAlias && U->WeakAlias != Target)
if (U->WeakAlias && U->WeakAlias != Target) {
// Weak aliases as produced by GCC are named in the form
// .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
// of another symbol emitted near the weak symbol.
// Just use the definition from the first object file that defined
// this weak symbol.
if (Config->MinGW)
return;
Symtab->reportDuplicate(Source, F);
}
U->WeakAlias = Target;
}
}
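For reference, the naming scheme the comment describes, with hypothetical symbols:

    // a.o: .weak.foo.bar  (alias for weak symbol foo, emitted near bar)
    // b.o: .weak.foo.baz  (alias for the same foo, emitted near baz)
    // Both are weak aliases for foo; with Config->MinGW set, the first
    // definition wins silently instead of being reported as a duplicate,
    // matching GNU ld behavior.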
@ -147,9 +155,10 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
const coff_aux_section_definition *Def,
StringRef LeaderName) {
const coff_section *Sec;
StringRef Name;
if (auto EC = COFFObj->getSection(SectionNumber, Sec))
fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message());
StringRef Name;
if (auto EC = COFFObj->getSectionName(Sec, Name))
fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " +
EC.message());
@ -161,6 +170,11 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
return nullptr;
}
if (Name == ".llvm_addrsig") {
AddrsigSec = Sec;
return nullptr;
}
// Object files may have DWARF debug info or MS CodeView debug info
// (or both).
//
@ -168,8 +182,8 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
// of the linker; they are just a data section containing relocations.
// We can just link them to complete debug info.
//
// CodeView needs a linker support. We need to interpret and debug
// info, and then write it to a separate .pdb file.
// CodeView needs linker support. We need to interpret debug info,
// and then write it to a separate .pdb file.
// Ignore DWARF debug info unless /debug is given.
if (!Config->Debug && Name.startswith(".debug_"))
@ -267,10 +281,17 @@ Symbol *ObjFile::createRegular(COFFSymbolRef Sym) {
COFFObj->getSymbolName(Sym, Name);
if (SC)
return Symtab->addRegular(this, Name, Sym.getGeneric(), SC);
// For MinGW symbols named .weak.* that point to a discarded section,
// don't create an Undefined symbol. If nothing ever refers to the symbol,
// everything should be fine. If something actually refers to the symbol
// (e.g. the undefined weak alias), linking will fail due to undefined
// references at the end.
if (Config->MinGW && Name.startswith(".weak."))
return nullptr;
return Symtab->addUndefined(Name, this, false);
}
if (SC)
return make<DefinedRegular>(this, /*Name*/ "", false,
return make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
/*IsExternal*/ false, Sym.getGeneric(), SC);
return nullptr;
}
@ -318,7 +339,7 @@ void ObjFile::initializeSymbols() {
for (uint32_t I : PendingIndexes) {
COFFSymbolRef Sym = check(COFFObj->getSymbol(I));
if (auto *Def = Sym.getSectionDefinition()) {
if (const coff_aux_section_definition *Def = Sym.getSectionDefinition()) {
if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
readAssociativeDefinition(Sym, Def);
else if (Config->MinGW)
@ -401,7 +422,7 @@ Optional<Symbol *> ObjFile::createDefined(
std::tie(Leader, Prevailing) =
Symtab->addComdat(this, GetName(), Sym.getGeneric());
} else {
Leader = make<DefinedRegular>(this, /*Name*/ "", false,
Leader = make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
/*IsExternal*/ false, Sym.getGeneric());
Prevailing = true;
}
@ -421,7 +442,7 @@ Optional<Symbol *> ObjFile::createDefined(
// leader symbol by setting the section's ComdatDefs pointer if we encounter a
// non-associative comdat.
if (SparseChunks[SectionNumber] == PendingComdat) {
if (auto *Def = Sym.getSectionDefinition()) {
if (const coff_aux_section_definition *Def = Sym.getSectionDefinition()) {
if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
readAssociativeDefinition(Sym, Def);
else
@ -429,8 +450,10 @@ Optional<Symbol *> ObjFile::createDefined(
}
}
// readAssociativeDefinition() writes to SparseChunks, so need to check again.
if (SparseChunks[SectionNumber] == PendingComdat)
return None;
return createRegular(Sym);
}
@ -481,6 +504,10 @@ void ImportFile::parse() {
ExternalName = ExtName;
ImpSym = Symtab->addImportData(ImpName, this);
// If this was a duplicate, we logged an error but may continue;
// in this case, ImpSym is nullptr.
if (!ImpSym)
return;
if (Hdr->getType() == llvm::COFF::IMPORT_CONST)
static_cast<void>(Symtab->addImportData(Name, this));


@ -15,6 +15,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
@ -122,9 +123,12 @@ public:
return Symbols[SymbolIndex];
}
// Returns the underying COFF file.
// Returns the underlying COFF file.
COFFObjectFile *getCOFFObj() { return COFFObj.get(); }
// Whether the object was already merged into the final PDB or not
bool wasProcessedForPDB() const { return !!ModuleDBI; }
static std::vector<ObjFile *> Instances;
// Flags in the absolute @feat.00 symbol if it is present. These usually
@ -145,6 +149,13 @@ public:
// if we are not producing a PDB.
llvm::pdb::DbiModuleDescriptorBuilder *ModuleDBI = nullptr;
const coff_section *AddrsigSec = nullptr;
// When using Microsoft precompiled headers, this is the PCH's key.
// The same key is used by both the precompiled object, and objects using the
// precompiled object. Any difference indicates out-of-date objects.
llvm::Optional<uint32_t> PCHSignature;
private:
void initializeChunks();
void initializeSymbols();


@ -60,6 +60,9 @@ static std::unique_ptr<lto::LTO> createLTO() {
C.DisableVerify = true;
C.DiagHandler = diagnosticHandler;
C.OptLevel = Config->LTOO;
C.CPU = GetCPUStr();
C.MAttrs = GetMAttrs();
if (Config->SaveTemps)
checkError(C.addSaveTemps(std::string(Config->OutputFile) + ".",
/*UseInputModulePath*/ true));


@ -110,7 +110,7 @@ void coff::writeMapFile(ArrayRef<OutputSection *> OutputSections) {
writeHeader(OS, Sec->getRVA(), Sec->getVirtualSize(), /*Align=*/PageSize);
OS << Sec->Name << '\n';
for (Chunk *C : Sec->getChunks()) {
for (Chunk *C : Sec->Chunks) {
auto *SC = dyn_cast<SectionChunk>(C);
if (!SC)
continue;


@ -32,13 +32,13 @@ void markLive(ArrayRef<Chunk *> Chunks) {
// COMDAT section chunks are dead by default. Add non-COMDAT chunks.
for (Chunk *C : Chunks)
if (auto *SC = dyn_cast<SectionChunk>(C))
if (SC->isLive())
if (SC->Live)
Worklist.push_back(SC);
auto Enqueue = [&](SectionChunk *C) {
if (C->isLive())
if (C->Live)
return;
C->markLive();
C->Live = true;
Worklist.push_back(C);
};
@ -57,7 +57,7 @@ void markLive(ArrayRef<Chunk *> Chunks) {
while (!Worklist.empty()) {
SectionChunk *SC = Worklist.pop_back_val();
assert(SC->isLive() && "We mark as live when pushing onto the worklist!");
assert(SC->Live && "We mark as live when pushing onto the worklist!");
// Mark all symbols listed in the relocation table for this section.
for (Symbol *B : SC->symbols())


@ -19,7 +19,23 @@ using namespace lld::coff;
using namespace llvm;
using namespace llvm::COFF;
AutoExporter::AutoExporter() {
void AutoExporter::initSymbolExcludes() {
ExcludeSymbolPrefixes = {
// Import symbols
"__imp_",
"__IMPORT_DESCRIPTOR_",
// Extra import symbols from GNU import libraries
"__nm_",
// C++ symbols
"__rtti_",
"__builtin_",
// Artificial symbols such as .refptr
".",
};
ExcludeSymbolSuffixes = {
"_iname",
"_NULL_THUNK_DATA",
};
if (Config->Machine == I386) {
ExcludeSymbols = {
"__NULL_IMPORT_DESCRIPTOR",
@ -36,9 +52,10 @@ AutoExporter::AutoExporter() {
"_DllEntryPoint@12",
"_DllMainCRTStartup@12",
};
ExcludeSymbolPrefixes.insert("__head_");
} else {
ExcludeSymbols = {
"_NULL_IMPORT_DESCRIPTOR",
"__NULL_IMPORT_DESCRIPTOR",
"_pei386_runtime_relocator",
"do_pseudo_reloc",
"impure_ptr",
@ -52,8 +69,11 @@ AutoExporter::AutoExporter() {
"DllEntryPoint",
"DllMainCRTStartup",
};
ExcludeSymbolPrefixes.insert("_head_");
}
}
AutoExporter::AutoExporter() {
ExcludeLibs = {
"libgcc",
"libgcc_s",
@ -64,6 +84,7 @@ AutoExporter::AutoExporter() {
"libsupc++",
"libobjc",
"libgcj",
"libclang_rt.builtins",
"libclang_rt.builtins-aarch64",
"libclang_rt.builtins-arm",
"libclang_rt.builtins-i386",
@ -90,6 +111,13 @@ AutoExporter::AutoExporter() {
};
}
void AutoExporter::addWholeArchive(StringRef Path) {
StringRef LibName = sys::path::filename(Path);
// Drop the file extension, to match the processing below.
LibName = LibName.substr(0, LibName.rfind('.'));
ExcludeLibs.erase(LibName);
}
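Behavior sketch for the name handling above (path hypothetical):

    // addWholeArchive("path/to/libgcc.a"):
    //   sys::path::filename -> "libgcc.a"; dropping ".a" -> "libgcc"
    //   ExcludeLibs.erase("libgcc")
    // so /wholearchive:path/to/libgcc.a re-enables auto-exporting from a
    // library that is otherwise on the exclusion list.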
bool AutoExporter::shouldExport(Defined *Sym) const {
if (!Sym || !Sym->isLive() || !Sym->getChunk())
return false;
@ -101,10 +129,12 @@ bool AutoExporter::shouldExport(Defined *Sym) const {
if (ExcludeSymbols.count(Sym->getName()))
return false;
// Don't export anything that looks like an import symbol (which also can be
// a manually defined data symbol with such a name).
if (Sym->getName().startswith("__imp_"))
return false;
for (StringRef Prefix : ExcludeSymbolPrefixes.keys())
if (Sym->getName().startswith(Prefix))
return false;
for (StringRef Suffix : ExcludeSymbolSuffixes.keys())
if (Sym->getName().endswith(Suffix))
return false;
// If a corresponding __imp_ symbol exists and is defined, don't export it.
if (Symtab->find(("__imp_" + Sym->getName()).str()))


@ -23,7 +23,13 @@ class AutoExporter {
public:
AutoExporter();
void initSymbolExcludes();
void addWholeArchive(StringRef Path);
llvm::StringSet<> ExcludeSymbols;
llvm::StringSet<> ExcludeSymbolPrefixes;
llvm::StringSet<> ExcludeSymbolSuffixes;
llvm::StringSet<> ExcludeLibs;
llvm::StringSet<> ExcludeObjects;


@ -66,13 +66,18 @@ def wholearchive_file : P<"wholearchive", "Include all object files from this ar
def disallowlib : Joined<["/", "-", "-?"], "disallowlib:">, Alias<nodefaultlib>;
def manifest : F<"manifest">;
def manifest_colon : P<"manifest", "Create manifest file">;
def manifest : F<"manifest">, HelpText<"Create .manifest file">;
def manifest_colon : P<
"manifest",
"NO disables manifest output; EMBED[,ID=#] embeds manifest as resource in the image">;
def manifestuac : P<"manifestuac", "User access control">;
def manifestfile : P<"manifestfile", "Manifest file path">;
def manifestdependency : P<"manifestdependency",
"Attributes for <dependency> in manifest file">;
def manifestinput : P<"manifestinput", "Specify manifest file">;
def manifestfile : P<"manifestfile", "Manifest output path, with /manifest">;
def manifestdependency : P<
"manifestdependency",
"Attributes for <dependency> element in manifest file; implies /manifest">;
def manifestinput : P<
"manifestinput",
"Additional manifest inputs; only valid with /manifest:embed">;
// We cannot use multiclass P because class name "incl" is different
// from its command line option name. We do this because "include" is
@ -85,22 +90,28 @@ def deffile : Joined<["/", "-"], "def:">,
HelpText<"Use module-definition file">;
def debug : F<"debug">, HelpText<"Embed a symbol table in the image">;
def debug_full : F<"debug:full">, Alias<debug>;
def debug_opt : P<"debug", "Embed a symbol table in the image with option">;
def debugtype : P<"debugtype", "Debug Info Options">;
def dll : F<"dll">, HelpText<"Create a DLL">;
def driver : P<"driver", "Generate a Windows NT Kernel Mode Driver">;
def nodefaultlib_all : F<"nodefaultlib">;
def noentry : F<"noentry">;
def nodefaultlib_all : F<"nodefaultlib">,
HelpText<"Remove all default libraries">;
def noentry : F<"noentry">,
HelpText<"Don't add reference to DllMainCRTStartup; only valid with /dll">;
def profile : F<"profile">;
def repro : F<"Brepro">, HelpText<"Use a hash of the executable as the PE header timestamp">;
def repro : F<"Brepro">,
HelpText<"Use a hash of the executable as the PE header timestamp">;
def swaprun_cd : F<"swaprun:cd">;
def swaprun_net : F<"swaprun:net">;
def verbose : F<"verbose">;
def wholearchive_flag : F<"wholearchive">;
def force : F<"force">,
HelpText<"Allow undefined and multiply defined symbols when creating executables">;
def force_unresolved : F<"force:unresolved">,
HelpText<"Allow undefined symbols when creating executables">;
def force_unresolved : F<"force:unresolved">;
def force_multiple : F<"force:multiple">,
HelpText<"Allow multiply defined symbols when creating executables">;
defm WX : B<"WX", "Treat warnings as errors", "Don't treat warnings as errors">;
defm allowbind : B<"allowbind", "Enable DLL binding (default)",
@ -139,13 +150,9 @@ def help : F<"help">;
def help_q : Flag<["/?", "-?"], "">, Alias<help>;
// LLD extensions
def debug_ghash : F<"debug:ghash">;
def debug_dwarf : F<"debug:dwarf">;
def debug_symtab : F<"debug:symtab">;
def export_all_symbols : F<"export-all-symbols">;
def kill_at : F<"kill-at">;
def lldmingw : F<"lldmingw">;
def msvclto : F<"msvclto">;
def output_def : Joined<["/", "-"], "output-def:">;
def pdb_source_path : P<"pdbsourcepath",
"Base path used to make relative source file path absolute in PDB">;

deps/lld/COFF/PDB.cpp vendored

File diff suppressed because it is too large

deps/lld/COFF/PDB.h vendored

@ -28,7 +28,7 @@ class SymbolTable;
void createPDB(SymbolTable *Symtab,
llvm::ArrayRef<OutputSection *> OutputSections,
llvm::ArrayRef<uint8_t> SectionTable,
const llvm::codeview::DebugInfo &BuildId);
llvm::codeview::DebugInfo *BuildId);
std::pair<llvm::StringRef, uint32_t> getFileLine(const SectionChunk *C,
uint32_t Addr);


@ -60,16 +60,16 @@ void SymbolTable::addFile(InputFile *File) {
}
static void errorOrWarn(const Twine &S) {
if (Config->Force)
if (Config->ForceUnresolved)
warn(S);
else
error(S);
}
// Returns the name of the symbol in SC whose value is <= Addr that is closest
// to Addr. This is generally the name of the global variable or function whose
// definition contains Addr.
static StringRef getSymbolName(SectionChunk *SC, uint32_t Addr) {
// Returns the symbol in SC whose value is <= Addr that is closest to Addr.
// This is generally the global variable or function whose definition contains
// Addr.
static Symbol *getSymbol(SectionChunk *SC, uint32_t Addr) {
DefinedRegular *Candidate = nullptr;
for (Symbol *S : SC->File->getSymbols()) {
@ -81,14 +81,12 @@ static StringRef getSymbolName(SectionChunk *SC, uint32_t Addr) {
Candidate = D;
}
if (!Candidate)
return "";
return Candidate->getName();
return Candidate;
}
static std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) {
std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) {
struct Location {
StringRef SymName;
Symbol *Sym;
std::pair<StringRef, uint32_t> FileLine;
};
std::vector<Location> Locations;
@ -102,14 +100,14 @@ static std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) {
continue;
std::pair<StringRef, uint32_t> FileLine =
getFileLine(SC, R.VirtualAddress);
StringRef SymName = getSymbolName(SC, R.VirtualAddress);
if (!FileLine.first.empty() || !SymName.empty())
Locations.push_back({SymName, FileLine});
Symbol *Sym = getSymbol(SC, R.VirtualAddress);
if (!FileLine.first.empty() || Sym)
Locations.push_back({Sym, FileLine});
}
}
if (Locations.empty())
return "\n>>> referenced by " + toString(File) + "\n";
return "\n>>> referenced by " + toString(File);
std::string Out;
llvm::raw_string_ostream OS(Out);
@ -119,13 +117,87 @@ static std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) {
OS << Loc.FileLine.first << ":" << Loc.FileLine.second
<< "\n>>> ";
OS << toString(File);
if (!Loc.SymName.empty())
OS << ":(" << Loc.SymName << ')';
if (Loc.Sym)
OS << ":(" << toString(*Loc.Sym) << ')';
}
OS << '\n';
return OS.str();
}
void SymbolTable::loadMinGWAutomaticImports() {
for (auto &I : SymMap) {
Symbol *Sym = I.second;
auto *Undef = dyn_cast<Undefined>(Sym);
if (!Undef)
continue;
if (!Sym->IsUsedInRegularObj)
continue;
StringRef Name = Undef->getName();
if (Name.startswith("__imp_"))
continue;
// If we have an undefined symbol, but we have a Lazy representing a
// symbol we could load from file, make sure to load that.
Lazy *L = dyn_cast_or_null<Lazy>(find(("__imp_" + Name).str()));
if (!L || L->PendingArchiveLoad)
continue;
log("Loading lazy " + L->getName() + " from " + L->File->getName() +
" for automatic import");
L->PendingArchiveLoad = true;
L->File->addMember(&L->Sym);
}
}
bool SymbolTable::handleMinGWAutomaticImport(Symbol *Sym, StringRef Name) {
if (Name.startswith("__imp_"))
return false;
Defined *Imp = dyn_cast_or_null<Defined>(find(("__imp_" + Name).str()));
if (!Imp)
return false;
// Replace the reference directly to a variable with a reference
// to the import address table instead. This obviously isn't right,
// but we mark the symbol as IsRuntimePseudoReloc, and a later pass
// will add runtime pseudo relocations for every relocation against
// this Symbol. The runtime pseudo relocation framework expects the
// reference itself to point at the IAT entry.
size_t ImpSize = 0;
if (isa<DefinedImportData>(Imp)) {
log("Automatically importing " + Name + " from " +
cast<DefinedImportData>(Imp)->getDLLName());
ImpSize = sizeof(DefinedImportData);
} else if (isa<DefinedRegular>(Imp)) {
log("Automatically importing " + Name + " from " +
toString(cast<DefinedRegular>(Imp)->File));
ImpSize = sizeof(DefinedRegular);
} else {
warn("unable to automatically import " + Name + " from " + Imp->getName() +
" from " + toString(cast<DefinedRegular>(Imp)->File) +
"; unexpected symbol type");
return false;
}
Sym->replaceKeepingName(Imp, ImpSize);
Sym->IsRuntimePseudoReloc = true;
// There may exist symbols named .refptr.<name> which only consist
// of a single pointer to <name>. If it turns out <name> is
// automatically imported, we don't need to keep the .refptr.<name>
// pointer at all, but redirect all accesses to it to the IAT entry
// for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
DefinedRegular *Refptr =
dyn_cast_or_null<DefinedRegular>(find((".refptr." + Name).str()));
if (Refptr && Refptr->getChunk()->getSize() == Config->Wordsize) {
SectionChunk *SC = dyn_cast_or_null<SectionChunk>(Refptr->getChunk());
if (SC && SC->Relocs.size() == 1 && *SC->symbols().begin() == Sym) {
log("Replacing .refptr." + Name + " with " + Imp->getName());
Refptr->getChunk()->Live = false;
Refptr->replaceKeepingName(Imp, ImpSize);
}
}
return true;
}
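Spelled out, the .refptr indirection that the last block collapses looks roughly like this for a hypothetical symbol foo:

    // What the compiler conceptually emitted in the referencing object:
    extern int foo;                // may turn out to live in another DLL
    static int *refptr_foo = &foo; // single-pointer chunk named .refptr.foo
    int get() { return *refptr_foo; }
    // If foo is auto-imported, lld retargets uses of .refptr.foo to the
    // IAT entry __imp_foo and marks the .refptr.foo chunk dead.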
void SymbolTable::reportRemainingUndefines() {
SmallPtrSet<Symbol *, 8> Undefs;
DenseMap<Symbol *, Symbol *> LocalImports;
@ -169,9 +241,17 @@ void SymbolTable::reportRemainingUndefines() {
}
}
// We don't want to report missing Microsoft precompiled header symbols.
// A proper message will be emitted instead in PDBLinker::aquirePrecompObj.
if (Name.contains("_PchSym_"))
continue;
if (Config->MinGW && handleMinGWAutomaticImport(Sym, Name))
continue;
// Remaining undefined symbols are not fatal if /force is specified.
// They are replaced with dummy defined symbols.
if (Config->Force)
if (Config->ForceUnresolved)
replaceSymbol<DefinedAbsolute>(Sym, Name, 0);
Undefs.insert(Sym);
}
@ -181,10 +261,10 @@ void SymbolTable::reportRemainingUndefines() {
for (Symbol *B : Config->GCRoot) {
if (Undefs.count(B))
errorOrWarn("<root>: undefined symbol: " + B->getName());
errorOrWarn("<root>: undefined symbol: " + toString(*B));
if (Config->WarnLocallyDefinedImported)
if (Symbol *Imp = LocalImports.lookup(B))
warn("<root>: locally defined symbol imported: " + Imp->getName() +
warn("<root>: locally defined symbol imported: " + toString(*Imp) +
" (defined in " + toString(Imp->getFile()) + ") [LNK4217]");
}
@ -195,34 +275,41 @@ void SymbolTable::reportRemainingUndefines() {
if (!Sym)
continue;
if (Undefs.count(Sym))
errorOrWarn("undefined symbol: " + Sym->getName() +
errorOrWarn("undefined symbol: " + toString(*Sym) +
getSymbolLocations(File, SymIndex));
if (Config->WarnLocallyDefinedImported)
if (Symbol *Imp = LocalImports.lookup(Sym))
warn(toString(File) + ": locally defined symbol imported: " +
Imp->getName() + " (defined in " + toString(Imp->getFile()) +
") [LNK4217]");
warn(toString(File) +
": locally defined symbol imported: " + toString(*Imp) +
" (defined in " + toString(Imp->getFile()) + ") [LNK4217]");
}
}
}
std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) {
bool Inserted = false;
Symbol *&Sym = SymMap[CachedHashStringRef(Name)];
if (Sym)
return {Sym, false};
Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
Sym->IsUsedInRegularObj = false;
Sym->PendingArchiveLoad = false;
return {Sym, true};
if (!Sym) {
Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
Sym->IsUsedInRegularObj = false;
Sym->PendingArchiveLoad = false;
Inserted = true;
}
return {Sym, Inserted};
}
std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name, InputFile *File) {
std::pair<Symbol *, bool> Result = insert(Name);
if (!File || !isa<BitcodeFile>(File))
Result.first->IsUsedInRegularObj = true;
return Result;
}
Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F,
bool IsWeakAlias) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(Name);
if (!F || !isa<BitcodeFile>(F))
S->IsUsedInRegularObj = true;
std::tie(S, WasInserted) = insert(Name, F);
if (WasInserted || (isa<Lazy>(S) && IsWeakAlias)) {
replaceSymbol<Undefined>(S, Name);
return S;
@ -253,14 +340,20 @@ void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) {
}
void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) {
error("duplicate symbol: " + toString(*Existing) + " in " +
toString(Existing->getFile()) + " and in " + toString(NewFile));
std::string Msg = "duplicate symbol: " + toString(*Existing) + " in " +
toString(Existing->getFile()) + " and in " +
toString(NewFile);
if (Config->ForceMultiple)
warn(Msg);
else
error(Msg);
}
Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
std::tie(S, WasInserted) = insert(N, nullptr);
S->IsUsedInRegularObj = true;
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
replaceSymbol<DefinedAbsolute>(S, N, Sym);
@ -272,7 +365,7 @@ Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) {
Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
std::tie(S, WasInserted) = insert(N, nullptr);
S->IsUsedInRegularObj = true;
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
replaceSymbol<DefinedAbsolute>(S, N, VA);
@ -284,7 +377,7 @@ Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) {
Symbol *SymbolTable::addSynthetic(StringRef N, Chunk *C) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
std::tie(S, WasInserted) = insert(N, nullptr);
S->IsUsedInRegularObj = true;
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S))
replaceSymbol<DefinedSynthetic>(S, N, C);
@ -298,9 +391,7 @@ Symbol *SymbolTable::addRegular(InputFile *F, StringRef N,
SectionChunk *C) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
if (!isa<BitcodeFile>(F))
S->IsUsedInRegularObj = true;
std::tie(S, WasInserted) = insert(N, F);
if (WasInserted || !isa<DefinedRegular>(S))
replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ false,
/*IsExternal*/ true, Sym, C);
@ -314,9 +405,7 @@ SymbolTable::addComdat(InputFile *F, StringRef N,
const coff_symbol_generic *Sym) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
if (!isa<BitcodeFile>(F))
S->IsUsedInRegularObj = true;
std::tie(S, WasInserted) = insert(N, F);
if (WasInserted || !isa<DefinedRegular>(S)) {
replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ true,
/*IsExternal*/ true, Sym, nullptr);
@ -331,9 +420,7 @@ Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size,
const coff_symbol_generic *Sym, CommonChunk *C) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
if (!isa<BitcodeFile>(F))
S->IsUsedInRegularObj = true;
std::tie(S, WasInserted) = insert(N, F);
if (WasInserted || !isa<DefinedCOFF>(S))
replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C);
else if (auto *DC = dyn_cast<DefinedCommon>(S))
@ -345,7 +432,7 @@ Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size,
Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(N);
std::tie(S, WasInserted) = insert(N, nullptr);
S->IsUsedInRegularObj = true;
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) {
replaceSymbol<DefinedImportData>(S, N, F);
@ -360,7 +447,7 @@ Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID,
uint16_t Machine) {
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(Name);
std::tie(S, WasInserted) = insert(Name, nullptr);
S->IsUsedInRegularObj = true;
if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) {
replaceSymbol<DefinedImportThunk>(S, Name, ID, Machine);


@ -54,6 +54,9 @@ public:
// symbols.
void reportRemainingUndefines();
void loadMinGWAutomaticImports();
bool handleMinGWAutomaticImport(Symbol *Sym, StringRef Name);
// Returns a list of chunks of selected symbols.
std::vector<Chunk *> getChunks();
@ -108,7 +111,10 @@ public:
}
private:
/// Inserts symbol if not already present.
std::pair<Symbol *, bool> insert(StringRef Name);
/// Same as insert(Name), but also sets IsUsedInRegularObj.
std::pair<Symbol *, bool> insert(StringRef Name, InputFile *F);
StringRef findByPrefix(StringRef Prefix);
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> SymMap;
@ -117,6 +123,8 @@ private:
extern SymbolTable *Symtab;
std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex);
} // namespace coff
} // namespace lld


@ -54,7 +54,7 @@ InputFile *Symbol::getFile() {
bool Symbol::isLive() const {
if (auto *R = dyn_cast<DefinedRegular>(this))
return R->getChunk()->isLive();
return R->getChunk()->Live;
if (auto *Imp = dyn_cast<DefinedImportData>(this))
return Imp->File->Live;
if (auto *Imp = dyn_cast<DefinedImportThunk>(this))
@ -63,6 +63,13 @@ bool Symbol::isLive() const {
return true;
}
// MinGW specific.
void Symbol::replaceKeepingName(Symbol *Other, size_t Size) {
StringRef OrigName = Name;
memcpy(this, Other, Size);
Name = OrigName;
}
COFFSymbolRef DefinedCOFF::getCOFFSymbol() {
size_t SymSize = cast<ObjFile>(File)->getCOFFObj()->getSymbolTableEntrySize();
if (SymSize == sizeof(coff_symbol16))


@ -39,9 +39,9 @@ class Symbol {
public:
enum Kind {
// The order of these is significant. We start with the regular defined
// symbols as those are the most prevelant and the zero tag is the cheapest
// symbols as those are the most prevalent and the zero tag is the cheapest
// to set. Among the defined kinds, the lower the kind is preferred over
// the higher kind when testing wether one symbol should take precedence
// the higher kind when testing whether one symbol should take precedence
// over another.
DefinedRegularKind = 0,
DefinedCommonKind,
@ -66,6 +66,8 @@ public:
// Returns the symbol name.
StringRef getName();
void replaceKeepingName(Symbol *Other, size_t Size);
// Returns the file from which this symbol was created.
InputFile *getFile();
@ -78,7 +80,7 @@ protected:
explicit Symbol(Kind K, StringRef N = "")
: SymbolKind(K), IsExternal(true), IsCOMDAT(false),
WrittenToSymtab(false), PendingArchiveLoad(false), IsGCRoot(false),
Name(N) {}
IsRuntimePseudoReloc(false), Name(N) {}
const unsigned SymbolKind : 8;
unsigned IsExternal : 1;
@ -102,6 +104,8 @@ public:
/// True if we've already added this symbol to the list of GC roots.
unsigned IsGCRoot : 1;
unsigned IsRuntimePseudoReloc : 1;
protected:
StringRef Name;
};
@ -331,8 +335,8 @@ private:
Chunk *Data;
};
// If you have a symbol "__imp_foo" in your object file, a symbol name
// "foo" becomes automatically available as a pointer to "__imp_foo".
// If you have a symbol "foo" in your object file, a symbol name
// "__imp_foo" becomes automatically available as a pointer to "foo".
// This class is for such automatically-created symbols.
// Yes, this is an odd feature. We didn't intend to implement that.
// This is here just for compatibility with MSVC.
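Concretely, for a hypothetical symbol foo defined in an object file, the behavior amounts to the linker acting as if the file also contained:

    int foo;               // the ordinary definition
    int *__imp_foo = &foo; // auto-created pointer, for MSVC compatibility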

File diff suppressed because it is too large


@ -34,8 +34,8 @@ public:
Header.Characteristics = Chars;
}
void addChunk(Chunk *C);
void insertChunkAtStart(Chunk *C);
void merge(OutputSection *Other);
ArrayRef<Chunk *> getChunks() { return Chunks; }
void addPermissions(uint32_t C);
void setPermissions(uint32_t C);
uint64_t getRVA() { return Header.VirtualAddress; }
@ -62,9 +62,11 @@ public:
llvm::StringRef Name;
llvm::object::coff_section Header = {};
std::vector<Chunk *> Chunks;
std::vector<Chunk *> OrigChunks;
private:
uint32_t StringTableOff = 0;
std::vector<Chunk *> Chunks;
};
}


@ -13,6 +13,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Path.h"
using namespace llvm;
using namespace lld;
@ -40,7 +41,7 @@ std::vector<StringRef> lld::args::getStrings(opt::InputArgList &Args, int Id) {
uint64_t lld::args::getZOptionValue(opt::InputArgList &Args, int Id,
StringRef Key, uint64_t Default) {
for (auto *Arg : Args.filtered(Id)) {
for (auto *Arg : Args.filtered_reverse(Id)) {
std::pair<StringRef, StringRef> KV = StringRef(Arg->getValue()).split('=');
if (KV.first == Key) {
uint64_t Result = Default;
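Iterating the filtered arguments in reverse lets the function return at the first match yet still honor the last occurrence on the command line; e.g. (option name chosen for illustration):

    // ld.lld -z stack-size=0x1000 -z stack-size=0x2000 ...
    // getZOptionValue(Args, OPT_z, "stack-size", 0) == 0x2000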
@ -64,3 +65,9 @@ std::vector<StringRef> lld::args::getLines(MemoryBufferRef MB) {
}
return Ret;
}
StringRef lld::args::getFilenameWithoutExe(StringRef Path) {
if (Path.endswith_lower(".exe"))
return sys::path::stem(Path);
return sys::path::filename(Path);
}
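Expected results of the helper above, for illustration:

    // getFilenameWithoutExe("C:\\tools\\lld-link.exe") -> "lld-link"
    // getFilenameWithoutExe("LINK.EXE")                -> "LINK" (case-insensitive)
    // getFilenameWithoutExe("/usr/bin/ld.lld")         -> "ld.lld"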


@ -47,8 +47,9 @@ ErrorHandler &lld::errorHandler() {
}
void lld::exitLld(int Val) {
// Delete the output buffer so that any temporary file is deleted.
errorHandler().OutputBuffer.reset();
// Delete any temporary file, while keeping the memory mapping open.
if (errorHandler().OutputBuffer)
errorHandler().OutputBuffer->discard();
// Dealloc/destroy ManagedStatic variables before calling
// _exit(). In a non-LTO build, this is a nop. In an LTO


@ -16,14 +16,6 @@
#include <mutex>
#include <vector>
#if defined(_MSC_VER)
#include <Windows.h>
// DbgHelp.h must be included after Windows.h.
#include <DbgHelp.h>
#pragma comment(lib, "dbghelp.lib")
#endif
using namespace llvm;
using namespace lld;
@ -45,18 +37,21 @@ Optional<std::string> lld::demangleItanium(StringRef Name) {
return S;
}
Optional<std::string> lld::demangleMSVC(StringRef S) {
#if defined(_MSC_VER)
// UnDecorateSymbolName is not thread-safe, so we need a mutex.
static std::mutex Mu;
std::lock_guard<std::mutex> Lock(Mu);
Optional<std::string> lld::demangleMSVC(StringRef Name) {
std::string Prefix;
if (Name.consume_front("__imp_"))
Prefix = "__declspec(dllimport) ";
char Buf[4096];
if (S.startswith("?"))
if (size_t Len = UnDecorateSymbolName(S.str().c_str(), Buf, sizeof(Buf), 0))
return std::string(Buf, Len);
#endif
return None;
// Demangle only C++ names.
if (!Name.startswith("?"))
return None;
char *Buf = microsoftDemangle(Name.str().c_str(), nullptr, nullptr, nullptr);
if (!Buf)
return None;
std::string S(Buf);
free(Buf);
return Prefix + S;
}
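For illustration (demangled spellings approximate):

    // demangleMSVC("?func@@YAHXZ")       -> "int __cdecl func(void)"
    // demangleMSVC("__imp_?func@@YAHXZ") -> "__declspec(dllimport) int __cdecl func(void)"
    // demangleMSVC("_c_symbol")          -> None (only ?-prefixed C++ names)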
StringMatcher::StringMatcher(ArrayRef<StringRef> Pat) {


@ -32,3 +32,4 @@ llvm::Optional<llvm::CodeModel::Model> lld::GetCodeModelFromCMModel() {
}
std::string lld::GetCPUStr() { return ::getCPUStr(); }
std::vector<std::string> lld::GetMAttrs() { return ::MAttrs; }


@ -356,7 +356,7 @@ static uint64_t scanCortexA53Errata843419(InputSection *IS, uint64_t &Off,
}
uint64_t PatchOff = 0;
const uint8_t *Buf = IS->Data.begin();
const uint8_t *Buf = IS->data().begin();
const ulittle32_t *InstBuf = reinterpret_cast<const ulittle32_t *>(Buf + Off);
uint32_t Instr1 = *InstBuf++;
uint32_t Instr2 = *InstBuf++;
@ -411,7 +411,7 @@ uint64_t lld::elf::Patch843419Section::getLDSTAddr() const {
void lld::elf::Patch843419Section::writeTo(uint8_t *Buf) {
// Copy the instruction that we will be replacing with a branch in the
// Patchee Section.
write32le(Buf, read32le(Patchee->Data.begin() + PatcheeOffset));
write32le(Buf, read32le(Patchee->data().begin() + PatcheeOffset));
// Apply any relocation transferred from the original PatcheeSection.
// For a SyntheticSection Buf already has OutSecOff added, but relocateAlloc
@ -451,7 +451,7 @@ void AArch64Err843419Patcher::init() {
continue;
if (!IsCodeMapSymbol(Def) && !IsDataMapSymbol(Def))
continue;
if (auto *Sec = dyn_cast<InputSection>(Def->Section))
if (auto *Sec = dyn_cast_or_null<InputSection>(Def->Section))
if (Sec->Flags & SHF_EXECINSTR)
SectionMap[Sec].push_back(Def);
}
@ -487,7 +487,8 @@ void AArch64Err843419Patcher::insertPatches(
InputSectionDescription &ISD, std::vector<Patch843419Section *> &Patches) {
uint64_t ISLimit;
uint64_t PrevISLimit = ISD.Sections.front()->OutSecOff;
uint64_t PatchUpperBound = PrevISLimit + Target->ThunkSectionSpacing;
uint64_t PatchUpperBound = PrevISLimit + Target->getThunkSectionSpacing();
uint64_t OutSecAddr = ISD.Sections.front()->getParent()->Addr;
// Set the OutSecOff of patches to the place where we want to insert them.
// We use a similar strategy to Thunk placement. Place patches roughly
@ -498,12 +499,12 @@ void AArch64Err843419Patcher::insertPatches(
ISLimit = IS->OutSecOff + IS->getSize();
if (ISLimit > PatchUpperBound) {
while (PatchIt != PatchEnd) {
if ((*PatchIt)->getLDSTAddr() >= PrevISLimit)
if ((*PatchIt)->getLDSTAddr() - OutSecAddr >= PrevISLimit)
break;
(*PatchIt)->OutSecOff = PrevISLimit;
++PatchIt;
}
PatchUpperBound = PrevISLimit + Target->ThunkSectionSpacing;
PatchUpperBound = PrevISLimit + Target->getThunkSectionSpacing();
}
PrevISLimit = ISLimit;
}
@ -538,20 +539,24 @@ static void implementPatch(uint64_t AdrpAddr, uint64_t PatcheeOffset,
InputSection *IS,
std::vector<Patch843419Section *> &Patches) {
// There may be a relocation at the same offset that we are patching. There
// are three cases that we need to consider.
// are four cases that we need to consider.
// Case 1: R_AARCH64_JUMP26 branch relocation. We have already patched this
// instance of the erratum on a previous patch and altered the relocation. We
// have nothing more to do.
// Case 2: A load/store register (unsigned immediate) class relocation. There
// Case 2: A TLS Relaxation R_RELAX_TLS_IE_TO_LE. In this case the ADRP that
// we read will be transformed into a MOVZ later so we actually don't match
// the sequence and have nothing more to do.
// Case 3: A load/store register (unsigned immediate) class relocation. There
// are two of these R_AARCH_LD64_ABS_LO12_NC and R_AARCH_LD64_GOT_LO12_NC and
// they are both absolute. We need to add the same relocation to the patch,
// and replace the relocation with a R_AARCH_JUMP26 branch relocation.
// Case 3: No relocation. We must create a new R_AARCH64_JUMP26 branch
// Case 4: No relocation. We must create a new R_AARCH64_JUMP26 branch
// relocation at the offset.
auto RelIt = std::find_if(
IS->Relocations.begin(), IS->Relocations.end(),
[=](const Relocation &R) { return R.Offset == PatcheeOffset; });
if (RelIt != IS->Relocations.end() && RelIt->Type == R_AARCH64_JUMP26)
if (RelIt != IS->Relocations.end() &&
(RelIt->Type == R_AARCH64_JUMP26 || RelIt->Expr == R_RELAX_TLS_IE_TO_LE))
return;
log("detected cortex-a53-843419 erratum sequence starting at " +
@ -598,7 +603,7 @@ AArch64Err843419Patcher::patchInputSectionDescription(
auto DataSym = std::next(CodeSym);
uint64_t Off = (*CodeSym)->Value;
uint64_t Limit =
(DataSym == MapSyms.end()) ? IS->Data.size() : (*DataSym)->Value;
(DataSym == MapSyms.end()) ? IS->data().size() : (*DataSym)->Value;
while (Off < Limit) {
uint64_t StartAddr = IS->getVA(Off);


@ -41,6 +41,7 @@ public:
int32_t Index, unsigned RelOff) const override;
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
uint64_t BranchAddr, const Symbol &S) const override;
uint32_t getThunkSectionSpacing() const override;
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
bool usesOnlyLowPageBits(RelType Type) const override;
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
@ -57,6 +58,7 @@ AArch64::AArch64() {
RelativeRel = R_AARCH64_RELATIVE;
IRelativeRel = R_AARCH64_IRELATIVE;
GotRel = R_AARCH64_GLOB_DAT;
NoneRel = R_AARCH64_NONE;
PltRel = R_AARCH64_JUMP_SLOT;
TlsDescRel = R_AARCH64_TLSDESC;
TlsGotRel = R_AARCH64_TLS_TPREL64;
@ -66,22 +68,18 @@ AArch64::AArch64() {
PltHeaderSize = 32;
DefaultMaxPageSize = 65536;
// It doesn't seem to be documented anywhere, but tls on aarch64 uses variant
// 1 of the tls structures and the tcb size is 16.
TcbSize = 16;
NeedsThunks = true;
// Align to the 2 MiB page size (known as a superpage or huge page).
// FreeBSD automatically promotes 2 MiB-aligned allocations.
DefaultImageBase = 0x200000;
// See comment in Arch/ARM.cpp for a more detailed explanation of
// ThunkSectionSpacing. For AArch64 the only branches we are permitted to
// Thunk have a range of +/- 128 MiB
ThunkSectionSpacing = (128 * 1024 * 1024) - 0x30000;
NeedsThunks = true;
}
RelExpr AArch64::getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const {
switch (Type) {
case R_AARCH64_TLSDESC_ADR_PAGE21:
return R_TLSDESC_PAGE;
return R_AARCH64_TLSDESC_PAGE;
case R_AARCH64_TLSDESC_LD64_LO12:
case R_AARCH64_TLSDESC_ADD_LO12:
return R_TLSDESC;
@ -107,13 +105,13 @@ RelExpr AArch64::getRelExpr(RelType Type, const Symbol &S,
case R_AARCH64_LD_PREL_LO19:
return R_PC;
case R_AARCH64_ADR_PREL_PG_HI21:
return R_PAGE_PC;
return R_AARCH64_PAGE_PC;
case R_AARCH64_LD64_GOT_LO12_NC:
case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
return R_GOT;
case R_AARCH64_ADR_GOT_PAGE:
case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
return R_GOT_PAGE_PC;
return R_AARCH64_GOT_PAGE_PC;
case R_AARCH64_NONE:
return R_NONE;
default:
@ -125,7 +123,7 @@ RelExpr AArch64::adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const {
if (Expr == R_RELAX_TLS_GD_TO_IE) {
if (Type == R_AARCH64_TLSDESC_ADR_PAGE21)
return R_RELAX_TLS_GD_TO_IE_PAGE_PC;
return R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC;
return R_RELAX_TLS_GD_TO_IE_ABS;
}
return Expr;
@ -156,7 +154,7 @@ RelType AArch64::getDynRel(RelType Type) const {
}
void AArch64::writeGotPlt(uint8_t *Buf, const Symbol &) const {
write64le(Buf, InX::Plt->getVA());
write64le(Buf, In.Plt->getVA());
}
void AArch64::writePltHeader(uint8_t *Buf) const {
@ -172,8 +170,8 @@ void AArch64::writePltHeader(uint8_t *Buf) const {
};
memcpy(Buf, PltData, sizeof(PltData));
uint64_t Got = InX::GotPlt->getVA();
uint64_t Plt = InX::Plt->getVA();
uint64_t Got = In.GotPlt->getVA();
uint64_t Plt = In.Plt->getVA();
relocateOne(Buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
getAArch64Page(Got + 16) - getAArch64Page(Plt + 4));
relocateOne(Buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, Got + 16);
@ -208,6 +206,13 @@ bool AArch64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
return !inBranchRange(Type, BranchAddr, Dst);
}
uint32_t AArch64::getThunkSectionSpacing() const {
// See comment in Arch/ARM.cpp for a more detailed explanation of
// getThunkSectionSpacing(). For AArch64 the only branches we are permitted to
// Thunk have a range of +/- 128 MiB
return (128 * 1024 * 1024) - 0x30000;
}
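The constant works out as:

    // 128 MiB             = 0x8000000
    // 0x8000000 - 0x30000 = 0x7fd0000 (~127.8 MiB)
    // The 0x30000 of slack (16,384 thunks x 12 bytes, per the Arch/ARM.cpp
    // comment) keeps thunks at the end of a full ThunkSection reachable.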
bool AArch64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
if (Type != R_AARCH64_CALL26 && Type != R_AARCH64_JUMP26)
return true;
@ -338,7 +343,7 @@ void AArch64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
or32le(Loc, (Val & 0xFFFC) << 3);
break;
case R_AARCH64_TLSLE_ADD_TPREL_HI12:
checkInt(Loc, Val, 24, Type);
checkUInt(Loc, Val, 24, Type);
or32AArch64Imm(Loc, Val >> 12);
break;
case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:


@ -35,6 +35,7 @@ public:
AMDGPU::AMDGPU() {
RelativeRel = R_AMDGPU_RELATIVE64;
GotRel = R_AMDGPU_ABS64;
NoneRel = R_AMDGPU_NONE;
GotEntrySize = 8;
}


@ -40,6 +40,7 @@ public:
void addPltHeaderSymbols(InputSection &ISD) const override;
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
uint64_t BranchAddr, const Symbol &S) const override;
uint32_t getThunkSectionSpacing() const override;
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
};
@ -50,6 +51,7 @@ ARM::ARM() {
RelativeRel = R_ARM_RELATIVE;
IRelativeRel = R_ARM_IRELATIVE;
GotRel = R_ARM_GLOB_DAT;
NoneRel = R_ARM_NONE;
PltRel = R_ARM_JUMP_SLOT;
TlsGotRel = R_ARM_TLS_TPOFF32;
TlsModuleIndexRel = R_ARM_TLS_DTPMOD32;
@ -59,41 +61,8 @@ ARM::ARM() {
GotPltEntrySize = 4;
PltEntrySize = 16;
PltHeaderSize = 32;
TrapInstr = 0xd4d4d4d4;
// ARM uses Variant 1 TLS
TcbSize = 8;
TrapInstr = {0xd4, 0xd4, 0xd4, 0xd4};
NeedsThunks = true;
// The placing of pre-created ThunkSections is controlled by the
// ThunkSectionSpacing parameter. The aim is to place the
// ThunkSection such that all branches from the InputSections prior to the
// ThunkSection can reach a Thunk placed at the end of the ThunkSection.
// Graphically:
// | up to ThunkSectionSpacing .text input sections |
// | ThunkSection |
// | up to ThunkSectionSpacing .text input sections |
// | ThunkSection |
// Pre-created ThunkSections are spaced roughly 16MiB apart on ARM. This is to
// match the most common expected case of a Thumb 2 encoded BL, BLX or B.W
// ARM B, BL, BLX range +/- 32MiB
// Thumb B.W, BL, BLX range +/- 16MiB
// Thumb B<cc>.W range +/- 1MiB
// If a branch cannot reach a pre-created ThunkSection a new one will be
// created so we can handle the rare cases of a Thumb 2 conditional branch.
// We intentionally use a lower size for ThunkSectionSpacing than the maximum
// branch range so the end of the ThunkSection is more likely to be within
// range of the branch instruction that is furthest away. The value we shorten
// ThunkSectionSpacing by is set conservatively to allow us to create 16,384
// 12 byte Thunks at any offset in a ThunkSection without risk of a branch to
// one of the Thunks going out of range.
// FIXME: lld assumes that the Thumb BL and BLX encoding permits the J1 and
// J2 bits to be used to extend the branch range. On earlier Architectures
// such as ARMv4, ARMv5 and ARMv6 (except ARMv6T2) the range is +/- 4MiB. If
// support for the earlier encodings is added then when they are used the
// ThunkSectionSpacing will need lowering.
ThunkSectionSpacing = 0x1000000 - 0x30000;
}
uint32_t ARM::calcEFlags() const {
@ -165,6 +134,12 @@ RelExpr ARM::getRelExpr(RelType Type, const Symbol &S,
return R_NONE;
case R_ARM_TLS_LE32:
return R_TLS;
case R_ARM_V4BX:
// V4BX is just a marker to indicate there's a "bx rN" instruction at the
// given address. It can be used to implement a special linker mode which
// rewrites ARMv4T inputs to ARMv4. Since we support only ARMv4 input and
// not ARMv4 output, we can just ignore it.
return R_HINT;
default:
return R_ABS;
}
@ -177,7 +152,7 @@ RelType ARM::getDynRel(RelType Type) const {
}
void ARM::writeGotPlt(uint8_t *Buf, const Symbol &) const {
write32le(Buf, InX::Plt->getVA());
write32le(Buf, In.Plt->getVA());
}
void ARM::writeIgotPlt(uint8_t *Buf, const Symbol &S) const {
@ -198,8 +173,8 @@ static void writePltHeaderLong(uint8_t *Buf) {
0xd4, 0xd4, 0xd4, 0xd4, // Pad to 32-byte boundary
0xd4, 0xd4, 0xd4, 0xd4};
memcpy(Buf, PltData, sizeof(PltData));
uint64_t GotPlt = InX::GotPlt->getVA();
uint64_t L1 = InX::Plt->getVA() + 8;
uint64_t GotPlt = In.GotPlt->getVA();
uint64_t L1 = In.Plt->getVA() + 8;
write32le(Buf + 16, GotPlt - L1 - 8);
}
@ -217,7 +192,7 @@ void ARM::writePltHeader(uint8_t *Buf) const {
0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
};
uint64_t Offset = InX::GotPlt->getVA() - InX::Plt->getVA() - 4;
uint64_t Offset = In.GotPlt->getVA() - In.Plt->getVA() - 4;
if (!llvm::isUInt<27>(Offset)) {
// We cannot encode the Offset, use the long form.
writePltHeaderLong(Buf);
@ -227,10 +202,10 @@ void ARM::writePltHeader(uint8_t *Buf) const {
write32le(Buf + 4, PltData[1] | ((Offset >> 20) & 0xff));
write32le(Buf + 8, PltData[2] | ((Offset >> 12) & 0xff));
write32le(Buf + 12, PltData[3] | (Offset & 0xfff));
write32le(Buf + 16, TrapInstr); // Pad to 32-byte boundary
write32le(Buf + 20, TrapInstr);
write32le(Buf + 24, TrapInstr);
write32le(Buf + 28, TrapInstr);
memcpy(Buf + 16, TrapInstr.data(), 4); // Pad to 32-byte boundary
memcpy(Buf + 20, TrapInstr.data(), 4);
memcpy(Buf + 24, TrapInstr.data(), 4);
memcpy(Buf + 28, TrapInstr.data(), 4);
}
void ARM::addPltHeaderSymbols(InputSection &IS) const {
@ -279,7 +254,7 @@ void ARM::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
write32le(Buf + 0, PltData[0] | ((Offset >> 20) & 0xff));
write32le(Buf + 4, PltData[1] | ((Offset >> 12) & 0xff));
write32le(Buf + 8, PltData[2] | (Offset & 0xfff));
write32le(Buf + 12, TrapInstr); // Pad to 16-byte boundary
memcpy(Buf + 12, TrapInstr.data(), 4); // Pad to 16-byte boundary
}
void ARM::addPltSymbols(InputSection &IS, uint64_t Off) const {
@ -324,6 +299,40 @@ bool ARM::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
return false;
}
uint32_t ARM::getThunkSectionSpacing() const {
// The placing of pre-created ThunkSections is controlled by the value
// ThunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to
// place the ThunkSection such that all branches from the InputSections
// prior to the ThunkSection can reach a Thunk placed at the end of the
// ThunkSection. Graphically:
// | up to ThunkSectionSpacing .text input sections |
// | ThunkSection |
// | up to ThunkSectionSpacing .text input sections |
// | ThunkSection |
// Pre-created ThunkSections are spaced roughly 16MiB apart on ARMv7. This
// is to match the most common expected case of a Thumb 2 encoded BL, BLX or
// B.W:
// ARM B, BL, BLX range +/- 32MiB
// Thumb B.W, BL, BLX range +/- 16MiB
// Thumb B<cc>.W range +/- 1MiB
// If a branch cannot reach a pre-created ThunkSection a new one will be
// created so we can handle the rare cases of a Thumb 2 conditional branch.
// We intentionally use a lower size for ThunkSectionSpacing than the maximum
// branch range so the end of the ThunkSection is more likely to be within
// range of the branch instruction that is furthest away. The value we shorten
// ThunkSectionSpacing by is set conservatively to allow us to create 16,384
// 12 byte Thunks at any offset in a ThunkSection without risk of a branch to
// one of the Thunks going out of range.
// On Arm the ThunkSectionSpacing depends on the range of the Thumb Branch
// range. On earlier Architectures such as ARMv4, ARMv5 and ARMv6 (except
// ARMv6T2) the range is +/- 4MiB.
return (Config->ARMJ1J2BranchEncoding) ? 0x1000000 - 0x30000
: 0x400000 - 0x7500;
}
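Worked out, the two spacings are:

    // J1/J2 usable (ARMv6T2 and later): 0x1000000 - 0x30000 = 0xfd0000
    //   (~15.8 MiB of the 16 MiB Thumb-2 BL/BLX range)
    // J1 == J2 == 1 (ARMv4/v5/v6):      0x400000 - 0x7500  = 0x3f8b00
    //   (~3.97 MiB of the 4 MiB range)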
bool ARM::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
uint64_t Range;
uint64_t InstrSize;
@ -342,7 +351,7 @@ bool ARM::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
break;
case R_ARM_THM_JUMP24:
case R_ARM_THM_CALL:
Range = 0x1000000;
Range = Config->ARMJ1J2BranchEncoding ? 0x1000000 : 0x400000;
InstrSize = 2;
break;
default:
@ -447,11 +456,23 @@ void ARM::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
}
// Bit 12 is 0 for BLX, 1 for BL
write16le(Loc + 2, (read16le(Loc + 2) & ~0x1000) | (Val & 1) << 12);
if (!Config->ARMJ1J2BranchEncoding) {
// Older Arm architectures do not support R_ARM_THM_JUMP24 and have
// different encoding rules and range due to J1 and J2 always being 1.
checkInt(Loc, Val, 23, Type);
write16le(Loc,
0xf000 | // opcode
((Val >> 12) & 0x07ff)); // imm11
write16le(Loc + 2,
(read16le(Loc + 2) & 0xd000) | // opcode
0x2800 | // J1 == J2 == 1
((Val >> 1) & 0x07ff)); // imm11
break;
}
// Fall through as rest of encoding is the same as B.W
LLVM_FALLTHROUGH;
case R_ARM_THM_JUMP24:
// Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0
// FIXME: Use of I1 and I2 require v6T2ops
checkInt(Loc, Val, 25, Type);
write16le(Loc,
0xf000 | // opcode
@ -470,14 +491,12 @@ void ARM::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
break;
case R_ARM_MOVT_ABS:
case R_ARM_MOVT_PREL:
checkInt(Loc, Val, 32, Type);
write32le(Loc, (read32le(Loc) & ~0x000f0fff) |
(((Val >> 16) & 0xf000) << 4) | ((Val >> 16) & 0xfff));
break;
case R_ARM_THM_MOVT_ABS:
case R_ARM_THM_MOVT_PREL:
// Encoding T1: A = imm4:i:imm3:imm8
checkInt(Loc, Val, 32, Type);
write16le(Loc,
0xf2c0 | // opcode
((Val >> 17) & 0x0400) | // i
@ -542,10 +561,19 @@ int64_t ARM::getImplicitAddend(const uint8_t *Buf, RelType Type) const {
((Lo & 0x07ff) << 1)); // imm11:0
}
case R_ARM_THM_CALL:
if (!Config->ARMJ1J2BranchEncoding) {
// Older Arm architectures do not support R_ARM_THM_JUMP24 and have
// different encoding rules and range due to J1 and J2 always being 1.
uint16_t Hi = read16le(Buf);
uint16_t Lo = read16le(Buf + 2);
return SignExtend64<22>(((Hi & 0x7ff) << 12) | // imm11
((Lo & 0x7ff) << 1)); // imm11:0
break;
}
LLVM_FALLTHROUGH;
case R_ARM_THM_JUMP24: {
// Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0
// I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S)
// FIXME: I1 and I2 require v6T2ops
uint16_t Hi = read16le(Buf);
uint16_t Lo = read16le(Buf + 2);
return SignExtend64<24>(((Hi & 0x0400) << 14) | // S
deps/lld/ELF/Arch/AVR.cpp vendored
@ -43,12 +43,15 @@ using namespace lld::elf;
namespace {
class AVR final : public TargetInfo {
public:
AVR();
RelExpr getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const override;
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
};
} // namespace
AVR::AVR() { NoneRel = R_AVR_NONE; }
RelExpr AVR::getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const {
return R_ABS;
deps/lld/ELF/Arch/Hexagon.cpp vendored
@ -9,6 +9,7 @@
#include "InputFiles.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/BinaryFormat/ELF.h"
@ -25,15 +26,48 @@ using namespace lld::elf;
namespace {
class Hexagon final : public TargetInfo {
public:
Hexagon();
uint32_t calcEFlags() const override;
RelExpr getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const override;
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void writePltHeader(uint8_t *Buf) const override;
void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
int32_t Index, unsigned RelOff) const override;
};
} // namespace
// Support V60 only at the moment.
uint32_t Hexagon::calcEFlags() const { return 0x60; }
Hexagon::Hexagon() {
PltRel = R_HEX_JMP_SLOT;
RelativeRel = R_HEX_RELATIVE;
GotRel = R_HEX_GLOB_DAT;
GotEntrySize = 4;
// The zeroth GOT entry is reserved for the address of _DYNAMIC. The
// next 3 are reserved for the dynamic loader.
GotPltHeaderEntriesNum = 4;
GotPltEntrySize = 4;
PltEntrySize = 16;
PltHeaderSize = 32;
// Hexagon Linux uses 64K pages by default.
DefaultMaxPageSize = 0x10000;
NoneRel = R_HEX_NONE;
}
uint32_t Hexagon::calcEFlags() const {
assert(!ObjectFiles.empty());
// The architecture revision must always be equal to or greater than the
// greatest revision in the list of inputs.
uint32_t Ret = 0;
for (InputFile *F : ObjectFiles) {
uint32_t EFlags = cast<ObjFile<ELF32LE>>(F)->getObj().getHeader()->e_flags;
if (EFlags > Ret)
Ret = EFlags;
}
return Ret;
}
static uint32_t applyMask(uint32_t Mask, uint32_t Data) {
uint32_t Result = 0;
@ -53,29 +87,143 @@ static uint32_t applyMask(uint32_t Mask, uint32_t Data) {
RelExpr Hexagon::getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const {
switch (Type) {
case R_HEX_B9_PCREL:
case R_HEX_B9_PCREL_X:
case R_HEX_B13_PCREL:
case R_HEX_B15_PCREL:
case R_HEX_B15_PCREL_X:
case R_HEX_6_PCREL_X:
case R_HEX_32_PCREL:
return R_PC;
case R_HEX_B22_PCREL:
case R_HEX_PLT_B22_PCREL:
case R_HEX_B22_PCREL_X:
case R_HEX_B32_PCREL_X:
return R_PC;
return R_PLT_PC;
case R_HEX_GOT_11_X:
case R_HEX_GOT_16_X:
case R_HEX_GOT_32_6_X:
return R_HEXAGON_GOT;
default:
return R_ABS;
}
}
static uint32_t findMaskR6(uint32_t Insn) {
// There are (arguably too) many relocation masks for the DSP's
// R_HEX_6_X type. The table below is used to select the correct mask
// for the given instruction.
struct InstructionMask {
uint32_t CmpMask;
uint32_t RelocMask;
};
static const InstructionMask R6[] = {
{0x38000000, 0x0000201f}, {0x39000000, 0x0000201f},
{0x3e000000, 0x00001f80}, {0x3f000000, 0x00001f80},
{0x40000000, 0x000020f8}, {0x41000000, 0x000007e0},
{0x42000000, 0x000020f8}, {0x43000000, 0x000007e0},
{0x44000000, 0x000020f8}, {0x45000000, 0x000007e0},
{0x46000000, 0x000020f8}, {0x47000000, 0x000007e0},
{0x6a000000, 0x00001f80}, {0x7c000000, 0x001f2000},
{0x9a000000, 0x00000f60}, {0x9b000000, 0x00000f60},
{0x9c000000, 0x00000f60}, {0x9d000000, 0x00000f60},
{0x9f000000, 0x001f0100}, {0xab000000, 0x0000003f},
{0xad000000, 0x0000003f}, {0xaf000000, 0x00030078},
{0xd7000000, 0x006020e0}, {0xd8000000, 0x006020e0},
{0xdb000000, 0x006020e0}, {0xdf000000, 0x006020e0}};
// Duplex forms have a fixed mask, and their parse bits 15:14 are always
// zero. Non-duplex insns will always have at least one bit set in the
// parse field.
if ((0xC000 & Insn) == 0x0)
return 0x03f00000;
for (InstructionMask I : R6)
if ((0xff000000 & Insn) == I.CmpMask)
return I.RelocMask;
error("unrecognized instruction for R_HEX_6 relocation: 0x" +
utohexstr(Insn));
return 0;
}
static uint32_t findMaskR8(uint32_t Insn) {
if ((0xff000000 & Insn) == 0xde000000)
return 0x00e020e8;
if ((0xff000000 & Insn) == 0x3c000000)
return 0x0000207f;
return 0x00001fe0;
}
static uint32_t findMaskR11(uint32_t Insn) {
if ((0xff000000 & Insn) == 0xa1000000)
return 0x060020ff;
return 0x06003fe0;
}
static uint32_t findMaskR16(uint32_t Insn) {
if ((0xff000000 & Insn) == 0x48000000)
return 0x061f20ff;
if ((0xff000000 & Insn) == 0x49000000)
return 0x061f3fe0;
if ((0xff000000 & Insn) == 0x78000000)
return 0x00df3fe0;
if ((0xff000000 & Insn) == 0xb0000000)
return 0x0fe03fe0;
error("unrecognized instruction for R_HEX_16_X relocation: 0x" +
utohexstr(Insn));
return 0;
}
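The body of applyMask is elided by this hunk; as a rough sketch (an assumption about its behavior, not the vendored code), it scatters the low bits of Data into the bit positions where Mask is set.

#include <cassert>
#include <cstddef>
#include <cstdint>

static uint32_t applyMaskSketch(uint32_t Mask, uint32_t Data) {
  uint32_t Result = 0;
  size_t Off = 0;
  for (size_t Bit = 0; Bit != 32; ++Bit) {
    uint32_t ValBit = (Data >> Off) & 1;
    uint32_t MaskBit = (Mask >> Bit) & 1;
    if (MaskBit) {
      Result |= ValBit << Bit;
      ++Off; // consume one value bit per set mask bit
    }
  }
  return Result;
}

int main() {
  // For a contiguous mask the scatter degenerates to a plain shift.
  assert(applyMaskSketch(0x00001f80, 0x25) == (0x25u << 7)); // == 0x1280
}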
static void or32le(uint8_t *P, int32_t V) { write32le(P, read32le(P) | V); }
void Hexagon::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
switch (Type) {
case R_HEX_NONE:
break;
case R_HEX_6_PCREL_X:
case R_HEX_6_X:
or32le(Loc, applyMask(findMaskR6(read32le(Loc)), Val));
break;
case R_HEX_8_X:
or32le(Loc, applyMask(findMaskR8(read32le(Loc)), Val));
break;
case R_HEX_9_X:
or32le(Loc, applyMask(0x00003fe0, Val & 0x3f));
break;
case R_HEX_10_X:
or32le(Loc, applyMask(0x00203fe0, Val & 0x3f));
break;
case R_HEX_11_X:
case R_HEX_GOT_11_X:
or32le(Loc, applyMask(findMaskR11(read32le(Loc)), Val & 0x3f));
break;
case R_HEX_12_X:
or32le(Loc, applyMask(0x000007e0, Val));
break;
case R_HEX_16_X: // These relocs only have 6 effective bits.
case R_HEX_GOT_16_X:
or32le(Loc, applyMask(findMaskR16(read32le(Loc)), Val & 0x3f));
break;
case R_HEX_32:
case R_HEX_32_PCREL:
or32le(Loc, Val);
break;
case R_HEX_32_6_X:
case R_HEX_GOT_32_6_X:
or32le(Loc, applyMask(0x0fff3fff, Val >> 6));
break;
case R_HEX_B9_PCREL:
or32le(Loc, applyMask(0x003000fe, Val >> 2));
break;
case R_HEX_B9_PCREL_X:
or32le(Loc, applyMask(0x003000fe, Val & 0x3f));
break;
case R_HEX_B13_PCREL:
or32le(Loc, applyMask(0x00202ffe, Val >> 2));
break;
case R_HEX_B15_PCREL:
or32le(Loc, applyMask(0x00df20fe, Val >> 2));
break;
@ -83,6 +231,7 @@ void Hexagon::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
or32le(Loc, applyMask(0x00df20fe, Val & 0x3f));
break;
case R_HEX_B22_PCREL:
case R_HEX_PLT_B22_PCREL:
or32le(Loc, applyMask(0x1ff3ffe, Val >> 2));
break;
case R_HEX_B22_PCREL_X:
@ -91,12 +240,52 @@ void Hexagon::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
case R_HEX_B32_PCREL_X:
or32le(Loc, applyMask(0x0fff3fff, Val >> 6));
break;
case R_HEX_HI16:
or32le(Loc, applyMask(0x00c03fff, Val >> 16));
break;
case R_HEX_LO16:
or32le(Loc, applyMask(0x00c03fff, Val));
break;
default:
error(getErrorLocation(Loc) + "unrecognized reloc " + toString(Type));
break;
}
}
void Hexagon::writePltHeader(uint8_t *Buf) const {
const uint8_t PltData[] = {
0x00, 0x40, 0x00, 0x00, // { immext (#0)
0x1c, 0xc0, 0x49, 0x6a, // r28 = add (pc, ##GOT0@PCREL) } # @GOT0
0x0e, 0x42, 0x9c, 0xe2, // { r14 -= add (r28, #16) # offset of GOTn
0x4f, 0x40, 0x9c, 0x91, // r15 = memw (r28 + #8) # object ID at GOT2
0x3c, 0xc0, 0x9c, 0x91, // r28 = memw (r28 + #4) }# dynamic link at GOT1
0x0e, 0x42, 0x0e, 0x8c, // { r14 = asr (r14, #2) # index of PLTn
0x00, 0xc0, 0x9c, 0x52, // jumpr r28 } # call dynamic linker
0x0c, 0xdb, 0x00, 0x54, // trap0(#0xdb) # bring plt0 into 16-byte alignment
};
memcpy(Buf, PltData, sizeof(PltData));
// Offset from PLT0 to the GOT.
uint64_t Off = In.GotPlt->getVA() - In.Plt->getVA();
relocateOne(Buf, R_HEX_B32_PCREL_X, Off);
relocateOne(Buf + 4, R_HEX_6_PCREL_X, Off);
}
void Hexagon::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
uint64_t PltEntryAddr, int32_t Index,
unsigned RelOff) const {
const uint8_t Inst[] = {
0x00, 0x40, 0x00, 0x00, // { immext (#0)
0x0e, 0xc0, 0x49, 0x6a, // r14 = add (pc, ##GOTn@PCREL) }
0x1c, 0xc0, 0x8e, 0x91, // r28 = memw (r14)
0x00, 0xc0, 0x9c, 0x52, // jumpr r28
};
memcpy(Buf, Inst, sizeof(Inst));
relocateOne(Buf, R_HEX_B32_PCREL_X, GotPltEntryAddr - PltEntryAddr);
relocateOne(Buf + 4, R_HEX_6_PCREL_X, GotPltEntryAddr - PltEntryAddr);
}
TargetInfo *elf::getHexagonTargetInfo() {
static Hexagon Target;
return &Target;
deps/lld/ELF/Arch/MSP430.cpp vendored
@ -0,0 +1,94 @@
//===- MSP430.cpp ---------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The MSP430 is a 16-bit microcontroller RISC architecture. The instruction
// set has only 27 core instructions, orthogonally augmented with a variety
// of addressing modes for source and destination operands. The entire address
// space of the MSP430 is 64KB (the extended MSP430X architecture is not
// considered here). A typical MSP430 MCU has several kilobytes of RAM and
// ROM, plenty of peripherals, and is generally optimized for low power
// consumption.
//
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
#include "Symbols.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
namespace {
class MSP430 final : public TargetInfo {
public:
MSP430();
RelExpr getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const override;
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
};
} // namespace
MSP430::MSP430() {
// mov.b #0, r3
TrapInstr = {0x43, 0x43, 0x43, 0x43};
}
RelExpr MSP430::getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const {
switch (Type) {
case R_MSP430_10_PCREL:
case R_MSP430_16_PCREL:
case R_MSP430_16_PCREL_BYTE:
case R_MSP430_2X_PCREL:
case R_MSP430_RL_PCREL:
case R_MSP430_SYM_DIFF:
return R_PC;
default:
return R_ABS;
}
}
void MSP430::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
switch (Type) {
case R_MSP430_8:
checkIntUInt(Loc, Val, 8, Type);
*Loc = Val;
break;
case R_MSP430_16:
case R_MSP430_16_PCREL:
case R_MSP430_16_BYTE:
case R_MSP430_16_PCREL_BYTE:
checkIntUInt(Loc, Val, 16, Type);
write16le(Loc, Val);
break;
case R_MSP430_32:
checkIntUInt(Loc, Val, 32, Type);
write32le(Loc, Val);
break;
case R_MSP430_10_PCREL: {
int16_t Offset = ((int16_t)Val >> 1) - 1;
checkInt(Loc, Offset, 10, Type);
write16le(Loc, (read16le(Loc) & 0xFC00) | (Offset & 0x3FF));
break;
}
default:
error(getErrorLocation(Loc) + "unrecognized reloc " + toString(Type));
}
}
TargetInfo *elf::getMSP430TargetInfo() {
static MSP430 Target;
return &Target;
}
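A worked example of the R_MSP430_10_PCREL case above, as a standalone sketch (the opcode constant is illustrative): the byte displacement is halved to a word count and biased by -1 before being packed into the low 10 bits.

#include <cassert>
#include <cstdint>

int main() {
  int16_t Val = 20;                // branch 20 bytes forward
  int16_t Offset = (Val >> 1) - 1; // 9 words, since the CPU counts from PC+2
  assert(Offset == 9);
  uint16_t Insn = 0x3C00;          // unconditional JMP, offset field zeroed
  Insn = (Insn & 0xFC00) | (Offset & 0x3FF);
  assert((Insn & 0x3FF) == 9);
}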
deps/lld/ELF/Arch/Mips.cpp vendored
@ -53,9 +53,12 @@ template <class ELFT> MIPS<ELFT>::MIPS() {
PltEntrySize = 16;
PltHeaderSize = 32;
CopyRel = R_MIPS_COPY;
NoneRel = R_MIPS_NONE;
PltRel = R_MIPS_JUMP_SLOT;
NeedsThunks = true;
TrapInstr = 0xefefefef;
// Set `sigrie 1` as a trap instruction.
write32(TrapInstr.data(), 0x04170001);
if (ELFT::Is64Bits) {
RelativeRel = (R_MIPS_64 << 8) | R_MIPS_REL32;
@ -185,7 +188,7 @@ template <class ELFT> RelType MIPS<ELFT>::getDynRel(RelType Type) const {
template <class ELFT>
void MIPS<ELFT>::writeGotPlt(uint8_t *Buf, const Symbol &) const {
uint64_t VA = InX::Plt->getVA();
uint64_t VA = In.Plt->getVA();
if (isMicroMips())
VA |= 1;
write32<ELFT::TargetEndianness>(Buf, VA);
@ -239,8 +242,8 @@ static void writeMicroRelocation16(uint8_t *Loc, uint64_t V, uint8_t BitsSize,
template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *Buf) const {
const endianness E = ELFT::TargetEndianness;
if (isMicroMips()) {
uint64_t GotPlt = InX::GotPlt->getVA();
uint64_t Plt = InX::Plt->getVA();
uint64_t GotPlt = In.GotPlt->getVA();
uint64_t Plt = In.Plt->getVA();
// Overwrite trap instructions written by Writer::writeTrapInstr.
memset(Buf, 0, PltHeaderSize);
@ -292,7 +295,7 @@ template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *Buf) const {
write32<E>(Buf + 24, JalrInst); // jalr.hb $25 or jalr $25
write32<E>(Buf + 28, 0x2718fffe); // subu $24, $24, 2
uint64_t GotPlt = InX::GotPlt->getVA();
uint64_t GotPlt = In.GotPlt->getVA();
writeValue<E>(Buf, GotPlt + 0x8000, 16, 16);
writeValue<E>(Buf + 4, GotPlt, 16, 0);
writeValue<E>(Buf + 8, GotPlt, 16, 0);
deps/lld/ELF/Arch/PPC.cpp vendored
@ -29,6 +29,7 @@ public:
} // namespace
PPC::PPC() {
NoneRel = R_PPC_NONE;
GotBaseSymOff = 0x8000;
GotBaseSymInGotPlt = false;
}
@ -36,6 +37,7 @@ PPC::PPC() {
RelExpr PPC::getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const {
switch (Type) {
case R_PPC_REL14:
case R_PPC_REL24:
case R_PPC_REL32:
return R_PC;
@ -61,6 +63,9 @@ void PPC::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
case R_PPC_REL32:
write32be(Loc, Val);
break;
case R_PPC_REL14:
write32be(Loc, read32be(Loc) | (Val & 0xFFFC));
break;
case R_PPC_PLTREL24:
case R_PPC_REL24:
write32be(Loc, read32be(Loc) | (Val & 0x3FFFFFC));
deps/lld/ELF/Arch/PPC64.cpp vendored
@ -23,12 +23,49 @@ using namespace lld::elf;
static uint64_t PPC64TocOffset = 0x8000;
static uint64_t DynamicThreadPointerOffset = 0x8000;
// The instruction encoding of bits 21-30 from the ISA for the X-form and
// D-form instructions that can be used as part of the initial exec TLS
// sequence.
enum XFormOpcd {
LBZX = 87,
LHZX = 279,
LWZX = 23,
LDX = 21,
STBX = 215,
STHX = 407,
STWX = 151,
STDX = 149,
ADD = 266,
};
enum DFormOpcd {
LBZ = 34,
LBZU = 35,
LHZ = 40,
LHZU = 41,
LHAU = 43,
LWZ = 32,
LWZU = 33,
LFSU = 49,
LD = 58,
LFDU = 51,
STB = 38,
STBU = 39,
STH = 44,
STHU = 45,
STW = 36,
STWU = 37,
STFSU = 53,
STFDU = 55,
STD = 62,
ADDI = 14
};
uint64_t elf::getPPC64TocBase() {
// The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
// TOC starts where the first of these sections starts. We always create a
// .got when we see a relocation that uses it, so for us the start is always
// the .got.
uint64_t TocVA = InX::Got->getVA();
uint64_t TocVA = In.Got->getVA();
// Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000
// thus permitting a full 64 Kbytes segment. Note that the glibc startup
@ -37,6 +74,31 @@ uint64_t elf::getPPC64TocBase() {
return TocVA + PPC64TocOffset;
}
unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t StOther) {
// The offset is encoded into the 3 most significant bits of the st_other
// field, with some special values described in section 3.4.1 of the ABI:
// 0 --> Zero offset between the GEP and LEP, and the function does NOT use
// the TOC pointer (r2). r2 will hold the same value on returning from
// the function as it did on entering the function.
// 1 --> Zero offset between the GEP and LEP, and r2 should be treated as a
// caller-saved register for all callers.
// 2-6 --> The binary logarithm of the offset, e.g.:
// 2 --> 2^2 = 4 bytes --> 1 instruction.
// 6 --> 2^6 = 64 bytes --> 16 instructions.
// 7 --> Reserved.
uint8_t GepToLep = (StOther >> 5) & 7;
if (GepToLep < 2)
return 0;
// The value encoded in the st_other bits is the
// log-base-2(offset).
if (GepToLep < 7)
return 1 << GepToLep;
error("reserved value of 7 in the 3 most-significant-bits of st_other");
return 0;
}
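The st_other decoding above can be illustrated with a standalone sketch (assumed values; it mirrors the function rather than replacing it).

#include <cassert>
#include <cstdint>

static unsigned gepToLepOffset(uint8_t StOther) {
  uint8_t GepToLep = (StOther >> 5) & 7;
  if (GepToLep < 2)
    return 0;              // zero offset between GEP and LEP
  if (GepToLep < 7)
    return 1u << GepToLep; // log2-encoded offset, 4..64 bytes
  return 0;                // 7 is reserved; LLD reports an error
}

int main() {
  assert(gepToLepOffset(0x00) == 0);
  assert(gepToLepOffset(0x60) == 8);  // the common 2-instruction GEP
  assert(gepToLepOffset(0xC0) == 64);
}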
namespace {
class PPC64 final : public TargetInfo {
public:
@ -51,11 +113,16 @@ public:
void writeGotHeader(uint8_t *Buf) const override;
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
uint64_t BranchAddr, const Symbol &S) const override;
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const override;
void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
uint8_t StOther) const override;
};
} // namespace
@ -71,8 +138,64 @@ static uint16_t highera(uint64_t V) { return (V + 0x8000) >> 32; }
static uint16_t highest(uint64_t V) { return V >> 48; }
static uint16_t highesta(uint64_t V) { return (V + 0x8000) >> 48; }
// Extracts the 'PO' field of an instruction encoding.
static uint8_t getPrimaryOpCode(uint32_t Encoding) { return (Encoding >> 26); }
static bool isDQFormInstruction(uint32_t Encoding) {
switch (getPrimaryOpCode(Encoding)) {
default:
return false;
case 56:
// The only instruction with a primary opcode of 56 is `lq`.
return true;
case 61:
// There are both DS and DQ instruction forms with this primary opcode.
// Namely `lxv` and `stxv` are the DQ-forms that use it.
// The DS 'XO' bits being set to 01 is restricted to DQ form.
return (Encoding & 3) == 0x1;
}
}
static bool isInstructionUpdateForm(uint32_t Encoding) {
switch (getPrimaryOpCode(Encoding)) {
default:
return false;
case LBZU:
case LHAU:
case LHZU:
case LWZU:
case LFSU:
case LFDU:
case STBU:
case STHU:
case STWU:
case STFSU:
case STFDU:
return true;
// LWA has the same opcode as LD, and the DS bits are what differentiate
// between LD/LDU/LWA
case LD:
case STD:
return (Encoding & 3) == 1;
}
}
// There are a number of places where we either want to read or write an
// instruction when handling a half16 relocation type. On big-endian the buffer
// pointer is pointing into the middle of the word we want to extract, and on
// little-endian it is pointing to the start of the word. These 2 helpers are to
// simplify reading and writing in that context.
static void writeInstrFromHalf16(uint8_t *Loc, uint32_t Instr) {
write32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0), Instr);
}
static uint32_t readInstrFromHalf16(const uint8_t *Loc) {
return read32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0));
}
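Why the 2-byte adjustment works (an illustration, not vendored code): a half16 relocation points at the 16 bits that hold the immediate, and those bits sit at opposite ends of the 4-byte instruction depending on endianness.

// LE instruction word: [imm lo][imm hi][  op  ][  op  ]  <- Loc == insn start
// BE instruction word: [  op  ][  op  ][imm hi][imm lo]
//                                      ^ Loc, so insn start == Loc - 2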
PPC64::PPC64() {
GotRel = R_PPC64_GLOB_DAT;
NoneRel = R_PPC64_NONE;
PltRel = R_PPC64_JMP_SLOT;
RelativeRel = R_PPC64_RELATIVE;
IRelativeRel = R_PPC64_IRELATIVE;
@ -85,14 +208,14 @@ PPC64::PPC64() {
GotPltHeaderEntriesNum = 2;
PltHeaderSize = 60;
NeedsThunks = true;
TcbSize = 8;
TlsTpOffset = 0x7000;
TlsModuleIndexRel = R_PPC64_DTPMOD64;
TlsOffsetRel = R_PPC64_DTPREL64;
TlsGotRel = R_PPC64_TPREL64;
NeedsMoreStackNonSplit = false;
// We need 64K pages (at least under glibc/Linux, the loader won't
// set different permissions on a finer granularity than that).
DefaultMaxPageSize = 65536;
@ -107,8 +230,7 @@ PPC64::PPC64() {
// use 0x10000000 as the starting address.
DefaultImageBase = 0x10000000;
TrapInstr =
(Config->IsLE == sys::IsLittleEndianHost) ? 0x7fe00008 : 0x0800e07f;
write32(TrapInstr.data(), 0x7fe00008);
}
static uint32_t getEFlags(InputFile *File) {
@ -146,27 +268,29 @@ void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
// bl __tls_get_addr(x@tlsgd) into nop
// nop into addi r3, r3, x@tprel@l
uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U;
switch (Type) {
case R_PPC64_GOT_TLSGD16_HA:
write32(Loc - EndianOffset, 0x60000000); // nop
writeInstrFromHalf16(Loc, 0x60000000); // nop
break;
case R_PPC64_GOT_TLSGD16:
case R_PPC64_GOT_TLSGD16_LO:
write32(Loc - EndianOffset, 0x3c6d0000); // addis r3, r13
writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13
relocateOne(Loc, R_PPC64_TPREL16_HA, Val);
break;
case R_PPC64_TLSGD:
write32(Loc, 0x60000000); // nop
write32(Loc + 4, 0x38630000); // addi r3, r3
relocateOne(Loc + 4 + EndianOffset, R_PPC64_TPREL16_LO, Val);
// Since we are relocating a half16 type relocation and Loc + 4 points to
// the start of an instruction we need to advance the buffer by an extra
// 2 bytes on BE.
relocateOne(Loc + 4 + (Config->EKind == ELF64BEKind ? 2 : 0),
R_PPC64_TPREL16_LO, Val);
break;
default:
llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
}
}
void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
// Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement.
// The local dynamic code sequence for a global `x` will look like:
@ -183,13 +307,12 @@ void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
// bl __tls_get_addr(x@tlsgd) into nop
// nop into addi r3, r3, 4096
uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U;
switch (Type) {
case R_PPC64_GOT_TLSLD16_HA:
write32(Loc - EndianOffset, 0x60000000); // nop
writeInstrFromHalf16(Loc, 0x60000000); // nop
break;
case R_PPC64_GOT_TLSLD16_LO:
write32(Loc - EndianOffset, 0x3c6d0000); // addis r3, r13, 0
writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13, 0
break;
case R_PPC64_TLSLD:
write32(Loc, 0x60000000); // nop
@ -212,9 +335,90 @@ void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
}
}
static unsigned getDFormOp(unsigned SecondaryOp) {
switch (SecondaryOp) {
case LBZX:
return LBZ;
case LHZX:
return LHZ;
case LWZX:
return LWZ;
case LDX:
return LD;
case STBX:
return STB;
case STHX:
return STH;
case STWX:
return STW;
case STDX:
return STD;
case ADD:
return ADDI;
default:
error("unrecognized instruction for IE to LE R_PPC64_TLS");
return 0;
}
}
void PPC64::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
// The initial exec code sequence for a global `x` will look like:
// Instruction Relocation Symbol
// addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x
// ld r9, x@got@tprel@l(r9) R_PPC64_GOT_TPREL16_LO_DS x
// add r9, r9, x@tls R_PPC64_TLS x
// Relaxing to local exec entails converting:
// addis r9, r2, x@got@tprel@ha into nop
// ld r9, x@got@tprel@l(r9) into addis r9, r13, x@tprel@ha
// add r9, r9, x@tls into addi r9, r9, x@tprel@l
// x@tls R_PPC64_TLS is a relocation which does not compute anything,
// it is replaced with r13 (thread pointer).
// The add instruction in the initial exec sequence has multiple variations
// that need to be handled. If we are building an address it will use an add
// instruction; if we are accessing memory it will use any of the X-form
// indexed load or store instructions.
unsigned Offset = (Config->EKind == ELF64BEKind) ? 2 : 0;
switch (Type) {
case R_PPC64_GOT_TPREL16_HA:
write32(Loc - Offset, 0x60000000); // nop
break;
case R_PPC64_GOT_TPREL16_LO_DS:
case R_PPC64_GOT_TPREL16_DS: {
uint32_t RegNo = read32(Loc - Offset) & 0x03E00000; // bits 6-10
write32(Loc - Offset, 0x3C0D0000 | RegNo); // addis RegNo, r13
relocateOne(Loc, R_PPC64_TPREL16_HA, Val);
break;
}
case R_PPC64_TLS: {
uint32_t PrimaryOp = getPrimaryOpCode(read32(Loc));
if (PrimaryOp != 31)
error("unrecognized instruction for IE to LE R_PPC64_TLS");
uint32_t SecondaryOp = (read32(Loc) & 0x000007FE) >> 1; // bits 21-30
uint32_t DFormOp = getDFormOp(SecondaryOp);
write32(Loc, ((DFormOp << 26) | (read32(Loc) & 0x03FFFFFF)));
relocateOne(Loc + Offset, R_PPC64_TPREL16_LO, Val);
break;
}
default:
llvm_unreachable("unknown relocation for IE to LE");
break;
}
}
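As a standalone sketch of the R_PPC64_TLS rewrite above (assumed operand values): an X-form `ldx r9,r9,r13` keeps its register fields while the primary opcode is swapped for the D-form `ld`.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Ldx = (31u << 26) | (9u << 21) | (9u << 16) | (13u << 11) |
                 (21u << 1);                      // ldx r9, r9, r13
  assert(((Ldx & 0x000007FE) >> 1) == 21);        // secondary opcode: LDX
  uint32_t Ld = (58u << 26) | (Ldx & 0x03FFFFFF); // swap in D-form LD
  assert(Ld >> 26 == 58);                         // primary opcode: LD
  assert(((Ld >> 21) & 31) == 9);                 // RT register preserved
}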
RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const {
switch (Type) {
case R_PPC64_GOT16:
case R_PPC64_GOT16_DS:
case R_PPC64_GOT16_HA:
case R_PPC64_GOT16_HI:
case R_PPC64_GOT16_LO:
case R_PPC64_GOT16_LO_DS:
return R_GOT_OFF;
case R_PPC64_TOC16:
case R_PPC64_TOC16_DS:
case R_PPC64_TOC16_HA:
@ -224,6 +428,7 @@ RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
return R_GOTREL;
case R_PPC64_TOC:
return R_PPC_TOC;
case R_PPC64_REL14:
case R_PPC64_REL24:
return R_PPC_CALL_PLT;
case R_PPC64_REL16_LO:
@ -279,7 +484,7 @@ RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
case R_PPC64_TLSLD:
return R_TLSLD_HINT;
case R_PPC64_TLS:
return R_HINT;
return R_TLSIE_HINT;
default:
return R_ABS;
}
@ -308,16 +513,16 @@ void PPC64::writePltHeader(uint8_t *Buf) const {
// The 'bcl' instruction will set the link register to the address of the
// following instruction ('mflr r11'). Here we store the offset from that
// instruction to the first entry in the GotPlt section.
int64_t GotPltOffset = InX::GotPlt->getVA() - (InX::Plt->getVA() + 8);
int64_t GotPltOffset = In.GotPlt->getVA() - (In.Plt->getVA() + 8);
write64(Buf + 52, GotPltOffset);
}
void PPC64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
uint64_t PltEntryAddr, int32_t Index,
unsigned RelOff) const {
int32_t Offset = PltHeaderSize + Index * PltEntrySize;
// bl __glink_PLTresolve
write32(Buf, 0x48000000 | ((-Offset) & 0x03FFFFFc));
}
static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) {
@ -328,30 +533,36 @@ static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) {
switch (Type) {
// TOC biased relocation.
case R_PPC64_GOT16:
case R_PPC64_GOT_TLSGD16:
case R_PPC64_GOT_TLSLD16:
case R_PPC64_TOC16:
return {R_PPC64_ADDR16, TocBiasedVal};
case R_PPC64_GOT16_DS:
case R_PPC64_TOC16_DS:
case R_PPC64_GOT_TPREL16_DS:
case R_PPC64_GOT_DTPREL16_DS:
return {R_PPC64_ADDR16_DS, TocBiasedVal};
case R_PPC64_GOT16_HA:
case R_PPC64_GOT_TLSGD16_HA:
case R_PPC64_GOT_TLSLD16_HA:
case R_PPC64_GOT_TPREL16_HA:
case R_PPC64_GOT_DTPREL16_HA:
case R_PPC64_TOC16_HA:
return {R_PPC64_ADDR16_HA, TocBiasedVal};
case R_PPC64_GOT16_HI:
case R_PPC64_GOT_TLSGD16_HI:
case R_PPC64_GOT_TLSLD16_HI:
case R_PPC64_GOT_TPREL16_HI:
case R_PPC64_GOT_DTPREL16_HI:
case R_PPC64_TOC16_HI:
return {R_PPC64_ADDR16_HI, TocBiasedVal};
case R_PPC64_GOT16_LO:
case R_PPC64_GOT_TLSGD16_LO:
case R_PPC64_GOT_TLSLD16_LO:
case R_PPC64_TOC16_LO:
return {R_PPC64_ADDR16_LO, TocBiasedVal};
case R_PPC64_GOT16_LO_DS:
case R_PPC64_TOC16_LO_DS:
case R_PPC64_GOT_TPREL16_LO_DS:
case R_PPC64_GOT_DTPREL16_LO_DS:
@ -386,9 +597,27 @@ static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) {
}
}
static bool isTocOptType(RelType Type) {
switch (Type) {
case R_PPC64_GOT16_HA:
case R_PPC64_GOT16_LO_DS:
case R_PPC64_TOC16_HA:
case R_PPC64_TOC16_LO_DS:
case R_PPC64_TOC16_LO:
return true;
default:
return false;
}
}
void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
// For a TOC-relative relocation, proceed in terms of the corresponding
// ADDR16 relocation type.
// We need to save the original relocation type to use in diagnostics, and
// use the original type to determine if we should toc-optimize the
// instructions being relocated.
RelType OriginalType = Type;
bool ShouldTocOptimize = isTocOptType(Type);
// For dynamic thread pointer relative, toc-relative, and got-indirect
// relocations, proceed in terms of the corresponding ADDR16 relocation type.
std::tie(Type, Val) = toAddr16Rel(Type, Val);
switch (Type) {
@ -401,18 +630,25 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
}
case R_PPC64_ADDR16:
case R_PPC64_TPREL16:
checkInt(Loc, Val, 16, Type);
checkInt(Loc, Val, 16, OriginalType);
write16(Loc, Val);
break;
case R_PPC64_ADDR16_DS:
case R_PPC64_TPREL16_DS:
checkInt(Loc, Val, 16, Type);
write16(Loc, (read16(Loc) & 3) | (Val & ~3));
break;
case R_PPC64_TPREL16_DS: {
checkInt(Loc, Val, 16, OriginalType);
// DQ-form instructions use bits 28-31 as part of the instruction encoding;
// DS-form instructions only use bits 30-31.
uint16_t Mask = isDQFormInstruction(readInstrFromHalf16(Loc)) ? 0xF : 0x3;
checkAlignment(Loc, lo(Val), Mask + 1, OriginalType);
write16(Loc, (read16(Loc) & Mask) | lo(Val));
} break;
case R_PPC64_ADDR16_HA:
case R_PPC64_REL16_HA:
case R_PPC64_TPREL16_HA:
write16(Loc, ha(Val));
if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0)
writeInstrFromHalf16(Loc, 0x60000000);
else
write16(Loc, ha(Val));
break;
case R_PPC64_ADDR16_HI:
case R_PPC64_REL16_HI:
@ -438,12 +674,40 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
case R_PPC64_ADDR16_LO:
case R_PPC64_REL16_LO:
case R_PPC64_TPREL16_LO:
// When the high-adjusted part of a toc relocation evaluates to 0, it is
// changed into a nop. The lo part then needs to be updated to use the
// toc-pointer register r2 as the base register.
if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) {
uint32_t Instr = readInstrFromHalf16(Loc);
if (isInstructionUpdateForm(Instr))
error(getErrorLocation(Loc) +
"can't toc-optimize an update instruction: 0x" +
utohexstr(Instr));
Instr = (Instr & 0xFFE00000) | 0x00020000;
writeInstrFromHalf16(Loc, Instr);
}
write16(Loc, lo(Val));
break;
case R_PPC64_ADDR16_LO_DS:
case R_PPC64_TPREL16_LO_DS:
write16(Loc, (read16(Loc) & 3) | (lo(Val) & ~3));
break;
case R_PPC64_TPREL16_LO_DS: {
// DQ-form instructions use bits 28-31 as part of the instruction encoding;
// DS-form instructions only use bits 30-31.
uint32_t Inst = readInstrFromHalf16(Loc);
uint16_t Mask = isDQFormInstruction(Inst) ? 0xF : 0x3;
checkAlignment(Loc, lo(Val), Mask + 1, OriginalType);
if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) {
// When the high-adjusted part of a toc relocation evaluates to 0, it is
// changed into a nop. The lo part then needs to be updated to use the toc
// pointer register r2 as the base register.
if (isInstructionUpdateForm(Inst))
error(getErrorLocation(Loc) +
"Can't toc-optimize an update instruction: 0x" +
Twine::utohexstr(Inst));
Inst = (Inst & 0xFFE0000F) | 0x00020000;
writeInstrFromHalf16(Loc, Inst);
}
write16(Loc, (read16(Loc) & Mask) | lo(Val));
} break;
case R_PPC64_ADDR32:
case R_PPC64_REL32:
checkInt(Loc, Val, 32, Type);
@ -454,9 +718,17 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
case R_PPC64_TOC:
write64(Loc, Val);
break;
case R_PPC64_REL14: {
uint32_t Mask = 0x0000FFFC;
checkInt(Loc, Val, 16, Type);
checkAlignment(Loc, Val, 4, Type);
write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
break;
}
case R_PPC64_REL24: {
uint32_t Mask = 0x03FFFFFC;
checkInt(Loc, Val, 24, Type);
checkInt(Loc, Val, 26, Type);
checkAlignment(Loc, Val, 4, Type);
write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
break;
}
@ -470,9 +742,30 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
uint64_t BranchAddr, const Symbol &S) const {
// If a function is in the plt it needs to be called through
// a call stub.
return Type == R_PPC64_REL24 && S.isInPlt();
if (Type != R_PPC64_REL14 && Type != R_PPC64_REL24)
return false;
// If a function is in the Plt it needs to be called with a call-stub.
if (S.isInPlt())
return true;
// If a symbol is a weak undefined and we are compiling an executable
// it doesn't need a range-extending thunk since it can't be called.
if (S.isUndefWeak() && !Config->Shared)
return false;
// If the offset exceeds the range of the branch type then it will need
// a range-extending thunk.
return !inBranchRange(Type, BranchAddr, S.getVA());
}
bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
int64_t Offset = Dst - Src;
if (Type == R_PPC64_REL14)
return isInt<16>(Offset);
if (Type == R_PPC64_REL24)
return isInt<26>(Offset);
llvm_unreachable("unsupported relocation type used in branch");
}
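The isInt<26> bound for R_PPC64_REL24 follows from the instruction format: the 24-bit LI field holds a word offset, so the reachable byte offset is a signed 26-bit value. A standalone check of that arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  int64_t MaxLi = (1 << 23) - 1;            // largest positive LI field value
  int64_t MaxByteOffset = MaxLi << 2;       // instructions are 4-byte aligned
  assert(MaxByteOffset == 0x1FFFFFC);       // just under +32MiB
  assert(MaxByteOffset <= (1LL << 25) - 1); // fits a signed 26-bit integer
}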
RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data,
@ -511,9 +804,8 @@ void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
case R_PPC64_GOT_TLSGD16_LO: {
// Relax from addi r3, rA, sym@got@tlsgd@l to
// ld r3, sym@got@tprel@l(rA)
uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U;
uint32_t InputRegister = (read32(Loc - EndianOffset) & (0x1f << 16));
write32(Loc - EndianOffset, 0xE8600000 | InputRegister);
uint32_t InputRegister = (readInstrFromHalf16(Loc) & (0x1f << 16));
writeInstrFromHalf16(Loc, 0xE8600000 | InputRegister);
relocateOne(Loc, R_PPC64_GOT_TPREL16_LO_DS, Val);
return;
}
@ -526,6 +818,113 @@ void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
}
}
// The prologue for a split-stack function is expected to look roughly
// like this:
// .Lglobal_entry_point:
// # TOC pointer initialization.
// ...
// .Llocal_entry_point:
// # load the __private_ss member of the thread's tcbhead.
// ld r0,-0x7000-64(r13)
// # subtract the function's stack size from the stack pointer.
// addis r12, r1, ha(-stack-frame size)
// addi r12, r12, l(-stack-frame size)
// # compare the needed stack size to the actual one and branch to
// # allocate_more_stack if more space is needed; otherwise fall through
// # to the 'normal' function body.
// cmpld cr7,r12,r0
// blt- cr7, .Lallocate_more_stack
//
// -) The allocate_more_stack block might be placed after the split-stack
// prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body`
// instead.
// -) If either the addis or addi is not needed due to the stack size being
// smaller than 32K or a multiple of 64K, it will be replaced with a nop,
// but there will always be 2 instructions the linker can overwrite for the
// adjusted stack size.
//
// The linker's job here is to increase the stack size used in the addis/addi
// pair by split-stack-size-adjust.
// addis r12, r1, ha(-stack-frame size - split-stack-adjust-size)
// addi r12, r12, l(-stack-frame size - split-stack-adjust-size)
bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
uint8_t StOther) const {
// If the caller has a global entry point, adjust the buffer past it. The start
// of the split-stack prologue will be at the local entry point.
Loc += getPPC64GlobalEntryToLocalEntryOffset(StOther);
// At the very least we expect to see a load of some split-stack data from the
// tcb, and 2 instructions that calculate the ending stack address this
// function will require. If there is not enough room for at least 3
// instructions it can't be a split-stack prologue.
if (Loc + 12 >= End)
return false;
// First instruction must be `ld r0, -0x7000-64(r13)`
if (read32(Loc) != 0xe80d8fc0)
return false;
int16_t HiImm = 0;
int16_t LoImm = 0;
// The first instruction of the pair can be an addis, if the frame size is
// larger than 32K, or an addi if the size is less than 32K.
int32_t FirstInstr = read32(Loc + 4);
if (getPrimaryOpCode(FirstInstr) == 15) {
HiImm = FirstInstr & 0xFFFF;
} else if (getPrimaryOpCode(FirstInstr) == 14) {
LoImm = FirstInstr & 0xFFFF;
} else {
return false;
}
// Second instruction is either an addi or a nop. If the first instruction was
// an addi then LoImm is set and the second instruction must be a nop.
uint32_t SecondInstr = read32(Loc + 8);
if (!LoImm && getPrimaryOpCode(SecondInstr) == 14) {
LoImm = SecondInstr & 0xFFFF;
} else if (SecondInstr != 0x60000000) {
return false;
}
// The register operands of the first instruction should be the stack-pointer
// (r1) as the input (RA) and r12 as the output (RT). If the second
// instruction is not a nop, then it should use r12 as both input and output.
auto CheckRegOperands = [](uint32_t Instr, uint8_t ExpectedRT,
uint8_t ExpectedRA) {
return ((Instr & 0x3E00000) >> 21 == ExpectedRT) &&
((Instr & 0x1F0000) >> 16 == ExpectedRA);
};
if (!CheckRegOperands(FirstInstr, 12, 1))
return false;
if (SecondInstr != 0x60000000 && !CheckRegOperands(SecondInstr, 12, 12))
return false;
int32_t StackFrameSize = (HiImm * 65536) + LoImm;
// Check that the adjusted size doesn't overflow what we can represent with 2
// instructions.
if (StackFrameSize < Config->SplitStackAdjustSize + INT32_MIN) {
error(getErrorLocation(Loc) + "split-stack prologue adjustment overflows");
return false;
}
int32_t AdjustedStackFrameSize =
StackFrameSize - Config->SplitStackAdjustSize;
LoImm = AdjustedStackFrameSize & 0xFFFF;
HiImm = (AdjustedStackFrameSize + 0x8000) >> 16;
if (HiImm) {
write32(Loc + 4, 0x3D810000 | (uint16_t)HiImm);
// If the low immediate is zero the second instruction will be a nop.
SecondInstr = LoImm ? 0x398C0000 | (uint16_t)LoImm : 0x60000000;
write32(Loc + 8, SecondInstr);
} else {
// addi r12, r1, imm
write32(Loc + 4, (0x39810000) | (uint16_t)LoImm);
write32(Loc + 8, 0x60000000);
}
return true;
}
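A worked example of the @ha/@l split used when rewriting the addis/addi pair above (standalone, with an assumed 32K frame and 16K adjustment):

#include <cassert>
#include <cstdint>

int main() {
  int32_t AdjustedStackFrameSize = -32768 - 16384;         // -49152
  int16_t LoImm = AdjustedStackFrameSize & 0xFFFF;         // 16384 (@l)
  int16_t HiImm = (AdjustedStackFrameSize + 0x8000) >> 16; // -1 (@ha)
  // addis shifts and sign-extends HiImm; addi sign-extends LoImm, so the
  // pair recombines to the exact displacement.
  assert(HiImm * 65536 + LoImm == -49152);
}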
TargetInfo *elf::getPPC64TargetInfo() {
static PPC64 Target;
return &Target;
deps/lld/ELF/Arch/RISCV.cpp vendored
@ -0,0 +1,279 @@
//===- RISCV.cpp ----------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
#include "Target.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
namespace {
class RISCV final : public TargetInfo {
public:
RISCV();
uint32_t calcEFlags() const override;
RelExpr getRelExpr(RelType Type, const Symbol &S,
const uint8_t *Loc) const override;
void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
};
} // end anonymous namespace
RISCV::RISCV() { NoneRel = R_RISCV_NONE; }
static uint32_t getEFlags(InputFile *F) {
if (Config->Is64)
return cast<ObjFile<ELF64LE>>(F)->getObj().getHeader()->e_flags;
return cast<ObjFile<ELF32LE>>(F)->getObj().getHeader()->e_flags;
}
uint32_t RISCV::calcEFlags() const {
assert(!ObjectFiles.empty());
uint32_t Target = getEFlags(ObjectFiles.front());
for (InputFile *F : ObjectFiles) {
uint32_t EFlags = getEFlags(F);
if (EFlags & EF_RISCV_RVC)
Target |= EF_RISCV_RVC;
if ((EFlags & EF_RISCV_FLOAT_ABI) != (Target & EF_RISCV_FLOAT_ABI))
error(toString(F) +
": cannot link object files with different floating-point ABI");
if ((EFlags & EF_RISCV_RVE) != (Target & EF_RISCV_RVE))
error(toString(F) +
": cannot link object files with different EF_RISCV_RVE");
}
return Target;
}
RelExpr RISCV::getRelExpr(const RelType Type, const Symbol &S,
const uint8_t *Loc) const {
switch (Type) {
case R_RISCV_JAL:
case R_RISCV_BRANCH:
case R_RISCV_CALL:
case R_RISCV_PCREL_HI20:
case R_RISCV_RVC_BRANCH:
case R_RISCV_RVC_JUMP:
case R_RISCV_32_PCREL:
return R_PC;
case R_RISCV_PCREL_LO12_I:
case R_RISCV_PCREL_LO12_S:
return R_RISCV_PC_INDIRECT;
case R_RISCV_RELAX:
case R_RISCV_ALIGN:
return R_HINT;
default:
return R_ABS;
}
}
// Extract bits V[Begin:End], where range is inclusive, and Begin must be < 63.
static uint32_t extractBits(uint64_t V, uint32_t Begin, uint32_t End) {
return (V & ((1ULL << (Begin + 1)) - 1)) >> End;
}
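A quick standalone check of extractBits (duplicated here so the snippet compiles on its own): bits are numbered from 0 at the LSB, and the [Begin:End] range is inclusive.

#include <cassert>
#include <cstdint>

static uint32_t extractBits(uint64_t V, uint32_t Begin, uint32_t End) {
  return (V & ((1ULL << (Begin + 1)) - 1)) >> End;
}

int main() {
  assert(extractBits(0b101100, 5, 2) == 0b1011);  // bits 5..2
  assert(extractBits(0xDEADBEEF, 31, 28) == 0xD); // top nibble
}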
void RISCV::relocateOne(uint8_t *Loc, const RelType Type,
const uint64_t Val) const {
switch (Type) {
case R_RISCV_32:
write32le(Loc, Val);
return;
case R_RISCV_64:
write64le(Loc, Val);
return;
case R_RISCV_RVC_BRANCH: {
checkInt(Loc, static_cast<int64_t>(Val) >> 1, 8, Type);
checkAlignment(Loc, Val, 2, Type);
uint16_t Insn = read16le(Loc) & 0xE383;
uint16_t Imm8 = extractBits(Val, 8, 8) << 12;
uint16_t Imm4_3 = extractBits(Val, 4, 3) << 10;
uint16_t Imm7_6 = extractBits(Val, 7, 6) << 5;
uint16_t Imm2_1 = extractBits(Val, 2, 1) << 3;
uint16_t Imm5 = extractBits(Val, 5, 5) << 2;
Insn |= Imm8 | Imm4_3 | Imm7_6 | Imm2_1 | Imm5;
write16le(Loc, Insn);
return;
}
case R_RISCV_RVC_JUMP: {
checkInt(Loc, static_cast<int64_t>(Val) >> 1, 11, Type);
checkAlignment(Loc, Val, 2, Type);
uint16_t Insn = read16le(Loc) & 0xE003;
uint16_t Imm11 = extractBits(Val, 11, 11) << 12;
uint16_t Imm4 = extractBits(Val, 4, 4) << 11;
uint16_t Imm9_8 = extractBits(Val, 9, 8) << 9;
uint16_t Imm10 = extractBits(Val, 10, 10) << 8;
uint16_t Imm6 = extractBits(Val, 6, 6) << 7;
uint16_t Imm7 = extractBits(Val, 7, 7) << 6;
uint16_t Imm3_1 = extractBits(Val, 3, 1) << 3;
uint16_t Imm5 = extractBits(Val, 5, 5) << 2;
Insn |= Imm11 | Imm4 | Imm9_8 | Imm10 | Imm6 | Imm7 | Imm3_1 | Imm5;
write16le(Loc, Insn);
return;
}
case R_RISCV_RVC_LUI: {
int32_t Imm = ((Val + 0x800) >> 12);
checkUInt(Loc, Imm, 6, Type);
if (Imm == 0) { // `c.lui rd, 0` is illegal, convert to `c.li rd, 0`
write16le(Loc, (read16le(Loc) & 0x0F83) | 0x4000);
} else {
uint16_t Imm17 = extractBits(Val + 0x800, 17, 17) << 12;
uint16_t Imm16_12 = extractBits(Val + 0x800, 16, 12) << 2;
write16le(Loc, (read16le(Loc) & 0xEF83) | Imm17 | Imm16_12);
}
return;
}
case R_RISCV_JAL: {
checkInt(Loc, static_cast<int64_t>(Val) >> 1, 20, Type);
checkAlignment(Loc, Val, 2, Type);
uint32_t Insn = read32le(Loc) & 0xFFF;
uint32_t Imm20 = extractBits(Val, 20, 20) << 31;
uint32_t Imm10_1 = extractBits(Val, 10, 1) << 21;
uint32_t Imm11 = extractBits(Val, 11, 11) << 20;
uint32_t Imm19_12 = extractBits(Val, 19, 12) << 12;
Insn |= Imm20 | Imm10_1 | Imm11 | Imm19_12;
write32le(Loc, Insn);
return;
}
case R_RISCV_BRANCH: {
checkInt(Loc, static_cast<int64_t>(Val) >> 1, 12, Type);
checkAlignment(Loc, Val, 2, Type);
uint32_t Insn = read32le(Loc) & 0x1FFF07F;
uint32_t Imm12 = extractBits(Val, 12, 12) << 31;
uint32_t Imm10_5 = extractBits(Val, 10, 5) << 25;
uint32_t Imm4_1 = extractBits(Val, 4, 1) << 8;
uint32_t Imm11 = extractBits(Val, 11, 11) << 7;
Insn |= Imm12 | Imm10_5 | Imm4_1 | Imm11;
write32le(Loc, Insn);
return;
}
// auipc + jalr pair
case R_RISCV_CALL: {
checkInt(Loc, Val, 32, Type);
if (isInt<32>(Val)) {
relocateOne(Loc, R_RISCV_PCREL_HI20, Val);
relocateOne(Loc + 4, R_RISCV_PCREL_LO12_I, Val);
}
return;
}
case R_RISCV_PCREL_HI20:
case R_RISCV_HI20: {
checkInt(Loc, Val, 32, Type);
uint32_t Hi = Val + 0x800;
write32le(Loc, (read32le(Loc) & 0xFFF) | (Hi & 0xFFFFF000));
return;
}
case R_RISCV_PCREL_LO12_I:
case R_RISCV_LO12_I: {
checkInt(Loc, Val, 32, Type);
uint32_t Hi = Val + 0x800;
uint32_t Lo = Val - (Hi & 0xFFFFF000);
write32le(Loc, (read32le(Loc) & 0xFFFFF) | ((Lo & 0xFFF) << 20));
return;
}
case R_RISCV_PCREL_LO12_S:
case R_RISCV_LO12_S: {
checkInt(Loc, Val, 32, Type);
uint32_t Hi = Val + 0x800;
uint32_t Lo = Val - (Hi & 0xFFFFF000);
uint32_t Imm11_5 = extractBits(Lo, 11, 5) << 25;
uint32_t Imm4_0 = extractBits(Lo, 4, 0) << 7;
write32le(Loc, (read32le(Loc) & 0x1FFF07F) | Imm11_5 | Imm4_0);
return;
}
case R_RISCV_ADD8:
*Loc += Val;
return;
case R_RISCV_ADD16:
write16le(Loc, read16le(Loc) + Val);
return;
case R_RISCV_ADD32:
write32le(Loc, read32le(Loc) + Val);
return;
case R_RISCV_ADD64:
write64le(Loc, read64le(Loc) + Val);
return;
case R_RISCV_SUB6:
*Loc = (*Loc & 0xc0) | (((*Loc & 0x3f) - Val) & 0x3f);
return;
case R_RISCV_SUB8:
*Loc -= Val;
return;
case R_RISCV_SUB16:
write16le(Loc, read16le(Loc) - Val);
return;
case R_RISCV_SUB32:
write32le(Loc, read32le(Loc) - Val);
return;
case R_RISCV_SUB64:
write64le(Loc, read64le(Loc) - Val);
return;
case R_RISCV_SET6:
*Loc = (*Loc & 0xc0) | (Val & 0x3f);
return;
case R_RISCV_SET8:
*Loc = Val;
return;
case R_RISCV_SET16:
write16le(Loc, Val);
return;
case R_RISCV_SET32:
case R_RISCV_32_PCREL:
write32le(Loc, Val);
return;
case R_RISCV_ALIGN:
case R_RISCV_RELAX:
return; // Ignored (for now)
case R_RISCV_NONE:
return; // Do nothing
// These are handled by the dynamic linker
case R_RISCV_RELATIVE:
case R_RISCV_COPY:
case R_RISCV_JUMP_SLOT:
// GP-relative relocations are only produced after relaxation, which
// we don't support for now
case R_RISCV_GPREL_I:
case R_RISCV_GPREL_S:
default:
error(getErrorLocation(Loc) +
"unimplemented relocation: " + toString(Type));
return;
}
}
TargetInfo *elf::getRISCVTargetInfo() {
static RISCV Target;
return &Target;
}
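The PCREL_HI20/LO12 cases above rely on the +0x800 rounding so that the sign-extended low 12 bits recombine exactly with the upper 20 bits; a standalone check with an assumed value:

#include <cassert>
#include <cstdint>

int main() {
  int64_t Val = 0x12345FFF;                  // assumed pc-relative distance
  uint32_t Hi = Val + 0x800;                 // round before truncating
  int64_t Hi20 = (int32_t)(Hi & 0xFFFFF000); // what auipc contributes
  int64_t Lo12 = Val - Hi20;                 // what addi/load imm contributes
  assert(Lo12 == -1);                        // sign-extended 12-bit value
  assert(Hi20 + Lo12 == Val);                // recombines exactly
}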
deps/lld/ELF/Arch/SPARCV9.cpp vendored
@ -35,6 +35,7 @@ public:
SPARCV9::SPARCV9() {
CopyRel = R_SPARC_COPY;
GotRel = R_SPARC_GLOB_DAT;
NoneRel = R_SPARC_NONE;
PltRel = R_SPARC_JMP_SLOT;
RelativeRel = R_SPARC_RELATIVE;
GotEntrySize = 8;
deps/lld/ELF/Arch/X86.cpp vendored
@ -48,6 +48,7 @@ public:
X86::X86() {
CopyRel = R_386_COPY;
GotRel = R_386_GLOB_DAT;
NoneRel = R_386_NONE;
PltRel = R_386_JUMP_SLOT;
IRelativeRel = R_386_IRELATIVE;
RelativeRel = R_386_RELATIVE;
@ -59,7 +60,11 @@ X86::X86() {
PltEntrySize = 16;
PltHeaderSize = 16;
TlsGdRelaxSkip = 2;
TrapInstr = 0xcccccccc; // 0xcc = INT3
TrapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
// Align to the non-PAE large page size (known as a superpage or huge page).
// FreeBSD automatically promotes large, superpage-aligned allocations.
DefaultImageBase = 0x400000;
}
static bool hasBaseReg(uint8_t ModRM) { return (ModRM & 0xc7) != 0x5; }
@ -152,7 +157,7 @@ RelExpr X86::adjustRelaxExpr(RelType Type, const uint8_t *Data,
}
void X86::writeGotPltHeader(uint8_t *Buf) const {
write32le(Buf, InX::Dynamic->getVA());
write32le(Buf, In.Dynamic->getVA());
}
void X86::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
@ -183,8 +188,8 @@ void X86::writePltHeader(uint8_t *Buf) const {
};
memcpy(Buf, V, sizeof(V));
uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
uint32_t GotPlt = InX::GotPlt->getVA() - Ebx;
uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
uint32_t GotPlt = In.GotPlt->getVA() - Ebx;
write32le(Buf + 2, GotPlt + 4);
write32le(Buf + 8, GotPlt + 8);
return;
@ -196,7 +201,7 @@ void X86::writePltHeader(uint8_t *Buf) const {
0x90, 0x90, 0x90, 0x90, // nop
};
memcpy(Buf, PltData, sizeof(PltData));
uint32_t GotPlt = InX::GotPlt->getVA();
uint32_t GotPlt = In.GotPlt->getVA();
write32le(Buf + 2, GotPlt + 4);
write32le(Buf + 8, GotPlt + 8);
}
@ -213,7 +218,7 @@ void X86::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
if (Config->Pic) {
// jmp *foo@GOT(%ebx)
uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
Buf[1] = 0xa3;
write32le(Buf + 2, GotPltEntryAddr - Ebx);
} else {
@ -447,8 +452,8 @@ void RetpolinePic::writePltHeader(uint8_t *Buf) const {
};
memcpy(Buf, Insn, sizeof(Insn));
uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
uint32_t GotPlt = InX::GotPlt->getVA() - Ebx;
uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
uint32_t GotPlt = In.GotPlt->getVA() - Ebx;
write32le(Buf + 2, GotPlt + 4);
write32le(Buf + 9, GotPlt + 8);
}
@ -467,7 +472,7 @@ void RetpolinePic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
};
memcpy(Buf, Insn, sizeof(Insn));
uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
unsigned Off = getPltEntryOffset(Index);
write32le(Buf + 3, GotPltEntryAddr - Ebx);
write32le(Buf + 8, -Off - 12 + 32);
@ -506,7 +511,7 @@ void RetpolineNoPic::writePltHeader(uint8_t *Buf) const {
};
memcpy(Buf, Insn, sizeof(Insn));
uint32_t GotPlt = InX::GotPlt->getVA();
uint32_t GotPlt = In.GotPlt->getVA();
write32le(Buf + 2, GotPlt + 4);
write32le(Buf + 8, GotPlt + 8);
}
deps/lld/ELF/Arch/X86_64.cpp vendored
@ -43,8 +43,8 @@ public:
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
bool adjustPrologueForCrossSplitStack(uint8_t *Loc,
uint8_t *End) const override;
bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
uint8_t StOther) const override;
private:
void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
@ -55,6 +55,7 @@ private:
template <class ELFT> X86_64<ELFT>::X86_64() {
CopyRel = R_X86_64_COPY;
GotRel = R_X86_64_GLOB_DAT;
NoneRel = R_X86_64_NONE;
PltRel = R_X86_64_JUMP_SLOT;
RelativeRel = R_X86_64_RELATIVE;
IRelativeRel = R_X86_64_IRELATIVE;
@ -66,7 +67,7 @@ template <class ELFT> X86_64<ELFT>::X86_64() {
PltEntrySize = 16;
PltHeaderSize = 16;
TlsGdRelaxSkip = 2;
TrapInstr = 0xcccccccc; // 0xcc = INT3
TrapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
// Align to the large page size (known as a superpage or huge page).
// FreeBSD automatically promotes large, superpage-aligned allocations.
@ -124,7 +125,7 @@ template <class ELFT> void X86_64<ELFT>::writeGotPltHeader(uint8_t *Buf) const {
// required, but it is documented in the psabi and the glibc dynamic linker
// seems to use it (note that this is relevant for linking ld.so, not any
// other program).
write64le(Buf, InX::Dynamic->getVA());
write64le(Buf, In.Dynamic->getVA());
}
template <class ELFT>
@ -140,8 +141,8 @@ template <class ELFT> void X86_64<ELFT>::writePltHeader(uint8_t *Buf) const {
0x0f, 0x1f, 0x40, 0x00, // nop
};
memcpy(Buf, PltData, sizeof(PltData));
uint64_t GotPlt = InX::GotPlt->getVA();
uint64_t Plt = InX::Plt->getVA();
uint64_t GotPlt = In.GotPlt->getVA();
uint64_t Plt = In.Plt->getVA();
write32le(Buf + 2, GotPlt - Plt + 2); // GOTPLT+8
write32le(Buf + 8, GotPlt - Plt + 4); // GOTPLT+16
}
@ -263,15 +264,6 @@ void X86_64<ELFT>::relaxTlsIeToLe(uint8_t *Loc, RelType Type,
template <class ELFT>
void X86_64<ELFT>::relaxTlsLdToLe(uint8_t *Loc, RelType Type,
uint64_t Val) const {
// Convert
// leaq bar@tlsld(%rip), %rdi
// callq __tls_get_addr@PLT
// leaq bar@dtpoff(%rax), %rcx
// to
// .word 0x6666
// .byte 0x66
// mov %fs:0,%rax
// leaq bar@tpoff(%rax), %rcx
if (Type == R_X86_64_DTPOFF64) {
write64le(Loc, Val);
return;
@ -286,7 +278,37 @@ void X86_64<ELFT>::relaxTlsLdToLe(uint8_t *Loc, RelType Type,
0x66, // .byte 0x66
0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax
};
memcpy(Loc - 3, Inst, sizeof(Inst));
if (Loc[4] == 0xe8) {
// Convert
// leaq bar@tlsld(%rip), %rdi # 48 8d 3d <Loc>
// callq __tls_get_addr@PLT # e8 <disp32>
// leaq bar@dtpoff(%rax), %rcx
// to
// .word 0x6666
// .byte 0x66
// mov %fs:0,%rax
// leaq bar@tpoff(%rax), %rcx
memcpy(Loc - 3, Inst, sizeof(Inst));
return;
}
if (Loc[4] == 0xff && Loc[5] == 0x15) {
// Convert
// leaq x@tlsld(%rip),%rdi # 48 8d 3d <Loc>
// call *__tls_get_addr@GOTPCREL(%rip) # ff 15 <disp32>
// to
// .long 0x66666666
// movq %fs:0,%rax
// See "Table 11.9: LD -> LE Code Transition (LP64)" in
// https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
Loc[-3] = 0x66;
memcpy(Loc - 2, Inst, sizeof(Inst));
return;
}
error(getErrorLocation(Loc - 3) +
"expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
}
template <class ELFT>
@ -481,23 +503,27 @@ namespace {
// B) Or a load of a stack pointer offset with an lea to r10 or r11.
template <>
bool X86_64<ELF64LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
uint8_t *End) const {
uint8_t *End,
uint8_t StOther) const {
if (Loc + 8 >= End)
return false;
// Replace "cmp %fs:0x70,%rsp" and subsequent branch
// with "stc, nopl 0x0(%rax,%rax,1)"
if (Loc + 8 < End && memcmp(Loc, "\x64\x48\x3b\x24\x25", 4) == 0) {
if (memcmp(Loc, "\x64\x48\x3b\x24\x25", 5) == 0) {
memcpy(Loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8);
return true;
}
// Adjust "lea -0x200(%rsp),%r10" to lea "-0x4200(%rsp),%r10"
if (Loc + 7 < End && memcmp(Loc, "\x4c\x8d\x94\x24\x00\xfe\xff", 7) == 0) {
memcpy(Loc, "\x4c\x8d\x94\x24\x00\xbe\xff", 7);
return true;
}
// Adjust "lea -0x200(%rsp),%r11" to lea "-0x4200(%rsp),%r11"
if (Loc + 7 < End && memcmp(Loc, "\x4c\x8d\x9c\x24\x00\xfe\xff", 7) == 0) {
memcpy(Loc, "\x4c\x8d\x9c\x24\x00\xbe\xff", 7);
// Adjust "lea X(%rsp),%rYY" to lea "(X - 0x4000)(%rsp),%rYY" where rYY could
// be r10 or r11. The lea instruction feeds a subsequent compare which checks
// if there is X available stack space. Making X larger effectively reserves
// that much additional space. The stack grows downward so subtract the value.
if (memcmp(Loc, "\x4c\x8d\x94\x24", 4) == 0 ||
memcmp(Loc, "\x4c\x8d\x9c\x24", 4) == 0) {
// The offset bytes are encoded four bytes after the start of the
// instruction.
write32le(Loc + 4, read32le(Loc + 4) - 0x4000);
return true;
}
return false;
@ -505,7 +531,8 @@ bool X86_64<ELF64LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
template <>
bool X86_64<ELF32LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
uint8_t *End) const {
uint8_t *End,
uint8_t StOther) const {
llvm_unreachable("Target doesn't support split stacks.");
}
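A worked example of the ELF64 displacement rewrite above (standalone): "lea -0x200(%rsp),%r10" carries disp32 0xfffffe00, and subtracting 0x4000 yields "lea -0x4200(%rsp),%r10".

#include <cassert>
#include <cstdint>

int main() {
  int32_t Disp = -0x200; // original probe offset
  Disp -= 0x4000;        // reserve 16K more; the stack grows downward
  assert(Disp == -0x4200);
  assert((uint32_t)Disp == 0xFFFFBE00); // encoded as 00 be ff ff in the insn
}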
@ -566,8 +593,8 @@ template <class ELFT> void Retpoline<ELFT>::writePltHeader(uint8_t *Buf) const {
};
memcpy(Buf, Insn, sizeof(Insn));
uint64_t GotPlt = InX::GotPlt->getVA();
uint64_t Plt = InX::Plt->getVA();
uint64_t GotPlt = In.GotPlt->getVA();
uint64_t Plt = In.Plt->getVA();
write32le(Buf + 2, GotPlt - Plt - 6 + 8);
write32le(Buf + 9, GotPlt - Plt - 13 + 16);
}
@ -586,7 +613,7 @@ void Retpoline<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
};
memcpy(Buf, Insn, sizeof(Insn));
uint64_t Off = TargetInfo::getPltEntryOffset(Index);
uint64_t Off = getPltEntryOffset(Index);
write32le(Buf + 3, GotPltEntryAddr - PltEntryAddr - 7);
write32le(Buf + 8, -Off - 12 + 32);
@ -629,7 +656,7 @@ void RetpolineZNow<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
memcpy(Buf, Insn, sizeof(Insn));
write32le(Buf + 3, GotPltEntryAddr - PltEntryAddr - 7);
write32le(Buf + 8, -TargetInfo::getPltEntryOffset(Index) - 12);
write32le(Buf + 8, -getPltEntryOffset(Index) - 12);
}
template <class ELFT> static TargetInfo *getTargetInfo() {
deps/lld/ELF/CMakeLists.txt vendored
@ -15,17 +15,19 @@ add_lld_library(lldELF
Arch/Hexagon.cpp
Arch/Mips.cpp
Arch/MipsArchTree.cpp
Arch/MSP430.cpp
Arch/PPC.cpp
Arch/PPC64.cpp
Arch/RISCV.cpp
Arch/SPARCV9.cpp
Arch/X86.cpp
Arch/X86_64.cpp
CallGraphSort.cpp
DWARF.cpp
Driver.cpp
DriverUtils.cpp
EhFrame.cpp
Filesystem.cpp
GdbIndex.cpp
ICF.cpp
InputFiles.cpp
InputSection.cpp
deps/lld/ELF/CallGraphSort.cpp vendored
@ -57,10 +57,7 @@ struct Edge {
};
struct Cluster {
Cluster(int Sec, size_t S) {
Sections.push_back(Sec);
Size = S;
}
Cluster(int Sec, size_t S) : Sections{Sec}, Size(S) {}
double getDensity() const {
if (Size == 0)
@ -72,7 +69,7 @@ struct Cluster {
size_t Size = 0;
uint64_t Weight = 0;
uint64_t InitialWeight = 0;
std::vector<Edge> Preds;
Edge BestPred = {-1, 0};
};
class CallGraphSort {
@ -96,12 +93,14 @@ constexpr int MAX_DENSITY_DEGRADATION = 8;
constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
} // end anonymous namespace
typedef std::pair<const InputSectionBase *, const InputSectionBase *>
SectionPair;
// Take the edge list in Config->CallGraphProfile, resolve symbol names to
// Symbols, and generate a graph between InputSections with the provided
// weights.
CallGraphSort::CallGraphSort() {
llvm::MapVector<std::pair<const InputSectionBase *, const InputSectionBase *>,
uint64_t> &Profile = Config->CallGraphProfile;
MapVector<SectionPair, uint64_t> &Profile = Config->CallGraphProfile;
DenseMap<const InputSectionBase *, int> SecToCluster;
auto GetOrCreateNode = [&](const InputSectionBase *IS) -> int {
@ -114,7 +113,7 @@ CallGraphSort::CallGraphSort() {
};
// Create the graph.
for (const auto &C : Profile) {
for (std::pair<SectionPair, uint64_t> &C : Profile) {
const auto *FromSB = cast<InputSectionBase>(C.first.first->Repl);
const auto *ToSB = cast<InputSectionBase>(C.first.second->Repl);
uint64_t Weight = C.second;
@ -136,8 +135,12 @@ CallGraphSort::CallGraphSort() {
if (From == To)
continue;
// Add an edge
Clusters[To].Preds.push_back({From, Weight});
// Remember the best edge.
Cluster &ToC = Clusters[To];
if (ToC.BestPred.From == -1 || ToC.BestPred.Weight < Weight) {
ToC.BestPred.From = From;
ToC.BestPred.Weight = Weight;
}
}
for (Cluster &C : Clusters)
C.InitialWeight = C.Weight;
@ -146,9 +149,7 @@ CallGraphSort::CallGraphSort() {
// It's bad to merge clusters which would degrade the density too much.
static bool isNewDensityBad(Cluster &A, Cluster &B) {
double NewDensity = double(A.Weight + B.Weight) / double(A.Size + B.Size);
if (NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION)
return true;
return false;
return NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION;
}
static void mergeClusters(Cluster &Into, Cluster &From) {
@ -167,9 +168,9 @@ void CallGraphSort::groupClusters() {
std::vector<int> SortedSecs(Clusters.size());
std::vector<Cluster *> SecToCluster(Clusters.size());
for (int SI = 0, SE = Clusters.size(); SI != SE; ++SI) {
SortedSecs[SI] = SI;
SecToCluster[SI] = &Clusters[SI];
for (size_t I = 0; I < Clusters.size(); ++I) {
SortedSecs[I] = I;
SecToCluster[I] = &Clusters[I];
}
std::stable_sort(SortedSecs.begin(), SortedSecs.end(), [&](int A, int B) {
@ -181,21 +182,11 @@ void CallGraphSort::groupClusters() {
// been merged into another cluster yet.
Cluster &C = Clusters[SI];
int BestPred = -1;
uint64_t BestWeight = 0;
for (Edge &E : C.Preds) {
if (BestPred == -1 || E.Weight > BestWeight) {
BestPred = E.From;
BestWeight = E.Weight;
}
}
// don't consider merging if the edge is unlikely.
if (BestWeight * 10 <= C.InitialWeight)
// Don't consider merging if the edge is unlikely.
if (C.BestPred.From == -1 || C.BestPred.Weight * 10 <= C.InitialWeight)
continue;
Cluster *PredC = SecToCluster[BestPred];
Cluster *PredC = SecToCluster[C.BestPred.From];
if (PredC == &C)
continue;
@ -229,7 +220,7 @@ DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
groupClusters();
// Generate order.
llvm::DenseMap<const InputSectionBase *, int> OrderMap;
DenseMap<const InputSectionBase *, int> OrderMap;
ssize_t CurOrder = 1;
for (const Cluster &C : Clusters)
deps/lld/ELF/Config.h vendored
@ -47,7 +47,7 @@ enum class ICFLevel { None, Safe, All };
enum class StripPolicy { None, All, Debug };
// For --unresolved-symbols.
enum class UnresolvedPolicy { ReportError, Warn, Ignore, IgnoreAll };
enum class UnresolvedPolicy { ReportError, Warn, Ignore };
// For --orphan-handling.
enum class OrphanHandlingPolicy { Place, Warn, Error };
@ -127,6 +127,7 @@ struct Configuration {
bool AsNeeded = false;
bool Bsymbolic;
bool BsymbolicFunctions;
bool CallGraphProfileSort;
bool CheckSections;
bool CompressDebugSections;
bool Cref;
@ -134,11 +135,13 @@ struct Configuration {
bool Demangle = true;
bool DisableVerify;
bool EhFrameHdr;
bool EmitLLVM;
bool EmitRelocs;
bool EnableNewDtags;
bool ExecuteOnly;
bool ExportDynamic;
bool FixCortexA53Errata843419;
bool FormatBinary = false;
bool GcSections;
bool GdbIndex;
bool GnuHash = false;
@ -156,6 +159,7 @@ struct Configuration {
bool OFormatBinary;
bool Omagic;
bool OptRemarksWithHotness;
bool PicThunk;
bool Pie;
bool PrintGcSections;
bool PrintIcfSections;
@ -170,19 +174,24 @@ struct Configuration {
bool Trace;
bool ThinLTOEmitImportsFiles;
bool ThinLTOIndexOnly;
bool TocOptimize;
bool UndefinedVersion;
bool UseAndroidRelrTags = false;
bool WarnBackrefs;
bool WarnCommon;
bool WarnIfuncTextrel;
bool WarnMissingEntry;
bool WarnSymbolOrdering;
bool WriteAddends;
bool ZCombreloc;
bool ZCopyreloc;
bool ZExecstack;
bool ZGlobal;
bool ZHazardplt;
bool ZInitfirst;
bool ZInterpose;
bool ZKeepTextSectionPrefix;
bool ZNodefaultlib;
bool ZNodelete;
bool ZNodlopen;
bool ZNow;
@ -212,6 +221,7 @@ struct Configuration {
unsigned LTOO;
unsigned Optimize;
unsigned ThinLTOJobs;
int32_t SplitStackAdjustSize;
// The following config options do not directly correspond to any
// particular command line options.

deps/lld/ELF/DWARF.cpp vendored

@ -1,4 +1,4 @@
//===- GdbIndex.cpp -------------------------------------------------------===//
//===- DWARF.cpp ----------------------------------------------------------===//
//
// The LLVM Linker
//
@ -14,8 +14,9 @@
//
//===----------------------------------------------------------------------===//
#include "GdbIndex.h"
#include "DWARF.h"
#include "Symbols.h"
#include "Target.h"
#include "lld/Common/Memory.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
#include "llvm/Object/ELFObjectFile.h"
@ -29,24 +30,28 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *Obj) {
for (InputSectionBase *Sec : Obj->getSections()) {
if (!Sec)
continue;
if (LLDDWARFSection *M = StringSwitch<LLDDWARFSection *>(Sec->Name)
.Case(".debug_info", &InfoSection)
.Case(".debug_ranges", &RangeSection)
.Case(".debug_line", &LineSection)
.Default(nullptr)) {
Sec->maybeDecompress();
M->Data = toStringRef(Sec->Data);
if (LLDDWARFSection *M =
StringSwitch<LLDDWARFSection *>(Sec->Name)
.Case(".debug_addr", &AddrSection)
.Case(".debug_gnu_pubnames", &GnuPubNamesSection)
.Case(".debug_gnu_pubtypes", &GnuPubTypesSection)
.Case(".debug_info", &InfoSection)
.Case(".debug_ranges", &RangeSection)
.Case(".debug_rnglists", &RngListsSection)
.Case(".debug_line", &LineSection)
.Default(nullptr)) {
M->Data = toStringRef(Sec->data());
M->Sec = Sec;
continue;
}
if (Sec->Name == ".debug_abbrev")
AbbrevSection = toStringRef(Sec->Data);
else if (Sec->Name == ".debug_gnu_pubnames")
GnuPubNamesSection = toStringRef(Sec->Data);
else if (Sec->Name == ".debug_gnu_pubtypes")
GnuPubTypesSection = toStringRef(Sec->Data);
AbbrevSection = toStringRef(Sec->data());
else if (Sec->Name == ".debug_str")
StrSection = toStringRef(Sec->Data);
StrSection = toStringRef(Sec->data());
else if (Sec->Name == ".debug_line_str")
LineStringSection = toStringRef(Sec->data());
}
}
@ -73,7 +78,10 @@ LLDDwarfObj<ELFT>::findAux(const InputSectionBase &Sec, uint64_t Pos,
// Broken debug info can point to a non-Defined symbol.
auto *DR = dyn_cast<Defined>(&File->getRelocTargetSym(Rel));
if (!DR) {
error("unsupported relocation target while parsing debug info");
RelType Type = Rel.getType(Config->IsMips64EL);
if (Type != Target->NoneRel)
error(toString(File) + ": relocation " + lld::toString(Type) + " at 0x" +
llvm::utohexstr(Rel.r_offset) + " has unsupported target");
return None;
}
uint64_t Val = DR->Value + getAddend<ELFT>(Rel);

deps/lld/ELF/DWARF.h vendored

@ -1,4 +1,4 @@
//===- GdbIndex.h --------------------------------------------*- C++ -*-===//
//===- DWARF.h -----------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
@ -7,10 +7,11 @@
//
//===-------------------------------------------------------------------===//
#ifndef LLD_ELF_GDB_INDEX_H
#define LLD_ELF_GDB_INDEX_H
#ifndef LLD_ELF_DWARF_H
#define LLD_ELF_DWARF_H
#include "InputFiles.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Object/ELF.h"
@ -24,44 +25,66 @@ struct LLDDWARFSection final : public llvm::DWARFSection {
};
template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
LLDDWARFSection InfoSection;
LLDDWARFSection RangeSection;
LLDDWARFSection LineSection;
StringRef AbbrevSection;
StringRef GnuPubNamesSection;
StringRef GnuPubTypesSection;
StringRef StrSection;
public:
explicit LLDDwarfObj(ObjFile<ELFT> *Obj);
void forEachInfoSections(
llvm::function_ref<void(const llvm::DWARFSection &)> F) const override {
F(InfoSection);
}
const llvm::DWARFSection &getRangeSection() const override {
return RangeSection;
}
const llvm::DWARFSection &getRnglistsSection() const override {
return RngListsSection;
}
const llvm::DWARFSection &getLineSection() const override {
return LineSection;
}
const llvm::DWARFSection &getAddrSection() const override {
return AddrSection;
}
const llvm::DWARFSection &getGnuPubNamesSection() const override {
return GnuPubNamesSection;
}
const llvm::DWARFSection &getGnuPubTypesSection() const override {
return GnuPubTypesSection;
}
StringRef getFileName() const override { return ""; }
StringRef getAbbrevSection() const override { return AbbrevSection; }
StringRef getStringSection() const override { return StrSection; }
StringRef getLineStringSection() const override { return LineStringSection; }
bool isLittleEndian() const override {
return ELFT::TargetEndianness == llvm::support::little;
}
llvm::Optional<llvm::RelocAddrEntry> find(const llvm::DWARFSection &Sec,
uint64_t Pos) const override;
private:
template <class RelTy>
llvm::Optional<llvm::RelocAddrEntry> findAux(const InputSectionBase &Sec,
uint64_t Pos,
ArrayRef<RelTy> Rels) const;
public:
explicit LLDDwarfObj(ObjFile<ELFT> *Obj);
const llvm::DWARFSection &getInfoSection() const override {
return InfoSection;
}
const llvm::DWARFSection &getRangeSection() const override {
return RangeSection;
}
const llvm::DWARFSection &getLineSection() const override {
return LineSection;
}
StringRef getFileName() const override { return ""; }
StringRef getAbbrevSection() const override { return AbbrevSection; }
StringRef getStringSection() const override { return StrSection; }
StringRef getGnuPubNamesSection() const override {
return GnuPubNamesSection;
}
StringRef getGnuPubTypesSection() const override {
return GnuPubTypesSection;
}
bool isLittleEndian() const override {
return ELFT::TargetEndianness == llvm::support::little;
}
llvm::Optional<llvm::RelocAddrEntry> find(const llvm::DWARFSection &Sec,
uint64_t Pos) const override;
LLDDWARFSection GnuPubNamesSection;
LLDDWARFSection GnuPubTypesSection;
LLDDWARFSection InfoSection;
LLDDWARFSection RangeSection;
LLDDWARFSection RngListsSection;
LLDDWARFSection LineSection;
LLDDWARFSection AddrSection;
StringRef AbbrevSection;
StringRef StrSection;
StringRef LineStringSection;
};
} // namespace elf

deps/lld/ELF/Driver.cpp vendored

@ -63,6 +63,7 @@ using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::sys;
using namespace llvm::support;
using namespace lld;
using namespace lld::elf;
@ -74,7 +75,7 @@ static void setConfigs(opt::InputArgList &Args);
bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly,
raw_ostream &Error) {
errorHandler().LogName = sys::path::filename(Args[0]);
errorHandler().LogName = args::getFilenameWithoutExe(Args[0]);
errorHandler().ErrorLimitExceededMsg =
"too many errors emitted, stopping now (use "
"-error-limit=0 to see all errors)";
@ -84,7 +85,6 @@ bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly,
InputSections.clear();
OutputSections.clear();
Tar = nullptr;
BinaryFiles.clear();
BitcodeFiles.clear();
ObjectFiles.clear();
@ -94,6 +94,10 @@ bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly,
Driver = make<LinkerDriver>();
Script = make<LinkerScript>();
Symtab = make<SymbolTable>();
Tar = nullptr;
memset(&In, 0, sizeof(In));
Config->ProgName = Args[0];
Driver->main(Args);
@ -125,9 +129,11 @@ static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef Emul) {
.Case("elf32_x86_64", {ELF32LEKind, EM_X86_64})
.Cases("elf32btsmip", "elf32btsmipn32", {ELF32BEKind, EM_MIPS})
.Cases("elf32ltsmip", "elf32ltsmipn32", {ELF32LEKind, EM_MIPS})
.Case("elf32ppc", {ELF32BEKind, EM_PPC})
.Case("elf32lriscv", {ELF32LEKind, EM_RISCV})
.Cases("elf32ppc", "elf32ppclinux", {ELF32BEKind, EM_PPC})
.Case("elf64btsmip", {ELF64BEKind, EM_MIPS})
.Case("elf64ltsmip", {ELF64LEKind, EM_MIPS})
.Case("elf64lriscv", {ELF64LEKind, EM_RISCV})
.Case("elf64ppc", {ELF64BEKind, EM_PPC64})
.Case("elf64lppc", {ELF64LEKind, EM_PPC64})
.Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64})
@ -183,7 +189,7 @@ void LinkerDriver::addFile(StringRef Path, bool WithLOption) {
return;
MemoryBufferRef MBRef = *Buffer;
if (InBinary) {
if (Config->FormatBinary) {
Files.push_back(make<BinaryFile>(MBRef));
return;
}
@ -218,7 +224,7 @@ void LinkerDriver::addFile(StringRef Path, bool WithLOption) {
return;
}
case file_magic::elf_shared_object:
if (Config->Relocatable) {
if (Config->Static || Config->Relocatable) {
error("attempted static link of dynamic object " + Path);
return;
}
@ -269,14 +275,17 @@ static void initLLVM() {
// Some command line options or some combinations of them are not allowed.
// This function checks for such errors.
static void checkOptions(opt::InputArgList &Args) {
static void checkOptions() {
// The MIPS ABI as of 2016 does not support the GNU-style symbol lookup
// table which is a relatively new feature.
if (Config->EMachine == EM_MIPS && Config->GnuHash)
error("the .gnu.hash section is not compatible with the MIPS target.");
error("the .gnu.hash section is not compatible with the MIPS target");
if (Config->FixCortexA53Errata843419 && Config->EMachine != EM_AARCH64)
error("--fix-cortex-a53-843419 is only supported on AArch64 targets.");
error("--fix-cortex-a53-843419 is only supported on AArch64 targets");
if (Config->TocOptimize && Config->EMachine != EM_PPC64)
error("--toc-optimize is only supported on the PowerPC64 target");
if (Config->Pie && Config->Shared)
error("-shared and -pie may not be used together");
@ -336,12 +345,13 @@ static bool getZFlag(opt::InputArgList &Args, StringRef K1, StringRef K2,
return Default;
}
static bool isKnown(StringRef S) {
static bool isKnownZFlag(StringRef S) {
return S == "combreloc" || S == "copyreloc" || S == "defs" ||
S == "execstack" || S == "hazardplt" || S == "initfirst" ||
S == "execstack" || S == "global" || S == "hazardplt" ||
S == "initfirst" || S == "interpose" ||
S == "keep-text-section-prefix" || S == "lazy" || S == "muldefs" ||
S == "nocombreloc" || S == "nocopyreloc" || S == "nodelete" ||
S == "nodlopen" || S == "noexecstack" ||
S == "nocombreloc" || S == "nocopyreloc" || S == "nodefaultlib" ||
S == "nodelete" || S == "nodlopen" || S == "noexecstack" ||
S == "nokeep-text-section-prefix" || S == "norelro" || S == "notext" ||
S == "now" || S == "origin" || S == "relro" || S == "retpolineplt" ||
S == "rodynamic" || S == "text" || S == "wxneeded" ||
@ -351,7 +361,7 @@ static bool isKnown(StringRef S) {
// Report an error for an unknown -z option.
static void checkZOptions(opt::InputArgList &Args) {
for (auto *Arg : Args.filtered(OPT_z))
if (!isKnown(Arg->getValue()))
if (!isKnownZFlag(Arg->getValue()))
error("unknown -z value: " + StringRef(Arg->getValue()));
}
@ -386,6 +396,23 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr) {
if (Args.hasArg(OPT_v) || Args.hasArg(OPT_version))
message(getLLDVersion() + " (compatible with GNU linkers)");
if (const char *Path = getReproduceOption(Args)) {
// Note that --reproduce is a debug option so you can ignore it
// if you are trying to understand the whole picture of the code.
Expected<std::unique_ptr<TarWriter>> ErrOrWriter =
TarWriter::create(Path, path::stem(Path));
if (ErrOrWriter) {
Tar = std::move(*ErrOrWriter);
Tar->append("response.txt", createResponseFile(Args));
Tar->append("version.txt", getLLDVersion() + "\n");
} else {
error("--reproduce: " + toString(ErrOrWriter.takeError()));
}
}
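For orientation, a hedged description of the resulting archive (member names follow from the code above; input files are appended elsewhere as they are opened): running a hypothetical ld.lld --reproduce=foo.tar main.o -o main produces foo.tar whose members are prefixed with "foo/" (path::stem of the archive name), containing response.txt with the rewritten command line, version.txt with the LLD version string, and a copy of each input file read during the link.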
readConfigs(Args);
checkZOptions(Args);
// The behavior of -v or --version is a bit strange, but this is
// needed for compatibility with GNU linkers.
if (Args.hasArg(OPT_v) && !Args.hasArg(OPT_INPUT))
@ -393,24 +420,6 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr) {
if (Args.hasArg(OPT_version))
return;
if (const char *Path = getReproduceOption(Args)) {
// Note that --reproduce is a debug option so you can ignore it
// if you are trying to understand the whole picture of the code.
Expected<std::unique_ptr<TarWriter>> ErrOrWriter =
TarWriter::create(Path, path::stem(Path));
if (ErrOrWriter) {
Tar = ErrOrWriter->get();
Tar->append("response.txt", createResponseFile(Args));
Tar->append("version.txt", getLLDVersion() + "\n");
make<std::unique_ptr<TarWriter>>(std::move(*ErrOrWriter));
} else {
error(Twine("--reproduce: failed to open ") + Path + ": " +
toString(ErrOrWriter.takeError()));
}
}
readConfigs(Args);
checkZOptions(Args);
initLLVM();
createFiles(Args);
if (errorCount())
@ -418,7 +427,7 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr) {
inferMachineType();
setConfigs(Args);
checkOptions(Args);
checkOptions();
if (errorCount())
return;
@ -448,9 +457,6 @@ static std::string getRpath(opt::InputArgList &Args) {
// Determines what we should do if there are remaining unresolved
// symbols after the name resolution.
static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &Args) {
if (Args.hasArg(OPT_relocatable))
return UnresolvedPolicy::IgnoreAll;
UnresolvedPolicy ErrorOrWarn = Args.hasFlag(OPT_error_unresolved_symbols,
OPT_warn_unresolved_symbols, true)
? UnresolvedPolicy::ReportError
@ -497,14 +503,11 @@ static Target2Policy getTarget2(opt::InputArgList &Args) {
}
static bool isOutputFormatBinary(opt::InputArgList &Args) {
if (auto *Arg = Args.getLastArg(OPT_oformat)) {
StringRef S = Arg->getValue();
if (S == "binary")
return true;
if (S.startswith("elf"))
return false;
StringRef S = Args.getLastArgValue(OPT_oformat, "elf");
if (S == "binary")
return true;
if (!S.startswith("elf"))
error("unknown --oformat value: " + S);
}
return false;
}
@ -645,38 +648,56 @@ static std::pair<bool, bool> getPackDynRelocs(opt::InputArgList &Args) {
static void readCallGraph(MemoryBufferRef MB) {
// Build a map from symbol name to symbol.
DenseMap<StringRef, const Symbol *> SymbolNameToSymbol;
DenseMap<StringRef, Symbol *> Map;
for (InputFile *File : ObjectFiles)
for (Symbol *Sym : File->getSymbols())
SymbolNameToSymbol[Sym->getName()] = Sym;
Map[Sym->getName()] = Sym;
for (StringRef L : args::getLines(MB)) {
SmallVector<StringRef, 3> Fields;
L.split(Fields, ' ');
uint64_t Count;
if (Fields.size() != 3 || !to_integer(Fields[2], Count))
fatal(MB.getBufferIdentifier() + ": parse error");
const Symbol *FromSym = SymbolNameToSymbol.lookup(Fields[0]);
const Symbol *ToSym = SymbolNameToSymbol.lookup(Fields[1]);
if (Config->WarnSymbolOrdering) {
if (!FromSym)
warn(MB.getBufferIdentifier() + ": no such symbol: " + Fields[0]);
if (!ToSym)
warn(MB.getBufferIdentifier() + ": no such symbol: " + Fields[1]);
auto FindSection = [&](StringRef Name) -> InputSectionBase * {
Symbol *Sym = Map.lookup(Name);
if (!Sym) {
if (Config->WarnSymbolOrdering)
warn(MB.getBufferIdentifier() + ": no such symbol: " + Name);
return nullptr;
}
maybeWarnUnorderableSymbol(Sym);
if (Defined *DR = dyn_cast_or_null<Defined>(Sym))
return dyn_cast_or_null<InputSectionBase>(DR->Section);
return nullptr;
};
for (StringRef Line : args::getLines(MB)) {
SmallVector<StringRef, 3> Fields;
Line.split(Fields, ' ');
uint64_t Count;
if (Fields.size() != 3 || !to_integer(Fields[2], Count)) {
error(MB.getBufferIdentifier() + ": parse error");
return;
}
if (InputSectionBase *From = FindSection(Fields[0]))
if (InputSectionBase *To = FindSection(Fields[1]))
Config->CallGraphProfile[std::make_pair(From, To)] += Count;
}
}
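The ordering file parsed above is plain text, one edge per line: caller symbol, callee symbol, and a sample count, separated by single spaces. A hypothetical input:

_start main 1
main foo 100
main bar 10

Edges whose endpoints cannot be resolved to a live input section are dropped (with a warning under --warn-symbol-ordering), and counts for a repeated section pair accumulate into one weight.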
template <class ELFT> static void readCallGraphsFromObjectFiles() {
for (auto File : ObjectFiles) {
auto *Obj = cast<ObjFile<ELFT>>(File);
for (const Elf_CGProfile_Impl<ELFT> &CGPE : Obj->CGProfile) {
auto *FromSym = dyn_cast<Defined>(&Obj->getSymbol(CGPE.cgp_from));
auto *ToSym = dyn_cast<Defined>(&Obj->getSymbol(CGPE.cgp_to));
if (!FromSym || !ToSym)
continue;
auto *From = dyn_cast_or_null<InputSectionBase>(FromSym->Section);
auto *To = dyn_cast_or_null<InputSectionBase>(ToSym->Section);
if (From && To)
Config->CallGraphProfile[{From, To}] += CGPE.cgp_weight;
}
if (!FromSym || !ToSym || Count == 0)
continue;
warnUnorderableSymbol(FromSym);
warnUnorderableSymbol(ToSym);
const Defined *FromSymD = dyn_cast<Defined>(FromSym);
const Defined *ToSymD = dyn_cast<Defined>(ToSym);
if (!FromSymD || !ToSymD)
continue;
const auto *FromSB = dyn_cast_or_null<InputSectionBase>(FromSymD->Section);
const auto *ToSB = dyn_cast_or_null<InputSectionBase>(ToSymD->Section);
if (!FromSB || !ToSB)
continue;
Config->CallGraphProfile[std::make_pair(FromSB, ToSB)] += Count;
}
}
@ -753,7 +774,10 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
Config->DynamicLinker = getDynamicLinker(Args);
Config->EhFrameHdr =
Args.hasFlag(OPT_eh_frame_hdr, OPT_no_eh_frame_hdr, false);
Config->EmitLLVM = Args.hasArg(OPT_plugin_opt_emit_llvm, false);
Config->EmitRelocs = Args.hasArg(OPT_emit_relocs);
Config->CallGraphProfileSort = Args.hasFlag(
OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
Config->EnableNewDtags =
Args.hasFlag(OPT_enable_new_dtags, OPT_disable_new_dtags, true);
Config->Entry = Args.getLastArgValue(OPT_entry);
@ -808,6 +832,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
Config->SingleRoRx = Args.hasArg(OPT_no_rosegment);
Config->SoName = Args.getLastArgValue(OPT_soname);
Config->SortSection = getSortSection(Args);
Config->SplitStackAdjustSize = args::getInteger(Args, OPT_split_stack_adjust_size, 16384);
Config->Strip = getStrip(Args);
Config->Sysroot = Args.getLastArgValue(OPT_sysroot);
Config->Target1Rel = Args.hasFlag(OPT_target1_rel, OPT_target1_abs, false);
@ -837,15 +862,20 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
Config->WarnBackrefs =
Args.hasFlag(OPT_warn_backrefs, OPT_no_warn_backrefs, false);
Config->WarnCommon = Args.hasFlag(OPT_warn_common, OPT_no_warn_common, false);
Config->WarnIfuncTextrel =
Args.hasFlag(OPT_warn_ifunc_textrel, OPT_no_warn_ifunc_textrel, false);
Config->WarnSymbolOrdering =
Args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
Config->ZCombreloc = getZFlag(Args, "combreloc", "nocombreloc", true);
Config->ZCopyreloc = getZFlag(Args, "copyreloc", "nocopyreloc", true);
Config->ZExecstack = getZFlag(Args, "execstack", "noexecstack", false);
Config->ZGlobal = hasZOption(Args, "global");
Config->ZHazardplt = hasZOption(Args, "hazardplt");
Config->ZInitfirst = hasZOption(Args, "initfirst");
Config->ZInterpose = hasZOption(Args, "interpose");
Config->ZKeepTextSectionPrefix = getZFlag(
Args, "keep-text-section-prefix", "nokeep-text-section-prefix", false);
Config->ZNodefaultlib = hasZOption(Args, "nodefaultlib");
Config->ZNodelete = hasZOption(Args, "nodelete");
Config->ZNodlopen = hasZOption(Args, "nodlopen");
Config->ZNow = getZFlag(Args, "now", "lazy", false);
@ -876,6 +906,9 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
if (Config->ThinLTOJobs == 0)
error("--thinlto-jobs: number of threads must be > 0");
if (Config->SplitStackAdjustSize < 0)
error("--split-stack-adjust-size: size must be >= 0");
// Parse ELF{32,64}{LE,BE} and CPU type.
if (auto *Arg = Args.getLastArg(OPT_m)) {
StringRef S = Arg->getValue();
@ -964,22 +997,18 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
// This function initialize such members. See Config.h for the details
// of these values.
static void setConfigs(opt::InputArgList &Args) {
ELFKind Kind = Config->EKind;
uint16_t Machine = Config->EMachine;
ELFKind K = Config->EKind;
uint16_t M = Config->EMachine;
Config->CopyRelocs = (Config->Relocatable || Config->EmitRelocs);
Config->Is64 = (Kind == ELF64LEKind || Kind == ELF64BEKind);
Config->IsLE = (Kind == ELF32LEKind || Kind == ELF64LEKind);
Config->Endianness =
Config->IsLE ? support::endianness::little : support::endianness::big;
Config->IsMips64EL = (Kind == ELF64LEKind && Machine == EM_MIPS);
Config->Is64 = (K == ELF64LEKind || K == ELF64BEKind);
Config->IsLE = (K == ELF32LEKind || K == ELF64LEKind);
Config->Endianness = Config->IsLE ? endianness::little : endianness::big;
Config->IsMips64EL = (K == ELF64LEKind && M == EM_MIPS);
Config->Pic = Config->Pie || Config->Shared;
Config->PicThunk = Args.hasArg(OPT_pic_veneer, Config->Pic);
Config->Wordsize = Config->Is64 ? 8 : 4;
// There is an ILP32 ABI for x86-64, although it's not very popular.
// It is called the x32 ABI.
bool IsX32 = (Kind == ELF32LEKind && Machine == EM_X86_64);
// ELF defines two different ways to store relocation addends as shown below:
//
// Rel: Addends are stored to the location where relocations are applied.
@ -993,8 +1022,9 @@ static void setConfigs(opt::InputArgList &Args) {
// You cannot choose which one, Rel or Rela, you want to use. Instead each
// ABI defines which one you need to use. The following expression expresses
// that.
Config->IsRela =
(Config->Is64 || IsX32 || Machine == EM_PPC) && Machine != EM_MIPS;
Config->IsRela = M == EM_AARCH64 || M == EM_AMDGPU || M == EM_HEXAGON ||
M == EM_PPC || M == EM_PPC64 || M == EM_RISCV ||
M == EM_X86_64;
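A minimal sketch of the distinction (a hypothetical helper, not part of this diff; assumes <cstdint> and <cstring>, and uses a 32-bit in-place read, though the real width is per-target and per-relocation):

// REL stores the addend in the bytes being patched; RELA carries it in
// the relocation record itself.
static int64_t getAddendSketch(bool IsRela, int64_t RecordAddend,
                               const uint8_t *Loc) {
  if (IsRela)
    return RecordAddend;        // RELA: explicit r_addend from the record
  int32_t V;
  std::memcpy(&V, Loc, sizeof(V)); // REL: implicit addend read in place
  return V;
}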
// If the output uses REL relocations we must store the dynamic relocation
// addends to the output sections. We also store addends for RELA relocations
@ -1004,10 +1034,13 @@ static void setConfigs(opt::InputArgList &Args) {
Config->WriteAddends = Args.hasFlag(OPT_apply_dynamic_relocs,
OPT_no_apply_dynamic_relocs, false) ||
!Config->IsRela;
Config->TocOptimize =
Args.hasFlag(OPT_toc_optimize, OPT_no_toc_optimize, M == EM_PPC64);
}
// Returns a value of "-format" option.
static bool getBinaryOption(StringRef S) {
static bool isFormatBinary(StringRef S) {
if (S == "binary")
return true;
if (S == "elf" || S == "default")
@ -1034,7 +1067,10 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) {
StringRef From;
StringRef To;
std::tie(From, To) = StringRef(Arg->getValue()).split('=');
readDefsym(From, MemoryBufferRef(To, "-defsym"));
if (From.empty() || To.empty())
error("-defsym: syntax error: " + StringRef(Arg->getValue()));
else
readDefsym(From, MemoryBufferRef(To, "-defsym"));
break;
}
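For example, a hypothetical -defsym=alias=real defines alias with the value of the symbol real; with the check above, malformed spellings such as -defsym=alias= or -defsym==real now produce a syntax error instead of being parsed further.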
case OPT_script:
@ -1049,7 +1085,7 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) {
Config->AsNeeded = true;
break;
case OPT_format:
InBinary = getBinaryOption(Arg->getValue());
Config->FormatBinary = isFormatBinary(Arg->getValue());
break;
case OPT_no_as_needed:
Config->AsNeeded = false;
@ -1220,33 +1256,34 @@ template <class ELFT> static void handleUndefined(StringRef Name) {
Symtab->fetchLazy<ELFT>(Sym);
}
template <class ELFT> static bool shouldDemote(Symbol &Sym) {
// If all references to a DSO happen to be weak, the DSO is not added to
// DT_NEEDED. If that happens, we need to eliminate shared symbols created
// from the DSO. Otherwise, they become dangling references that point to a
// non-existent DSO.
if (auto *S = dyn_cast<SharedSymbol>(&Sym))
return !S->getFile<ELFT>().IsNeeded;
template <class ELFT> static void handleLibcall(StringRef Name) {
Symbol *Sym = Symtab->find(Name);
if (!Sym || !Sym->isLazy())
return;
// We are done processing archives, so lazy symbols that were used but not
// found can be converted to undefined. We could also just delete the other
// lazy symbols, but that seems to be more work than it is worth.
return Sym.isLazy() && Sym.IsUsedInRegularObj;
MemoryBufferRef MB;
if (auto *LO = dyn_cast<LazyObject>(Sym))
MB = LO->File->MB;
else
MB = cast<LazyArchive>(Sym)->getMemberBuffer();
if (isBitcode(MB))
Symtab->fetchLazy<ELFT>(Sym);
}
// Some files, such as .so or files between -{start,end}-lib may be removed
// after their symbols are added to the symbol table. If that happens, we
// need to remove symbols that refer files that no longer exist, so that
// they won't appear in the symbol table of the output file.
//
// We remove symbols by demoting them to undefined symbol.
template <class ELFT> static void demoteSymbols() {
// If all references to a DSO happen to be weak, the DSO is not added
// to DT_NEEDED. If that happens, we need to eliminate shared symbols
// created from the DSO. Otherwise, they become dangling references
// that point to a non-existent DSO.
template <class ELFT> static void demoteSharedSymbols() {
for (Symbol *Sym : Symtab->getSymbols()) {
if (shouldDemote<ELFT>(*Sym)) {
bool Used = Sym->Used;
replaceSymbol<Undefined>(Sym, nullptr, Sym->getName(), Sym->Binding,
Sym->StOther, Sym->Type);
Sym->Used = Used;
if (auto *S = dyn_cast<SharedSymbol>(Sym)) {
if (!S->getFile<ELFT>().IsNeeded) {
bool Used = S->Used;
replaceSymbol<Undefined>(S, nullptr, S->getName(), STB_WEAK, S->StOther,
S->Type);
S->Used = Used;
}
}
}
}
@ -1315,6 +1352,85 @@ static void findKeepUniqueSections(opt::InputArgList &Args) {
}
}
template <class ELFT> static Symbol *addUndefined(StringRef Name) {
return Symtab->addUndefined<ELFT>(Name, STB_GLOBAL, STV_DEFAULT, 0, false,
nullptr);
}
// The --wrap option is a feature to rename symbols so that you can write
// wrappers for existing functions. If you pass `-wrap=foo`, all
// occurrences of symbol `foo` are resolved to `__wrap_foo` (so, you are
// expected to write a `__wrap_foo` function as a wrapper). The original
// symbol becomes accessible as `__real_foo`, so you can call that from your
// wrapper.
//
// This data structure is instantiated for each -wrap option.
struct WrappedSymbol {
Symbol *Sym;
Symbol *Real;
Symbol *Wrap;
};
// Handles -wrap option.
//
// This function instantiates wrapper symbols. At this point, they seem
// like they are not being used at all, so we explicitly set some flags so
// that LTO won't eliminate them.
template <class ELFT>
static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &Args) {
std::vector<WrappedSymbol> V;
DenseSet<StringRef> Seen;
for (auto *Arg : Args.filtered(OPT_wrap)) {
StringRef Name = Arg->getValue();
if (!Seen.insert(Name).second)
continue;
Symbol *Sym = Symtab->find(Name);
if (!Sym)
continue;
Symbol *Real = addUndefined<ELFT>(Saver.save("__real_" + Name));
Symbol *Wrap = addUndefined<ELFT>(Saver.save("__wrap_" + Name));
V.push_back({Sym, Real, Wrap});
// We want to tell LTO not to inline symbols to be overwritten
// because LTO doesn't know the final symbol contents after renaming.
Real->CanInline = false;
Sym->CanInline = false;
// Tell LTO not to eliminate these symbols.
Sym->IsUsedInRegularObj = true;
Wrap->IsUsedInRegularObj = true;
}
return V;
}
// Do renaming for -wrap by updating pointers to symbols.
//
// When this function is executed, only InputFiles and symbol table
// contain pointers to symbol objects. We visit them to replace pointers,
// so that wrapped symbols are swapped as instructed by the command line.
template <class ELFT> static void wrapSymbols(ArrayRef<WrappedSymbol> Wrapped) {
DenseMap<Symbol *, Symbol *> Map;
for (const WrappedSymbol &W : Wrapped) {
Map[W.Sym] = W.Wrap;
Map[W.Real] = W.Sym;
}
// Update pointers in input files.
parallelForEach(ObjectFiles, [&](InputFile *File) {
std::vector<Symbol *> &Syms = File->getMutableSymbols();
for (size_t I = 0, E = Syms.size(); I != E; ++I)
if (Symbol *S = Map.lookup(Syms[I]))
Syms[I] = S;
});
// Update pointers in the symbol table.
for (const WrappedSymbol &W : Wrapped)
Symtab->wrap(W.Sym, W.Real, W.Wrap);
}
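As a usage illustration (an assumed example, not taken from this diff): a program linked with --wrap=malloc has every reference to malloc redirected to __wrap_malloc, while __real_malloc reaches the original definition.

#include <cstddef>
#include <cstdio>

// Hypothetical wrapper; link with: clang++ main.o wrap.o -Wl,--wrap=malloc
extern "C" void *__real_malloc(size_t);       // bound to the original malloc
extern "C" void *__wrap_malloc(size_t Size) { // all malloc calls land here
  std::fprintf(stderr, "malloc(%zu)\n", Size);
  return __real_malloc(Size);
}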
static const char *LibcallRoutineNames[] = {
#define HANDLE_LIBCALL(code, name) name,
#include "llvm/IR/RuntimeLibcalls.def"
@ -1325,6 +1441,8 @@ static const char *LibcallRoutineNames[] = {
// all linker scripts have already been parsed.
template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
Target = getTarget();
InX<ELFT>::VerSym = nullptr;
InX<ELFT>::VerNeed = nullptr;
Config->MaxPageSize = getMaxPageSize(Args);
Config->ImageBase = getImageBase(Args);
@ -1380,8 +1498,8 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
// Some symbols (such as __ehdr_start) are defined lazily only when there
// are undefined symbols for them, so we add these to trigger that logic.
for (StringRef Sym : Script->ReferencedSymbols)
Symtab->addUndefined<ELFT>(Sym);
for (StringRef Name : Script->ReferencedSymbols)
addUndefined<ELFT>(Name);
// Handle the `--undefined <sym>` options.
for (StringRef S : Config->Undefined)
@ -1396,11 +1514,20 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
// in a bitcode file in an archive member, we need to arrange to use LTO to
// compile those archive members by adding them to the link beforehand.
//
// With this the symbol table should be complete. After this, no new names
// except a few linker-synthesized ones will be added to the symbol table.
// However, adding all libcall symbols to the link can have undesired
// consequences. For example, the libgcc implementation of
// __sync_val_compare_and_swap_8 on 32-bit ARM pulls in an .init_array entry
// that aborts the program if the Linux kernel does not support 64-bit
// atomics, which would prevent the program from running even if it does not
// use 64-bit atomics.
//
// Therefore, we only add libcall symbols to the link before LTO if we have
// to, i.e. if the symbol's definition is in bitcode. Any other required
// libcall symbols will be added to the link after LTO when we add the LTO
// object file to the link.
if (!BitcodeFiles.empty())
for (const char *S : LibcallRoutineNames)
handleUndefined<ELFT>(S);
handleLibcall<ELFT>(S);
// Return if there were name resolution errors.
if (errorCount())
@ -1424,6 +1551,9 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
Out::ElfHeader = make<OutputSection>("", 0, SHF_ALLOC);
Out::ElfHeader->Size = sizeof(typename ELFT::Ehdr);
// Create wrapped symbols for -wrap option.
std::vector<WrappedSymbol> Wrapped = addWrappedSymbols<ELFT>(Args);
// We need to create some reserved symbols such as _end. Create them.
if (!Config->Relocatable)
addReservedSymbols();
@ -1436,12 +1566,11 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
if (!Config->Relocatable)
Symtab->scanVersionScript();
// Create wrapped symbols for -wrap option.
for (auto *Arg : Args.filtered(OPT_wrap))
Symtab->addSymbolWrap<ELFT>(Arg->getValue());
// Do link-time optimization if given files are LLVM bitcode files.
// This compiles bitcode files into real object files.
//
// With this the symbol table should be complete. After this, no new names
// except a few linker-synthesized ones will be added to the symbol table.
Symtab->addCombinedLTOObject<ELFT>();
if (errorCount())
return;
@ -1452,8 +1581,15 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
if (Config->ThinLTOIndexOnly)
return;
// Likewise, --plugin-opt=emit-llvm is an option to make LTO create
// an output file in bitcode and exit, so that you can just get a
// combined bitcode file.
if (Config->EmitLLVM)
return;
// Apply symbol renames for -wrap.
Symtab->applySymbolWrap();
if (!Wrapped.empty())
wrapSymbols<ELFT>(Wrapped);
// We now have a complete list of input files.
// Beyond this point, no new files are added.
@ -1481,27 +1617,19 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
// supports them.
if (Config->ARMHasBlx == false)
warn("lld uses blx instruction, no object with architecture supporting "
"feature detected.");
if (Config->ARMJ1J2BranchEncoding == false)
warn("lld uses extended branch encoding, no object with architecture "
"supporting feature detected.");
if (Config->ARMHasMovtMovw == false)
warn("lld may use movt/movw, no object with architecture supporting "
"feature detected.");
"feature detected");
}
// This adds a .comment section containing a version string. We have to add it
// before decompressAndMergeSections because the .comment section is a
// mergeable section.
// before mergeSections because the .comment section is a mergeable section.
if (!Config->Relocatable)
InputSections.push_back(createCommentSection());
// Do size optimizations: garbage collection, merging of SHF_MERGE sections
// and identical code folding.
decompressSections();
splitSections<ELFT>();
markLive<ELFT>();
demoteSymbols<ELFT>();
demoteSharedSymbols<ELFT>();
mergeSections();
if (Config->ICF != ICFLevel::None) {
findKeepUniqueSections<ELFT>(Args);
@ -1509,9 +1637,12 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
}
// Read the callgraph now that we know what was gced or icfed
if (auto *Arg = Args.getLastArg(OPT_call_graph_ordering_file))
if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
readCallGraph(*Buffer);
if (Config->CallGraphProfileSort) {
if (auto *Arg = Args.getLastArg(OPT_call_graph_ordering_file))
if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
readCallGraph(*Buffer);
readCallGraphsFromObjectFiles<ELFT>();
}
// Write the result to the file.
writeResult<ELFT>();

deps/lld/ELF/Driver.h vendored

@ -42,9 +42,6 @@ private:
// True if we are in --start-lib and --end-lib.
bool InLib = false;
// True if we are in -format=binary and -format=elf.
bool InBinary = false;
std::vector<InputFile *> Files;
};

deps/lld/ELF/DriverUtils.cpp vendored

@ -139,8 +139,9 @@ opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> Argv) {
}
void elf::printHelp() {
ELFOptTable().PrintHelp(outs(), Config->ProgName.data(), "lld",
false /*ShowHidden*/, true /*ShowAllAliases*/);
ELFOptTable().PrintHelp(
outs(), (Config->ProgName + " [options] file...").str().c_str(), "lld",
false /*ShowHidden*/, true /*ShowAllAliases*/);
outs() << "\n";
// Scripts generated by Libtool versions up to at least 2.4.6 (the most

deps/lld/ELF/EhFrame.cpp vendored

@ -44,7 +44,7 @@ public:
private:
template <class P> void failOn(const P *Loc, const Twine &Msg) {
fatal("corrupted .eh_frame: " + Msg + "\n>>> defined in " +
IS->getObjMsg((const uint8_t *)Loc - IS->Data.data()));
IS->getObjMsg((const uint8_t *)Loc - IS->data().data()));
}
uint8_t readByte();
@ -59,7 +59,7 @@ private:
}
size_t elf::readEhRecordSize(InputSectionBase *S, size_t Off) {
return EhReader(S, S->Data.slice(Off)).readEhRecordSize();
return EhReader(S, S->data().slice(Off)).readEhRecordSize();
}
// .eh_frame section is a sequence of records. Each record starts with

deps/lld/ELF/ICF.cpp vendored

@ -252,7 +252,10 @@ bool ICF<ELFT>::constantEq(const InputSection *SecA, ArrayRef<RelTy> RA,
auto *DA = dyn_cast<Defined>(&SA);
auto *DB = dyn_cast<Defined>(&SB);
if (!DA || !DB)
// Placeholder symbols generated by linker scripts look the same now but
// may have different values later.
if (!DA || !DB || DA->ScriptDefined || DB->ScriptDefined)
return false;
// Relocations referring to absolute symbols are constant-equal if their
@ -298,7 +301,7 @@ bool ICF<ELFT>::constantEq(const InputSection *SecA, ArrayRef<RelTy> RA,
template <class ELFT>
bool ICF<ELFT>::equalsConstant(const InputSection *A, const InputSection *B) {
if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags ||
A->getSize() != B->getSize() || A->Data != B->Data)
A->getSize() != B->getSize() || A->data() != B->data())
return false;
// If two sections have different output sections, we cannot merge them.
@ -420,6 +423,22 @@ void ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> Fn) {
++Cnt;
}
// Combine the hashes of the sections referenced by the given section into its
// hash.
template <class ELFT, class RelTy>
static void combineRelocHashes(unsigned Cnt, InputSection *IS,
ArrayRef<RelTy> Rels) {
uint32_t Hash = IS->Class[Cnt % 2];
for (RelTy Rel : Rels) {
Symbol &S = IS->template getFile<ELFT>()->getRelocTargetSym(Rel);
if (auto *D = dyn_cast<Defined>(&S))
if (auto *RelSec = dyn_cast_or_null<InputSection>(D->Section))
Hash += RelSec->Class[Cnt % 2];
}
// Set MSB to 1 to avoid collisions with non-hash IDs.
IS->Class[(Cnt + 1) % 2] = Hash | (1U << 31);
}
static void print(const Twine &S) {
if (Config->PrintIcfSections)
message(S);
@ -435,10 +454,18 @@ template <class ELFT> void ICF<ELFT>::run() {
// Initially, we use hash values to partition sections.
parallelForEach(Sections, [&](InputSection *S) {
// Set MSB to 1 to avoid collisions with non-hash IDs.
S->Class[0] = xxHash64(S->Data) | (1U << 31);
S->Class[0] = xxHash64(S->data());
});
for (unsigned Cnt = 0; Cnt != 2; ++Cnt) {
parallelForEach(Sections, [&](InputSection *S) {
if (S->AreRelocsRela)
combineRelocHashes<ELFT>(Cnt, S, S->template relas<ELFT>());
else
combineRelocHashes<ELFT>(Cnt, S, S->template rels<ELFT>());
});
}
// From now on, sections in Sections vector are ordered so that sections
// in the same equivalence class are consecutive in the vector.
std::stable_sort(Sections.begin(), Sections.end(),

deps/lld/ELF/InputFiles.cpp vendored

@ -46,7 +46,7 @@ std::vector<LazyObjFile *> elf::LazyObjFiles;
std::vector<InputFile *> elf::ObjectFiles;
std::vector<InputFile *> elf::SharedFiles;
TarWriter *elf::Tar;
std::unique_ptr<TarWriter> elf::Tar;
InputFile::InputFile(Kind K, MemoryBufferRef M)
: MB(M), GroupId(NextGroupId), FileKind(K) {
@ -125,11 +125,7 @@ std::string InputFile::getSrcMsg(const Symbol &Sym, InputSectionBase &Sec,
template <class ELFT> void ObjFile<ELFT>::initializeDwarf() {
Dwarf = llvm::make_unique<DWARFContext>(make_unique<LLDDwarfObj<ELFT>>(this));
const DWARFObject &Obj = Dwarf->getDWARFObj();
DWARFDataExtractor LineData(Obj, Obj.getLineSection(), Config->IsLE,
Config->Wordsize);
for (std::unique_ptr<DWARFCompileUnit> &CU : Dwarf->compile_units()) {
for (std::unique_ptr<DWARFUnit> &CU : Dwarf->compile_units()) {
auto Report = [](Error Err) {
handleAllErrors(std::move(Err),
[](ErrorInfoBase &Info) { warn(Info.message()); });
@ -324,17 +320,6 @@ StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
return Signature;
}
template <class ELFT>
ArrayRef<typename ObjFile<ELFT>::Elf_Word>
ObjFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) {
const ELFFile<ELFT> &Obj = this->getObj();
ArrayRef<Elf_Word> Entries =
CHECK(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), this);
if (Entries.empty() || Entries[0] != GRP_COMDAT)
fatal(toString(this) + ": unsupported SHT_GROUP format");
return Entries.slice(1);
}
template <class ELFT> bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) {
// On a regular link we don't merge sections if -O0 (default is -O1). This
// sometimes makes the linker significantly faster, although the output will
@ -416,6 +401,11 @@ void ObjFile<ELFT>::initializeSections(
continue;
const Elf_Shdr &Sec = ObjSections[I];
if (Sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE)
CGProfile = check(
this->getObj().template getSectionContentsAsArray<Elf_CGProfile>(
&Sec));
// SHF_EXCLUDE'ed sections are discarded by the linker. However,
// if -r is given, we'll let the final link discard such sections.
// This is compatible with GNU.
@ -439,22 +429,34 @@ void ObjFile<ELFT>::initializeSections(
case SHT_GROUP: {
// De-duplicate section groups by their signatures.
StringRef Signature = getShtGroupSignature(ObjSections, Sec);
bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second;
this->Sections[I] = &InputSection::Discarded;
// If it is a new section group, we want to keep group members.
// Group leader sections, which contain indices of group members, are
// discarded because they are useless beyond this point. The only
// exception is the -r option because in order to produce re-linkable
// object files, we want to pass through basically everything.
ArrayRef<Elf_Word> Entries =
CHECK(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), this);
if (Entries.empty())
fatal(toString(this) + ": empty SHT_GROUP");
// The first word of a SHT_GROUP section contains flags. Currently,
// the standard defines only the GRP_COMDAT flag for COMDAT groups.
// A group with an empty flag word doesn't define anything; such sections
// are just skipped.
if (Entries[0] == 0)
continue;
if (Entries[0] != GRP_COMDAT)
fatal(toString(this) + ": unsupported SHT_GROUP format");
bool IsNew = ComdatGroups.insert(CachedHashStringRef(Signature)).second;
if (IsNew) {
if (Config->Relocatable)
this->Sections[I] = createInputSection(Sec);
continue;
continue;
}
// Otherwise, discard group members.
for (uint32_t SecIndex : getShtGroupEntries(Sec)) {
for (uint32_t SecIndex : Entries.slice(1)) {
if (SecIndex >= Size)
fatal(toString(this) +
": invalid section index in group: " + Twine(SecIndex));
@ -478,11 +480,13 @@ void ObjFile<ELFT>::initializeSections(
// .ARM.exidx sections have a reverse dependency on the InputSection they
// have a SHF_LINK_ORDER dependency, this is identified by the sh_link.
if (Sec.sh_flags & SHF_LINK_ORDER) {
if (Sec.sh_link >= this->Sections.size())
InputSectionBase *LinkSec = nullptr;
if (Sec.sh_link < this->Sections.size())
LinkSec = this->Sections[Sec.sh_link];
if (!LinkSec)
fatal(toString(this) +
": invalid sh_link index: " + Twine(Sec.sh_link));
InputSectionBase *LinkSec = this->Sections[Sec.sh_link];
InputSection *IS = cast<InputSection>(this->Sections[I]);
LinkSec->DependentSections.push_back(IS);
if (!isa<InputSection>(LinkSec))
@ -598,7 +602,7 @@ InputSectionBase *ObjFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) {
// as a given section.
static InputSection *toRegularSection(MergeInputSection *Sec) {
return make<InputSection>(Sec->File, Sec->Flags, Sec->Type, Sec->Alignment,
Sec->Data, Sec->Name);
Sec->data(), Sec->Name);
}
template <class ELFT>
@ -618,9 +622,9 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
// FIXME: Retain the first attribute section we see. The eglibc ARM
// dynamic loaders require the presence of an attribute section for dlopen
// to work. In a full implementation we would merge all attribute sections.
if (InX::ARMAttributes == nullptr) {
InX::ARMAttributes = make<InputSection>(*this, Sec, Name);
return InX::ARMAttributes;
if (In.ARMAttributes == nullptr) {
In.ARMAttributes = make<InputSection>(*this, Sec, Name);
return In.ARMAttributes;
}
return &InputSection::Discarded;
}
@ -638,8 +642,16 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
// This section contains relocation information.
// If -r is given, we do not interpret or apply relocation
// but just copy relocation sections to output.
if (Config->Relocatable)
return make<InputSection>(*this, Sec, Name);
if (Config->Relocatable) {
InputSection *RelocSec = make<InputSection>(*this, Sec, Name);
// We want to add a dependency to the target section, similar to what we
// do for -emit-relocs below. This is useful when a linker script contains
// the "/DISCARD/" command. It is perhaps uncommon to use a script with
// -r, but we faced it in the Linux kernel and have to handle such a case
// without crashing.
Target->DependentSections.push_back(RelocSec);
return RelocSec;
}
if (Target->FirstRelocation)
fatal(toString(this) +
@ -704,7 +716,7 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
// for split stack will include a .note.GNU-split-stack section.
if (Name == ".note.GNU-split-stack") {
if (Config->Relocatable) {
error("Cannot mix split-stack and non-split-stack in a relocatable link");
error("cannot mix split-stack and non-split-stack in a relocatable link");
return &InputSection::Discarded;
}
this->SplitStack = true;
@ -724,7 +736,8 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
// sections. Drop those sections to avoid duplicate symbol errors.
// FIXME: This is glibc PR20543, we should remove this hack once that has been
// fixed for a while.
if (Name.startswith(".gnu.linkonce."))
if (Name == ".gnu.linkonce.t.__x86.get_pc_thunk.bx" ||
Name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx")
return &InputSection::Discarded;
// If we are creating a new .build-id section, strip existing .build-id
@ -806,7 +819,7 @@ template <class ELFT> Symbol *ObjFile<ELFT>::createSymbol(const Elf_Sym *Sym) {
if (Sec == &InputSection::Discarded)
return Symtab->addUndefined<ELFT>(Name, Binding, StOther, Type,
/*CanOmitFromDynSym=*/false, this);
return Symtab->addRegular(Name, StOther, Type, Value, Size, Binding, Sec,
return Symtab->addDefined(Name, StOther, Type, Value, Size, Binding, Sec,
this);
}
}
@ -940,8 +953,7 @@ std::vector<const typename ELFT::Verdef *> SharedFile<ELFT>::parseVerdefs() {
auto *CurVerdef = reinterpret_cast<const Elf_Verdef *>(Verdef);
Verdef += CurVerdef->vd_next;
unsigned VerdefIndex = CurVerdef->vd_ndx;
if (Verdefs.size() <= VerdefIndex)
Verdefs.resize(VerdefIndex + 1);
Verdefs.resize(VerdefIndex + 1);
Verdefs[VerdefIndex] = CurVerdef;
}
@ -993,7 +1005,17 @@ template <class ELFT> void SharedFile<ELFT>::parseRest() {
for (size_t I = 0; I < Syms.size(); ++I) {
const Elf_Sym &Sym = Syms[I];
// ELF spec requires that all local symbols precede weak or global
// symbols in each symbol table, and the index of first non-local symbol
// is stored to sh_info. If a local symbol appears after some non-local
// symbol, that's a violation of the spec.
StringRef Name = CHECK(Sym.getName(this->StringTable), this);
if (Sym.getBinding() == STB_LOCAL) {
warn("found local symbol '" + Name +
"' in global part of symbol table in file " + toString(this));
continue;
}
if (Sym.isUndefined()) {
Symbol *S = Symtab->addUndefined<ELFT>(Name, Sym.getBinding(),
Sym.st_other, Sym.getType(),
@ -1002,16 +1024,6 @@ template <class ELFT> void SharedFile<ELFT>::parseRest() {
continue;
}
// ELF spec requires that all local symbols precede weak or global
// symbols in each symbol table, and the index of first non-local symbol
// is stored to sh_info. If a local symbol appears after some non-local
// symbol, that's a violation of the spec.
if (Sym.getBinding() == STB_LOCAL) {
warn("found local symbol '" + Name +
"' in global part of symbol table in file " + toString(this));
continue;
}
// MIPS BFD linker puts _gp_disp symbol into DSO files and incorrectly
// assigns VER_NDX_LOCAL to this section global symbol. Here is a
// workaround for this bug.
@ -1054,6 +1066,9 @@ static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) {
switch (T.getArch()) {
case Triple::aarch64:
return EM_AARCH64;
case Triple::amdgcn:
case Triple::r600:
return EM_AMDGPU;
case Triple::arm:
case Triple::thumb:
return EM_ARM;
@ -1064,9 +1079,12 @@ static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) {
case Triple::mips64:
case Triple::mips64el:
return EM_MIPS;
case Triple::msp430:
return EM_MSP430;
case Triple::ppc:
return EM_PPC;
case Triple::ppc64:
case Triple::ppc64le:
return EM_PPC64;
case Triple::x86:
return T.isOSIAMCU() ? EM_IAMCU : EM_386;
@ -1178,7 +1196,7 @@ static ELFKind getELFKind(MemoryBufferRef MB) {
}
void BinaryFile::parse() {
ArrayRef<uint8_t> Data = toArrayRef(MB.getBuffer());
ArrayRef<uint8_t> Data = arrayRefFromStringRef(MB.getBuffer());
auto *Section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
8, Data, ".data");
Sections.push_back(Section);
@ -1192,11 +1210,11 @@ void BinaryFile::parse() {
if (!isAlnum(S[I]))
S[I] = '_';
Symtab->addRegular(Saver.save(S + "_start"), STV_DEFAULT, STT_OBJECT, 0, 0,
Symtab->addDefined(Saver.save(S + "_start"), STV_DEFAULT, STT_OBJECT, 0, 0,
STB_GLOBAL, Section, nullptr);
Symtab->addRegular(Saver.save(S + "_end"), STV_DEFAULT, STT_OBJECT,
Symtab->addDefined(Saver.save(S + "_end"), STV_DEFAULT, STT_OBJECT,
Data.size(), 0, STB_GLOBAL, Section, nullptr);
Symtab->addRegular(Saver.save(S + "_size"), STV_DEFAULT, STT_OBJECT,
Symtab->addDefined(Saver.save(S + "_size"), STV_DEFAULT, STT_OBJECT,
Data.size(), 0, STB_GLOBAL, nullptr, nullptr);
}
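As a consumption sketch (an assumed file name, with the GNU-ld-compatible symbol naming lld applies to binary inputs): embedding blob.bin via -b binary turns non-alphanumeric name characters into underscores, yielding _start/_end/_size symbols that user code can reference directly.

#include <cstddef>

// Hypothetical consumer of a file embedded with "-b binary blob.bin".
extern "C" const char _binary_blob_bin_start[];
extern "C" const char _binary_blob_bin_end[];
// The _size symbol is absolute: its address, not its contents, is the size.
static const std::size_t BlobSize =
    static_cast<std::size_t>(_binary_blob_bin_end - _binary_blob_bin_start);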
@ -1262,25 +1280,11 @@ template <class ELFT> void LazyObjFile::parse() {
return;
}
switch (getELFKind(this->MB)) {
case ELF32LEKind:
addElfSymbols<ELF32LE>();
if (getELFKind(this->MB) != Config->EKind) {
error("incompatible file: " + this->MB.getBufferIdentifier());
return;
case ELF32BEKind:
addElfSymbols<ELF32BE>();
return;
case ELF64LEKind:
addElfSymbols<ELF64LE>();
return;
case ELF64BEKind:
addElfSymbols<ELF64BE>();
return;
default:
llvm_unreachable("getELFKind");
}
}
template <class ELFT> void LazyObjFile::addElfSymbols() {
ELFFile<ELFT> Obj = check(ELFFile<ELFT>::create(MB.getBuffer()));
ArrayRef<typename ELFT::Shdr> Sections = CHECK(Obj.sections(), this);
@ -1305,12 +1309,9 @@ std::string elf::replaceThinLTOSuffix(StringRef Path) {
StringRef Suffix = Config->ThinLTOObjectSuffixReplace.first;
StringRef Repl = Config->ThinLTOObjectSuffixReplace.second;
if (!Path.endswith(Suffix)) {
error("-thinlto-object-suffix-replace=" + Suffix + ";" + Repl +
" was given, but " + Path + " does not end with the suffix");
return "";
}
return (Path.drop_back(Suffix.size()) + Repl).str();
if (Path.consume_back(Suffix))
return (Path + Repl).str();
return Path;
}
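For example, with a hypothetical --thinlto-object-suffix-replace=.thinlto.bc;.o, the path dir/a.thinlto.bc is rewritten to dir/a.o; after this change, a path lacking the suffix is returned unchanged rather than reported as an error.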
template void ArchiveFile::parse<ELF32LE>();

deps/lld/ELF/InputFiles.h vendored

@ -50,7 +50,7 @@ class Symbol;
// If -reproduce option is given, all input files are written
// to this tar archive.
extern llvm::TarWriter *Tar;
extern std::unique_ptr<llvm::TarWriter> Tar;
// Opens a given file.
llvm::Optional<MemoryBufferRef> readFile(StringRef Path);
@ -86,7 +86,9 @@ public:
// Returns object file symbols. It is a runtime error to call this
// function on files of other types.
ArrayRef<Symbol *> getSymbols() {
ArrayRef<Symbol *> getSymbols() { return getMutableSymbols(); }
std::vector<Symbol *> &getMutableSymbols() {
assert(FileKind == BinaryKind || FileKind == ObjKind ||
FileKind == BitcodeKind);
return Symbols;
@ -169,10 +171,10 @@ template <class ELFT> class ObjFile : public ELFFileBase<ELFT> {
typedef typename ELFT::Sym Elf_Sym;
typedef typename ELFT::Shdr Elf_Shdr;
typedef typename ELFT::Word Elf_Word;
typedef typename ELFT::CGProfile Elf_CGProfile;
StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
const Elf_Shdr &Sec);
ArrayRef<Elf_Word> getShtGroupEntries(const Elf_Shdr &Sec);
public:
static bool classof(const InputFile *F) { return F->kind() == Base::ObjKind; }
@ -218,6 +220,9 @@ public:
// Pointer to this input file's .llvm_addrsig section, if it has one.
const Elf_Shdr *AddrsigSec = nullptr;
// SHT_LLVM_CALL_GRAPH_PROFILE table
ArrayRef<Elf_CGProfile> CGProfile;
private:
void
initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups);
@ -272,8 +277,6 @@ public:
bool AddedToLink = false;
private:
template <class ELFT> void addElfSymbols();
uint64_t OffsetInArchive;
};

deps/lld/ELF/InputSection.cpp vendored

@ -21,7 +21,6 @@
#include "Thunks.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/Object/Decompressor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
@ -64,11 +63,11 @@ InputSectionBase::InputSectionBase(InputFile *File, uint64_t Flags,
StringRef Name, Kind SectionKind)
: SectionBase(SectionKind, Name, Flags, Entsize, Alignment, Type, Info,
Link),
File(File), Data(Data) {
File(File), RawData(Data) {
// In order to reduce memory allocation, we assume that mergeable
// sections are smaller than 4 GiB, which is not an unreasonable
// assumption as of 2017.
if (SectionKind == SectionBase::Merge && Data.size() > UINT32_MAX)
if (SectionKind == SectionBase::Merge && RawData.size() > UINT32_MAX)
error(toString(this) + ": section too large");
NumRelocations = 0;
@ -80,6 +79,17 @@ InputSectionBase::InputSectionBase(InputFile *File, uint64_t Flags,
if (!isPowerOf2_64(V))
fatal(toString(File) + ": section sh_addralign is not a power of 2");
this->Alignment = V;
// In ELF, each section can be compressed by zlib, and if compressed,
// its name may be mangled (e.g. ".debug_info" becomes ".zdebug_info").
// If that's the case, demangle the section name so that we can handle
// the section as if it weren't compressed.
if ((Flags & SHF_COMPRESSED) || Name.startswith(".zdebug")) {
if (!zlib::isAvailable())
error(toString(File) + ": contains a compressed section, " +
"but zlib is not available");
parseCompressedHeader();
}
}
// Drop SHF_GROUP bit unless we are producing a re-linkable object file.
@ -128,13 +138,25 @@ InputSectionBase::InputSectionBase(ObjFile<ELFT> &File,
size_t InputSectionBase::getSize() const {
if (auto *S = dyn_cast<SyntheticSection>(this))
return S->getSize();
if (UncompressedSize >= 0)
return UncompressedSize;
return RawData.size();
}
return Data.size();
void InputSectionBase::uncompress() const {
size_t Size = UncompressedSize;
UncompressedBuf.reset(new char[Size]);
if (Error E =
zlib::uncompress(toStringRef(RawData), UncompressedBuf.get(), Size))
fatal(toString(this) +
": uncompress failed: " + llvm::toString(std::move(E)));
RawData = makeArrayRef((uint8_t *)UncompressedBuf.get(), Size);
}
uint64_t InputSectionBase::getOffsetInFile() const {
const uint8_t *FileStart = (const uint8_t *)File->MB.getBufferStart();
const uint8_t *SecStart = Data.begin();
const uint8_t *SecStart = data().begin();
return SecStart - FileStart;
}
@ -180,34 +202,70 @@ OutputSection *SectionBase::getOutputSection() {
return Sec ? Sec->getParent() : nullptr;
}
// Decompress section contents if required. Note that this function
// is called from parallelForEach, so it must be thread-safe.
void InputSectionBase::maybeDecompress() {
if (DecompressBuf)
return;
if (!(Flags & SHF_COMPRESSED) && !Name.startswith(".zdebug"))
// When a section is compressed, `RawData` consists of a header followed
// by zlib-compressed data. This function parses the header to initialize
// the `UncompressedSize` member and removes the header from `RawData`.
void InputSectionBase::parseCompressedHeader() {
typedef typename ELF64LE::Chdr Chdr64;
typedef typename ELF32LE::Chdr Chdr32;
// Old-style header
if (Name.startswith(".zdebug")) {
if (!toStringRef(RawData).startswith("ZLIB")) {
error(toString(this) + ": corrupted compressed section header");
return;
}
RawData = RawData.slice(4);
if (RawData.size() < 8) {
error(toString(this) + ": corrupted compressed section header");
return;
}
UncompressedSize = read64be(RawData.data());
RawData = RawData.slice(8);
// Restore the original section name.
// (e.g. ".zdebug_info" -> ".debug_info")
Name = Saver.save("." + Name.substr(2));
return;
}
// Decompress a section.
Decompressor Dec = check(Decompressor::create(Name, toStringRef(Data),
Config->IsLE, Config->Is64));
size_t Size = Dec.getDecompressedSize();
DecompressBuf.reset(new char[Size + Name.size()]());
if (Error E = Dec.decompress({DecompressBuf.get(), Size}))
fatal(toString(this) +
": decompress failed: " + llvm::toString(std::move(E)));
Data = makeArrayRef((uint8_t *)DecompressBuf.get(), Size);
assert(Flags & SHF_COMPRESSED);
Flags &= ~(uint64_t)SHF_COMPRESSED;
// A section name may have been altered if compressed. If that's
// the case, restore the original name. (i.e. ".zdebug_" -> ".debug_")
if (Name.startswith(".zdebug")) {
DecompressBuf[Size] = '.';
memcpy(&DecompressBuf[Size + 1], Name.data() + 2, Name.size() - 2);
Name = StringRef(&DecompressBuf[Size], Name.size() - 1);
// New-style 64-bit header
if (Config->Is64) {
if (RawData.size() < sizeof(Chdr64)) {
error(toString(this) + ": corrupted compressed section");
return;
}
auto *Hdr = reinterpret_cast<const Chdr64 *>(RawData.data());
if (Hdr->ch_type != ELFCOMPRESS_ZLIB) {
error(toString(this) + ": unsupported compression type");
return;
}
UncompressedSize = Hdr->ch_size;
RawData = RawData.slice(sizeof(*Hdr));
return;
}
// New-style 32-bit header
if (RawData.size() < sizeof(Chdr32)) {
error(toString(this) + ": corrupted compressed section");
return;
}
auto *Hdr = reinterpret_cast<const Chdr32 *>(RawData.data());
if (Hdr->ch_type != ELFCOMPRESS_ZLIB) {
error(toString(this) + ": unsupported compression type");
return;
}
UncompressedSize = Hdr->ch_size;
RawData = RawData.slice(sizeof(*Hdr));
}
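For reference, a sketch of the old-style layout handled above (the SHF_COMPRESSED branches instead read an Elf32_Chdr/Elf64_Chdr):

// Old-style ".zdebug*" section contents:
//   bytes 0..3   the magic string "ZLIB"
//   bytes 4..11  uncompressed size, 64-bit big-endian (hence read64be)
//   bytes 12..   the raw zlib stream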
InputSection *InputSectionBase::getLinkOrderDep() const {
@ -230,14 +288,17 @@ Defined *InputSectionBase::getEnclosingFunction(uint64_t Offset) {
// Returns a source location string. Used to construct an error message.
template <class ELFT>
std::string InputSectionBase::getLocation(uint64_t Offset) {
std::string SecAndOffset = (Name + "+0x" + utohexstr(Offset)).str();
// We don't have file for synthetic sections.
if (getFile<ELFT>() == nullptr)
return (Config->OutputFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")")
return (Config->OutputFile + ":(" + SecAndOffset + ")")
.str();
// First check if we can get desired values from debugging information.
if (Optional<DILineInfo> Info = getFile<ELFT>()->getDILineInfo(this, Offset))
return Info->FileName + ":" + std::to_string(Info->Line);
return Info->FileName + ":" + std::to_string(Info->Line) + ":(" +
SecAndOffset + ")";
// File->SourceFile contains STT_FILE symbol that contains a
// source file name. If it's missing, we use an object file name.
@ -246,10 +307,10 @@ std::string InputSectionBase::getLocation(uint64_t Offset) {
SrcFile = toString(File);
if (Defined *D = getEnclosingFunction<ELFT>(Offset))
return SrcFile + ":(function " + toString(*D) + ")";
return SrcFile + ":(function " + toString(*D) + ": " + SecAndOffset + ")";
// If there's no symbol, print out the offset in the section.
return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str();
return (SrcFile + ":(" + SecAndOffset + ")");
}
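For reference, the three message shapes the function above can produce, with illustrative values:

  foo.c:42:(.text+0x1c)             when DWARF line info is available
  foo.c:(function bar: .text+0x1c)  when only an enclosing function symbol exists
  foo.c:(.text+0x1c)                otherwise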
// This function is intended to be used for constructing an error message.
@ -259,9 +320,6 @@ std::string InputSectionBase::getLocation(uint64_t Offset) {
//
// Returns an empty string if there's no way to get line info.
std::string InputSectionBase::getSrcMsg(const Symbol &Sym, uint64_t Offset) {
// Synthetic sections don't have input files.
if (!File)
return "";
return File->getSrcMsg(Sym, *this, Offset);
}
@ -275,9 +333,6 @@ std::string InputSectionBase::getSrcMsg(const Symbol &Sym, uint64_t Offset) {
//
// path/to/foo.o:(function bar) in archive path/to/bar.a
std::string InputSectionBase::getObjMsg(uint64_t Off) {
// Synthetic sections don't have input files.
if (!File)
return ("<internal>:(" + Name + "+0x" + utohexstr(Off) + ")").str();
std::string Filename = File->getName();
std::string Archive;
@ -362,7 +417,7 @@ void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
// Output section VA is zero for -r, so r_offset is an offset within the
// section, but for --emit-relocs it is a virtual address.
P->r_offset = Sec->getVA(Rel.r_offset);
P->setSymbolAndType(InX::SymTab->getSymbolIndex(&Sym), Type,
P->setSymbolAndType(In.SymTab->getSymbolIndex(&Sym), Type,
Config->IsMips64EL);
if (Sym.Type == STT_SECTION) {
@ -380,14 +435,14 @@ void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
error("STT_SECTION symbol should be defined");
continue;
}
SectionBase *Section = D->Section;
if (Section == &InputSection::Discarded) {
SectionBase *Section = D->Section->Repl;
if (!Section->Live) {
P->setSymbolAndType(0, 0, false);
continue;
}
int64_t Addend = getAddend<ELFT>(Rel);
const uint8_t *BufLoc = Sec->Data.begin() + Rel.r_offset;
const uint8_t *BufLoc = Sec->data().begin() + Rel.r_offset;
if (!RelTy::IsRela)
Addend = Target->getImplicitAddend(BufLoc, Type);
@ -487,6 +542,62 @@ static uint64_t getARMStaticBase(const Symbol &Sym) {
return OS->PtLoad->FirstSec->Addr;
}
// For R_RISCV_PC_INDIRECT (R_RISCV_PCREL_LO12_{I,S}), the symbol actually
// points to the corresponding R_RISCV_PCREL_HI20 relocation, and the target VA
// is calculated using PCREL_HI20's symbol.
//
// This function returns the R_RISCV_PCREL_HI20 relocation from
// R_RISCV_PCREL_LO12's symbol and addend.
static Relocation *getRISCVPCRelHi20(const Symbol *Sym, uint64_t Addend) {
const Defined *D = cast<Defined>(Sym);
InputSection *IS = cast<InputSection>(D->Section);
if (Addend != 0)
warn("Non-zero addend in R_RISCV_PCREL_LO12 relocation to " +
IS->getObjMsg(D->Value) + " is ignored");
// Relocations are sorted by offset, so we can use std::equal_range to do
// binary search.
auto Range = std::equal_range(IS->Relocations.begin(), IS->Relocations.end(),
D->Value, RelocationOffsetComparator{});
for (auto It = std::get<0>(Range); It != std::get<1>(Range); ++It)
if (isRelExprOneOf<R_PC>(It->Expr))
return &*It;
error("R_RISCV_PCREL_LO12 relocation points to " + IS->getObjMsg(D->Value) +
" without an associated R_RISCV_PCREL_HI20 relocation");
return nullptr;
}
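The lookup above depends on Relocations being sorted by offset. A reduced sketch of the heterogeneous comparator pattern it relies on (RelocationOffsetComparator lives elsewhere in lld; the types and names below are simplified stand-ins):

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

struct Reloc { uint64_t Offset; bool IsPC; };

struct RelocOffsetCmp {
  bool operator()(const Reloc &R, uint64_t Off) const { return R.Offset < Off; }
  bool operator()(uint64_t Off, const Reloc &R) const { return Off < R.Offset; }
};

// Returns the sub-range of relocations whose offset equals Value, so the
// caller can scan that narrow range for the expression kind it wants (R_PC
// in the function above).
static std::pair<std::vector<Reloc>::const_iterator,
                 std::vector<Reloc>::const_iterator>
relocsAt(const std::vector<Reloc> &Rels, uint64_t Value) {
  return std::equal_range(Rels.begin(), Rels.end(), Value, RelocOffsetCmp{});
}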
// A TLS symbol's virtual address is relative to the TLS segment. Add a
// target-specific adjustment to produce a thread-pointer-relative offset.
static int64_t getTlsTpOffset() {
switch (Config->EMachine) {
case EM_ARM:
case EM_AARCH64:
// Variant 1. The thread pointer points to a TCB with a fixed 2-word size,
// followed by a variable amount of alignment padding, followed by the TLS
// segment.
//
// NB: While the ARM/AArch64 ABI formally has a 2-word TCB size, lld
// effectively increases the TCB size to 8 words for Android compatibility.
// It accomplishes this by increasing the segment's alignment.
return alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align);
case EM_386:
case EM_X86_64:
// Variant 2. The TLS segment is located just before the thread pointer.
return -Out::TlsPhdr->p_memsz;
case EM_PPC64:
// The thread pointer points to a fixed offset from the start of the
// executable's TLS segment. An offset of 0x7000 allows a signed 16-bit
// offset to reach 0x1000 of TCB/thread-library data and 0xf000 of the
// program's TLS segment.
return -0x7000;
default:
llvm_unreachable("unhandled Config->EMachine");
}
}
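A quick standalone check of the variant-1 arithmetic above, assuming an 8-byte word size; alignTo here mirrors the rounding llvm::alignTo performs for power-of-two alignments (which p_align always is):

#include <cassert>
#include <cstdint>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) & ~(Align - 1); // Align must be a power of two
}

int main() {
  // AArch64 with a 16-byte-aligned TLS segment: the 2-word TCB pads out to
  // 16 bytes, so the first TLS block lives at TP + 16.
  assert(alignTo(8 * 2, 16) == 16);
  // A more strongly aligned segment grows the padding with it.
  assert(alignTo(8 * 2, 64) == 64);
}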
static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
uint64_t P, const Symbol &Sym, RelExpr Expr) {
switch (Expr) {
@ -501,38 +612,37 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
case R_ARM_SBREL:
return Sym.getVA(A) - getARMStaticBase(Sym);
case R_GOT:
case R_GOT_PLT:
case R_RELAX_TLS_GD_TO_IE_ABS:
return Sym.getGotVA() + A;
case R_GOTONLY_PC:
return InX::Got->getVA() + A - P;
return In.Got->getVA() + A - P;
case R_GOTONLY_PC_FROM_END:
return InX::Got->getVA() + A - P + InX::Got->getSize();
return In.Got->getVA() + A - P + In.Got->getSize();
case R_GOTREL:
return Sym.getVA(A) - InX::Got->getVA();
return Sym.getVA(A) - In.Got->getVA();
case R_GOTREL_FROM_END:
return Sym.getVA(A) - InX::Got->getVA() - InX::Got->getSize();
return Sym.getVA(A) - In.Got->getVA() - In.Got->getSize();
case R_GOT_FROM_END:
case R_RELAX_TLS_GD_TO_IE_END:
return Sym.getGotOffset() + A - InX::Got->getSize();
return Sym.getGotOffset() + A - In.Got->getSize();
case R_TLSLD_GOT_OFF:
case R_GOT_OFF:
case R_RELAX_TLS_GD_TO_IE_GOT_OFF:
return Sym.getGotOffset() + A;
case R_GOT_PAGE_PC:
case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
case R_AARCH64_GOT_PAGE_PC:
case R_AARCH64_GOT_PAGE_PC_PLT:
case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC:
return getAArch64Page(Sym.getGotVA() + A) - getAArch64Page(P);
case R_GOT_PC:
case R_RELAX_TLS_GD_TO_IE:
return Sym.getGotVA() + A - P;
case R_HINT:
case R_NONE:
case R_TLSDESC_CALL:
case R_TLSLD_HINT:
llvm_unreachable("cannot relocate hint relocs");
case R_HEXAGON_GOT:
return Sym.getGotVA() - In.GotPlt->getVA();
case R_MIPS_GOTREL:
return Sym.getVA(A) - InX::MipsGot->getGp(File);
return Sym.getVA(A) - In.MipsGot->getGp(File);
case R_MIPS_GOT_GP:
return InX::MipsGot->getGp(File) + A;
return In.MipsGot->getGp(File) + A;
case R_MIPS_GOT_GP_PC: {
// An R_MIPS_LO16 expression has R_MIPS_GOT_GP_PC type iff the target
// is the _gp_disp symbol. In that case we should use the following
@ -541,7 +651,7 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
// microMIPS variants of these relocations use slightly different
// expressions: AHL + GP - P + 3 for %lo() and AHL + GP - P - 1 for %hi()
// to correctly handle the least-significant bit of the microMIPS symbol.
uint64_t V = InX::MipsGot->getGp(File) + A - P;
uint64_t V = In.MipsGot->getGp(File) + A - P;
if (Type == R_MIPS_LO16 || Type == R_MICROMIPS_LO16)
V += 4;
if (Type == R_MICROMIPS_LO16 || Type == R_MICROMIPS_HI16)
@ -552,31 +662,34 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
// If a relocation against a MIPS local symbol requires a GOT entry, the
// entry should be initialized with the 'page address', which is the high
// 16 bits of the sum of the symbol's value and the addend.
return InX::MipsGot->getVA() +
InX::MipsGot->getPageEntryOffset(File, Sym, A) -
InX::MipsGot->getGp(File);
return In.MipsGot->getVA() + In.MipsGot->getPageEntryOffset(File, Sym, A) -
In.MipsGot->getGp(File);
case R_MIPS_GOT_OFF:
case R_MIPS_GOT_OFF32:
// On MIPS, if a GOT relocation has a non-zero addend, the addend should be
// applied to the GOT entry contents, not to the GOT entry offset.
// That is why we use a separate expression type.
return InX::MipsGot->getVA() +
InX::MipsGot->getSymEntryOffset(File, Sym, A) -
InX::MipsGot->getGp(File);
return In.MipsGot->getVA() + In.MipsGot->getSymEntryOffset(File, Sym, A) -
In.MipsGot->getGp(File);
case R_MIPS_TLSGD:
return InX::MipsGot->getVA() + InX::MipsGot->getGlobalDynOffset(File, Sym) -
InX::MipsGot->getGp(File);
return In.MipsGot->getVA() + In.MipsGot->getGlobalDynOffset(File, Sym) -
In.MipsGot->getGp(File);
case R_MIPS_TLSLD:
return InX::MipsGot->getVA() + InX::MipsGot->getTlsIndexOffset(File) -
InX::MipsGot->getGp(File);
case R_PAGE_PC:
case R_PLT_PAGE_PC: {
uint64_t Dest;
if (Sym.isUndefWeak())
Dest = getAArch64Page(A);
else
Dest = getAArch64Page(Sym.getVA(A));
return Dest - getAArch64Page(P);
return In.MipsGot->getVA() + In.MipsGot->getTlsIndexOffset(File) -
In.MipsGot->getGp(File);
case R_AARCH64_PAGE_PC: {
uint64_t Val = Sym.isUndefWeak() ? P + A : Sym.getVA(A);
return getAArch64Page(Val) - getAArch64Page(P);
}
case R_AARCH64_PLT_PAGE_PC: {
uint64_t Val = Sym.isUndefWeak() ? P + A : Sym.getPltVA() + A;
return getAArch64Page(Val) - getAArch64Page(P);
}
case R_RISCV_PC_INDIRECT: {
if (const Relocation *HiRel = getRISCVPCRelHi20(&Sym, A))
return getRelocTargetVA(File, HiRel->Type, HiRel->Addend, Sym.getVA(),
*HiRel->Sym, HiRel->Expr);
return 0;
}
case R_PC: {
uint64_t Dest;
@ -608,16 +721,12 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
return 0;
// PPC64 V2 ABI describes two entry points to a function. The global entry
// point sets up the TOC base pointer. When calling a local function, the
// call should branch to the local entry point rather than the global entry
// point. Section 3.4.1 describes using the 3 most significant bits of the
// st_other field to find out how many instructions there are between the
// local and global entry point.
uint8_t StOther = (Sym.StOther >> 5) & 7;
if (StOther == 0 || StOther == 1)
return SymVA - P;
return SymVA - P + (1LL << StOther);
// point is used for calls where the caller and callee (may) have different
// TOC base pointers and r2 needs to be modified to hold the TOC base for
// the callee. For local calls the caller and callee share the same
// TOC base and so the TOC pointer initialization code should be skipped by
// branching to the local entry point.
return SymVA - P + getPPC64GlobalEntryToLocalEntryOffset(Sym.StOther);
}
case R_PPC_TOC:
return getPPC64TocBase() + A;
@ -634,48 +743,32 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
// statically to zero.
if (Sym.isTls() && Sym.isUndefWeak())
return 0;
// For TLS variant 1 the TCB is a fixed size, whereas for TLS variant 2 the
// TCB is of unspecified size and content. Targets that implement variant 1
// should set TcbSize.
if (Target->TcbSize) {
// The PPC64 V2 ABI has the thread pointer offset into the middle of the TLS
// storage area by TlsTpOffset, for efficient addressing of the TCB and up to
// 4 KB - 8 B of other thread library information (placed before the TCB).
// Subtracting this offset yields the address of the first TLS block.
if (Target->TlsTpOffset)
return Sym.getVA(A) - Target->TlsTpOffset;
// If the thread pointer is not offset into the middle, the first thing in
// the TLS storage area is the TCB. Add the TcbSize to get the address of
// the first TLS block.
return Sym.getVA(A) + alignTo(Target->TcbSize, Out::TlsPhdr->p_align);
}
return Sym.getVA(A) - Out::TlsPhdr->p_memsz;
return Sym.getVA(A) + getTlsTpOffset();
case R_RELAX_TLS_GD_TO_LE_NEG:
case R_NEG_TLS:
return Out::TlsPhdr->p_memsz - Sym.getVA(A);
case R_SIZE:
return Sym.getSize() + A;
case R_TLSDESC:
return InX::Got->getGlobalDynAddr(Sym) + A;
case R_TLSDESC_PAGE:
return getAArch64Page(InX::Got->getGlobalDynAddr(Sym) + A) -
return In.Got->getGlobalDynAddr(Sym) + A;
case R_AARCH64_TLSDESC_PAGE:
return getAArch64Page(In.Got->getGlobalDynAddr(Sym) + A) -
getAArch64Page(P);
case R_TLSGD_GOT:
return InX::Got->getGlobalDynOffset(Sym) + A;
return In.Got->getGlobalDynOffset(Sym) + A;
case R_TLSGD_GOT_FROM_END:
return InX::Got->getGlobalDynOffset(Sym) + A - InX::Got->getSize();
return In.Got->getGlobalDynOffset(Sym) + A - In.Got->getSize();
case R_TLSGD_PC:
return InX::Got->getGlobalDynAddr(Sym) + A - P;
return In.Got->getGlobalDynAddr(Sym) + A - P;
case R_TLSLD_GOT_FROM_END:
return InX::Got->getTlsIndexOff() + A - InX::Got->getSize();
return In.Got->getTlsIndexOff() + A - In.Got->getSize();
case R_TLSLD_GOT:
return InX::Got->getTlsIndexOff() + A;
return In.Got->getTlsIndexOff() + A;
case R_TLSLD_PC:
return InX::Got->getTlsIndexVA() + A - P;
return In.Got->getTlsIndexVA() + A - P;
default:
llvm_unreachable("invalid expression");
}
llvm_unreachable("Invalid expression");
}
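A sketch of the st_other encoding behind getPPC64GlobalEntryToLocalEntryOffset, reconstructed from the code this diff removes: bits 5-7 of st_other hold the distance, values 0 and 1 mean the global and local entry points coincide, and larger values mean 1 << N bytes. This simplified version omits whatever validation the real helper performs:

#include <cstdint>

static int64_t globalToLocalEntryOffset(uint8_t StOther) {
  uint8_t Dist = (StOther >> 5) & 7;
  if (Dist == 0 || Dist == 1)
    return 0;         // no separate local entry point
  return 1LL << Dist; // e.g. 2 -> 4 bytes (one instruction), 3 -> 8 bytes
}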
// This function applies relocations to sections without SHF_ALLOC bit.
@ -808,10 +901,10 @@ void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) {
case R_RELAX_TLS_GD_TO_LE_NEG:
Target->relaxTlsGdToLe(BufLoc, Type, TargetVA);
break;
case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC:
case R_RELAX_TLS_GD_TO_IE:
case R_RELAX_TLS_GD_TO_IE_ABS:
case R_RELAX_TLS_GD_TO_IE_GOT_OFF:
case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
case R_RELAX_TLS_GD_TO_IE_END:
Target->relaxTlsGdToIe(BufLoc, Type, TargetVA);
break;
@ -848,16 +941,20 @@ static void switchMorestackCallsToMorestackNonSplit(
// __morestack inside that function should be switched to
// __morestack_non_split.
Symbol *MoreStackNonSplit = Symtab->find("__morestack_non_split");
if (!MoreStackNonSplit) {
error("Mixing split-stack objects requires a definition of "
"__morestack_non_split");
return;
}
// Sort both collections to compare addresses efficiently.
llvm::sort(MorestackCalls.begin(), MorestackCalls.end(),
[](const Relocation *L, const Relocation *R) {
return L->Offset < R->Offset;
});
llvm::sort(MorestackCalls, [](const Relocation *L, const Relocation *R) {
return L->Offset < R->Offset;
});
std::vector<Defined *> Functions(Prologues.begin(), Prologues.end());
llvm::sort(
Functions.begin(), Functions.end(),
[](const Defined *L, const Defined *R) { return L->Value < R->Value; });
llvm::sort(Functions, [](const Defined *L, const Defined *R) {
return L->Value < R->Value;
});
auto It = MorestackCalls.begin();
for (Defined *F : Functions) {
@ -872,8 +969,8 @@ static void switchMorestackCallsToMorestackNonSplit(
}
}
static bool enclosingPrologueAdjusted(uint64_t Offset,
const DenseSet<Defined *> &Prologues) {
static bool enclosingPrologueAttempted(uint64_t Offset,
const DenseSet<Defined *> &Prologues) {
for (Defined *F : Prologues)
if (F->Value <= Offset && Offset < F->Value + F->Size)
return true;
@ -889,7 +986,7 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf,
uint8_t *End) {
if (!getFile<ELFT>()->SplitStack)
return;
DenseSet<Defined *> AdjustedPrologues;
DenseSet<Defined *> Prologues;
std::vector<Relocation *> MorestackCalls;
for (Relocation &Rel : Relocations) {
@ -898,15 +995,9 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf,
if (Rel.Sym->isLocal())
continue;
Defined *D = dyn_cast<Defined>(Rel.Sym);
// A reference to an undefined symbol was an error, and should not
// have gotten to this point.
if (!D)
continue;
// Ignore calls into the split-stack api.
if (D->getName().startswith("__morestack")) {
if (D->getName().equals("__morestack"))
if (Rel.Sym->getName().startswith("__morestack")) {
if (Rel.Sym->getName().equals("__morestack"))
MorestackCalls.push_back(&Rel);
continue;
}
@ -914,24 +1005,36 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf,
// A relocation to a non-function symbol isn't relevant. Sometimes
// __morestack is not marked as a function, so this check comes
// after the name check.
if (D->Type != STT_FUNC)
if (Rel.Sym->Type != STT_FUNC)
continue;
if (enclosingPrologueAdjusted(Rel.Offset, AdjustedPrologues))
// If the callee's file was compiled with split stack, nothing to do. In
// this context, a "Defined" symbol is one "defined by the binary currently
// being produced". So an "undefined" symbol might be provided by a shared
// library. It is not possible to tell how such symbols were compiled, so be
// conservative.
if (Defined *D = dyn_cast<Defined>(Rel.Sym))
if (InputSection *IS = cast_or_null<InputSection>(D->Section))
if (!IS || !IS->getFile<ELFT>() || IS->getFile<ELFT>()->SplitStack)
continue;
if (enclosingPrologueAttempted(Rel.Offset, Prologues))
continue;
if (Defined *F = getEnclosingFunction<ELFT>(Rel.Offset)) {
if (Target->adjustPrologueForCrossSplitStack(Buf + F->Value, End)) {
AdjustedPrologues.insert(F);
Prologues.insert(F);
if (Target->adjustPrologueForCrossSplitStack(Buf + getOffset(F->Value),
End, F->StOther))
continue;
}
if (!getFile<ELFT>()->SomeNoSplitStack)
error(lld::toString(this) + ": " + F->getName() +
" (with -fsplit-stack) calls " + Rel.Sym->getName() +
" (without -fsplit-stack), but couldn't adjust its prologue");
}
if (!getFile<ELFT>()->SomeNoSplitStack)
error("function call at " + getErrorLocation(Buf + Rel.Offset) +
"crosses a split-stack boundary, but unable " +
"to adjust the enclosing function's prologue");
}
switchMorestackCallsToMorestackNonSplit(AdjustedPrologues, MorestackCalls);
if (Target->NeedsMoreStackNonSplit)
switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls);
}
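The retargeting pass above sorts both collections by address and sweeps them together, matching each __morestack call site against its enclosing adjusted function in linear time. A reduced sketch of that pattern (types and names simplified):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Func { uint64_t Value, Size; };
struct Call { uint64_t Offset; bool Retarget = false; };

static void markCallsInAdjustedFunctions(std::vector<Func> Funcs,
                                         std::vector<Call> &Calls) {
  std::sort(Funcs.begin(), Funcs.end(),
            [](const Func &L, const Func &R) { return L.Value < R.Value; });
  std::sort(Calls.begin(), Calls.end(),
            [](const Call &L, const Call &R) { return L.Offset < R.Offset; });
  auto It = Calls.begin();
  for (const Func &F : Funcs) {
    while (It != Calls.end() && It->Offset < F.Value)
      ++It; // calls before this function belong to no adjusted prologue
    while (It != Calls.end() && It->Offset < F.Value + F.Size)
      (It++)->Retarget = true; // call site inside an adjusted function
  }
}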
template <class ELFT> void InputSection::writeTo(uint8_t *Buf) {
@ -960,10 +1063,23 @@ template <class ELFT> void InputSection::writeTo(uint8_t *Buf) {
return;
}
// If this is a compressed section, uncompress section contents directly
// to the buffer.
if (UncompressedSize >= 0 && !UncompressedBuf) {
size_t Size = UncompressedSize;
if (Error E = zlib::uncompress(toStringRef(RawData),
(char *)(Buf + OutSecOff), Size))
fatal(toString(this) +
": uncompress failed: " + llvm::toString(std::move(E)));
uint8_t *BufEnd = Buf + OutSecOff + Size;
relocate<ELFT>(Buf, BufEnd);
return;
}
// Copy section contents from source object file to output file
// and then apply relocations.
memcpy(Buf + OutSecOff, Data.data(), Data.size());
uint8_t *BufEnd = Buf + OutSecOff + Data.size();
memcpy(Buf + OutSecOff, data().data(), data().size());
uint8_t *BufEnd = Buf + OutSecOff + data().size();
relocate<ELFT>(Buf, BufEnd);
}
@ -1014,7 +1130,7 @@ template <class ELFT> void EhInputSection::split() {
template <class ELFT, class RelTy>
void EhInputSection::split(ArrayRef<RelTy> Rels) {
unsigned RelI = 0;
for (size_t Off = 0, End = Data.size(); Off != End;) {
for (size_t Off = 0, End = data().size(); Off != End;) {
size_t Size = readEhRecordSize(this, Off);
Pieces.emplace_back(Off, this, Size, getReloc(Off, Size, Rels, RelI));
// The empty record is the end marker.
@ -1094,65 +1210,32 @@ void MergeInputSection::splitIntoPieces() {
assert(Pieces.empty());
if (Flags & SHF_STRINGS)
splitStrings(Data, Entsize);
splitStrings(data(), Entsize);
else
splitNonStrings(Data, Entsize);
OffsetMap.reserve(Pieces.size());
for (size_t I = 0, E = Pieces.size(); I != E; ++I)
OffsetMap[Pieces[I].InputOff] = I;
}
template <class It, class T, class Compare>
static It fastUpperBound(It First, It Last, const T &Value, Compare Comp) {
size_t Size = std::distance(First, Last);
assert(Size != 0);
while (Size != 1) {
size_t H = Size / 2;
const It MI = First + H;
Size -= H;
First = Comp(Value, *MI) ? First : First + H;
}
return Comp(Value, *First) ? First : First + 1;
}
// Do binary search to get a section piece at a given input offset.
static SectionPiece *findSectionPiece(MergeInputSection *Sec, uint64_t Offset) {
if (Sec->Data.size() <= Offset)
fatal(toString(Sec) + ": entry is past the end of the section");
// Find the element this offset points to.
auto I = fastUpperBound(
Sec->Pieces.begin(), Sec->Pieces.end(), Offset,
[](const uint64_t &A, const SectionPiece &B) { return A < B.InputOff; });
--I;
return &*I;
splitNonStrings(data(), Entsize);
}
SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) {
// Find a piece starting at a given offset.
auto It = OffsetMap.find(Offset);
if (It != OffsetMap.end())
return &Pieces[It->second];
if (this->data().size() <= Offset)
fatal(toString(this) + ": offset is outside the section");
// If Offset is not at beginning of a section piece, it is not in the map.
// In that case we need to search from the original section piece vector.
return findSectionPiece(this, Offset);
// In that case we need to do a binary search of the original section piece vector.
auto It2 =
llvm::upper_bound(Pieces, Offset, [](uint64_t Offset, SectionPiece P) {
return Offset < P.InputOff;
});
return &It2[-1];
}
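The binary search above finds the piece containing Offset: upper_bound yields the first piece that starts strictly after Offset, so stepping back one lands on the containing piece (valid because the first piece starts at input offset 0). The same pattern with the standard library:

#include <algorithm>
#include <cstdint>
#include <vector>

struct Piece { uint64_t InputOff; };

static const Piece *containingPiece(const std::vector<Piece> &Pieces,
                                    uint64_t Offset) {
  auto It = std::upper_bound(
      Pieces.begin(), Pieces.end(), Offset,
      [](uint64_t Off, const Piece &P) { return Off < P.InputOff; });
  return &It[-1];
}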
// Returns the offset in an output section for a given input offset.
// Because the contents of a mergeable section are not contiguous in the
// output, it is not just an addition to a base output offset.
uint64_t MergeInputSection::getParentOffset(uint64_t Offset) const {
// Find a string starting at a given offset.
auto It = OffsetMap.find(Offset);
if (It != OffsetMap.end())
return Pieces[It->second].OutputOff;
// If Offset is not at beginning of a section piece, it is not in the map.
// In that case we need to search from the original section piece vector.
const SectionPiece &Piece =
*findSectionPiece(const_cast<MergeInputSection *>(this), Offset);
*(const_cast<MergeInputSection *>(this)->getSectionPiece(Offset));
uint64_t Addend = Offset - Piece.InputOff;
return Piece.OutputOff + Addend;
}

deps/lld/ELF/InputSection.h vendored

@ -115,7 +115,12 @@ public:
return cast_or_null<ObjFile<ELFT>>(File);
}
ArrayRef<uint8_t> Data;
ArrayRef<uint8_t> data() const {
if (UncompressedSize >= 0 && !UncompressedBuf)
uncompress();
return RawData;
}
uint64_t getOffsetInFile() const;
// True if this section has already been placed to a linker script
@ -169,11 +174,6 @@ public:
template <class ELFT>
Defined *getEnclosingFunction(uint64_t Offset);
// Compilers emit zlib-compressed debug sections if the -gz option
// is given. This function checks if this section is compressed, and
// if so, decompresses it in memory.
void maybeDecompress();
// Returns a source location string. Used to construct an error message.
template <class ELFT> std::string getLocation(uint64_t Offset);
std::string getSrcMsg(const Symbol &Sym, uint64_t Offset);
@ -200,15 +200,21 @@ public:
template <typename T> llvm::ArrayRef<T> getDataAs() const {
size_t S = Data.size();
size_t S = data().size();
assert(S % sizeof(T) == 0);
return llvm::makeArrayRef<T>((const T *)Data.data(), S / sizeof(T));
return llvm::makeArrayRef<T>((const T *)data().data(), S / sizeof(T));
}
private:
// A pointer that owns decompressed data if a section is compressed by zlib.
protected:
void parseCompressedHeader();
void uncompress() const;
mutable ArrayRef<uint8_t> RawData;
// A pointer that owns uncompressed data if a section is compressed by zlib.
// Since the feature is not used often, this is usually a nullptr.
std::unique_ptr<char[]> DecompressBuf;
mutable std::unique_ptr<char[]> UncompressedBuf;
int64_t UncompressedSize = -1;
};
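The members above implement decompress-on-first-use: data() is const, so the buffer and the raw view are mutable, and the first call through data() repoints the view at the uncompressed bytes. A reduced sketch of the pattern with the zlib call stubbed out (the real uncompress() goes through llvm::zlib::uncompress):

#include <cstdint>
#include <cstring>
#include <memory>

class LazySection {
public:
  const uint8_t *data() const {
    if (UncompressedSize >= 0 && !Buf)
      uncompress(); // first access pays the decompression cost
    return Raw;
  }

private:
  void uncompress() const {
    Buf.reset(new uint8_t[UncompressedSize]);
    memset(Buf.get(), 0, UncompressedSize); // stand-in for the zlib call
    Raw = Buf.get();                        // repoint the view at the buffer
  }

  mutable const uint8_t *Raw = nullptr;
  mutable std::unique_ptr<uint8_t[]> Buf;
  int64_t UncompressedSize = -1;
};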
// SectionPiece represents a piece of splittable section contents.
@ -247,7 +253,6 @@ public:
// Splittable sections are handled as a sequence of data
// rather than a single large blob of data.
std::vector<SectionPiece> Pieces;
llvm::DenseMap<uint32_t, uint32_t> OffsetMap;
// Returns I'th piece's data. This function is very hot when
// string merging is enabled, so we want it to be inlined.
@ -255,8 +260,8 @@ public:
llvm::CachedHashStringRef getData(size_t I) const {
size_t Begin = Pieces[I].InputOff;
size_t End =
(Pieces.size() - 1 == I) ? Data.size() : Pieces[I + 1].InputOff;
return {toStringRef(Data.slice(Begin, End - Begin)), Pieces[I].Hash};
(Pieces.size() - 1 == I) ? data().size() : Pieces[I + 1].InputOff;
return {toStringRef(data().slice(Begin, End - Begin)), Pieces[I].Hash};
}
// Returns the SectionPiece at a given input section offset.
@ -277,7 +282,9 @@ struct EhSectionPiece {
unsigned FirstRelocation)
: InputOff(Off), Sec(Sec), Size(Size), FirstRelocation(FirstRelocation) {}
ArrayRef<uint8_t> data() { return {Sec->Data.data() + this->InputOff, Size}; }
ArrayRef<uint8_t> data() {
return {Sec->data().data() + this->InputOff, Size};
}
size_t InputOff;
ssize_t OutputOff = -1;
@ -353,6 +360,7 @@ private:
// The list of all input sections.
extern std::vector<InputSectionBase *> InputSections;
} // namespace elf
std::string toString(const elf::InputSectionBase *);

deps/lld/ELF/LTO.cpp vendored

@ -67,9 +67,10 @@ static std::string getThinLTOOutputFile(StringRef ModulePath) {
static lto::Config createConfig() {
lto::Config C;
// LLD supports the new relocations.
// LLD supports the new relocations and address-significance tables.
C.Options = InitTargetOptionsFromCodeGenFlags();
C.Options.RelaxELFRelocations = true;
C.Options.EmitAddrsig = true;
// Always emit a section per function/datum with LTO.
C.Options.FunctionSections = true;
@ -87,6 +88,7 @@ static lto::Config createConfig() {
C.DiagHandler = diagnosticHandler;
C.OptLevel = Config->LTOO;
C.CPU = GetCPUStr();
C.MAttrs = GetMAttrs();
// Set up a custom pipeline if we've been asked to.
C.OptPipeline = Config->LTONewPmPasses;
@ -101,6 +103,14 @@ static lto::Config createConfig() {
C.DebugPassManager = Config->LTODebugPassManager;
C.DwoDir = Config->DwoDir;
if (Config->EmitLLVM) {
C.PostInternalizeModuleHook = [](size_t Task, const Module &M) {
if (std::unique_ptr<raw_fd_ostream> OS = openFile(Config->OutputFile))
WriteBitcodeToFile(M, *OS, false);
return false;
};
}
if (Config->SaveTemps)
checkError(C.addSaveTemps(Config->OutputFile.str() + ".",
/*UseInputModulePath*/ true));
@ -108,18 +118,14 @@ static lto::Config createConfig() {
}
BitcodeCompiler::BitcodeCompiler() {
// Initialize IndexFile.
if (!Config->ThinLTOIndexOnlyArg.empty())
IndexFile = openFile(Config->ThinLTOIndexOnlyArg);
// Initialize LTOObj.
lto::ThinBackend Backend;
if (Config->ThinLTOIndexOnly) {
StringRef Path = Config->ThinLTOIndexOnlyArg;
if (!Path.empty())
IndexFile = openFile(Path);
auto OnIndexWrite = [&](const std::string &Identifier) {
ObjectToIndexFileState[Identifier] = true;
};
auto OnIndexWrite = [&](StringRef S) { ThinIndices.erase(S); };
Backend = lto::createWriteIndexesThinBackend(
Config->ThinLTOPrefixReplace.first, Config->ThinLTOPrefixReplace.second,
Config->ThinLTOEmitImportsFiles, IndexFile.get(), OnIndexWrite);
@ -132,10 +138,10 @@ BitcodeCompiler::BitcodeCompiler() {
// Initialize UsedStartStop.
for (Symbol *Sym : Symtab->getSymbols()) {
StringRef Name = Sym->getName();
StringRef S = Sym->getName();
for (StringRef Prefix : {"__start_", "__stop_"})
if (Name.startswith(Prefix))
UsedStartStop.insert(Name.substr(Prefix.size()));
if (S.startswith(Prefix))
UsedStartStop.insert(S.substr(Prefix.size()));
}
}
@ -151,7 +157,7 @@ void BitcodeCompiler::add(BitcodeFile &F) {
bool IsExec = !Config->Shared && !Config->Relocatable;
if (Config->ThinLTOIndexOnly)
ObjectToIndexFileState.insert({Obj.getName(), false});
ThinIndices.insert(Obj.getName());
ArrayRef<Symbol *> Syms = F.getSymbols();
ArrayRef<lto::InputFile::Symbol> ObjSyms = Obj.symbols();
@ -240,15 +246,11 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
Cache));
// Emit empty index files for non-indexed files
if (Config->ThinLTOIndexOnly) {
for (auto &Identifier : ObjectToIndexFileState)
if (!Identifier.getValue()) {
std::string Path = getThinLTOOutputFile(Identifier.getKey());
openFile(Path + ".thinlto.bc");
if (Config->ThinLTOEmitImportsFiles)
openFile(Path + ".imports");
}
for (StringRef S : ThinIndices) {
std::string Path = getThinLTOOutputFile(S);
openFile(Path + ".thinlto.bc");
if (Config->ThinLTOEmitImportsFiles)
openFile(Path + ".imports");
}
// If LazyObjFile has not been added to link, emit empty index files.
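The bookkeeping above amounts to a set difference: every bitcode input is inserted into ThinIndices in add(), OnIndexWrite erases each module as its index is written, and whatever remains gets an empty index file. The same shape with standard containers (names hypothetical):

#include <set>
#include <string>

int main() {
  std::set<std::string> Pending = {"a.o", "b.o", "c.o"}; // all bitcode inputs
  auto OnIndexWrite = [&](const std::string &S) { Pending.erase(S); };
  OnIndexWrite("a.o"); // the backend wrote a.o's index
  OnIndexWrite("c.o");
  for (const std::string &S : Pending) {
    // emit empty "<S>.thinlto.bc" (and ".imports") placeholders here
    (void)S;
  }
}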

deps/lld/ELF/LTO.h vendored

@ -55,7 +55,7 @@ private:
std::vector<std::unique_ptr<MemoryBuffer>> Files;
llvm::DenseSet<StringRef> UsedStartStop;
std::unique_ptr<llvm::raw_fd_ostream> IndexFile;
llvm::StringMap<bool> ObjectToIndexFileState;
llvm::DenseSet<StringRef> ThinIndices;
};
} // namespace elf
} // namespace lld

deps/lld/ELF/LinkerScript.cpp vendored

@ -169,7 +169,7 @@ void LinkerScript::addSymbol(SymbolAssignment *Cmd) {
// Define a symbol.
Symbol *Sym;
uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, /*Type*/ 0, Visibility,
std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, Visibility,
/*CanOmitFromDynSym*/ false,
/*File*/ nullptr);
ExprValue Value = Cmd->Expression();
@ -202,13 +202,14 @@ static void declareSymbol(SymbolAssignment *Cmd) {
// We can't calculate the final value right now.
Symbol *Sym;
uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, /*Type*/ 0, Visibility,
std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, Visibility,
/*CanOmitFromDynSym*/ false,
/*File*/ nullptr);
replaceSymbol<Defined>(Sym, nullptr, Cmd->Name, STB_GLOBAL, Visibility,
STT_NOTYPE, 0, 0, nullptr);
Cmd->Sym = cast<Defined>(Sym);
Cmd->Provide = false;
Sym->ScriptDefined = true;
}
// This method is used to handle INSERT AFTER statement. Here we rebuild
@ -414,18 +415,16 @@ LinkerScript::computeInputSections(const InputSectionDescription *Cmd) {
void LinkerScript::discard(ArrayRef<InputSection *> V) {
for (InputSection *S : V) {
if (S == InX::ShStrTab || S == InX::Dynamic || S == InX::DynSymTab ||
S == InX::DynStrTab || S == InX::RelaPlt || S == InX::RelaDyn ||
S == InX::RelrDyn)
if (S == In.ShStrTab || S == In.RelaDyn || S == In.RelrDyn)
error("discarding " + S->Name + " section is not allowed");
// You can discard the .hash and .gnu.hash sections with linker scripts. Since
// they are synthesized sections, we need to handle them differently than
// other regular sections.
if (S == InX::GnuHashTab)
InX::GnuHashTab = nullptr;
if (S == InX::HashTab)
InX::HashTab = nullptr;
if (S == In.GnuHashTab)
In.GnuHashTab = nullptr;
if (S == In.HashTab)
In.HashTab = nullptr;
S->Assigned = false;
S->Live = false;
@ -701,6 +700,7 @@ uint64_t LinkerScript::advance(uint64_t Size, unsigned Alignment) {
}
void LinkerScript::output(InputSection *S) {
assert(Ctx->OutSec == S->getParent());
uint64_t Before = advance(0, 1);
uint64_t Pos = advance(S->getSize(), S->Alignment);
S->OutSecOff = Pos - S->getSize() - Ctx->OutSec->Addr;
@ -816,21 +816,8 @@ void LinkerScript::assignOffsets(OutputSection *Sec) {
// Handle a single input section description command.
// It calculates and assigns the offsets for each section and also
// updates the output section size.
auto *Cmd = cast<InputSectionDescription>(Base);
for (InputSection *Sec : Cmd->Sections) {
// We tentatively added all synthetic sections at the beginning and
// removed empty ones afterwards (because there is no way to know
// whether they were going to be empty or not other than actually running
// linker scripts.) We need to ignore remains of empty sections.
if (auto *S = dyn_cast<SyntheticSection>(Sec))
if (S->empty())
continue;
if (!Sec->Live)
continue;
assert(Ctx->OutSec == Sec->getParent());
for (InputSection *Sec : cast<InputSectionDescription>(Base)->Sections)
output(Sec);
}
}
}

Some files were not shown because too many files have changed in this diff.